PageRenderTime 913ms CodeModel.GetById 474ms app.highlight 76ms RepoModel.GetById 356ms app.codeStats 0ms

/src/googlecl/docs/base.py

http://googlecl.googlecode.com/
Python | 463 lines | 402 code | 7 blank | 54 comment | 27 complexity | 80e13104aa682144663a4a0cf7c6f11e MD5 | raw file
  1# Copyright (C) 2010 Google Inc.
  2#
  3# Licensed under the Apache License, Version 2.0 (the "License");
  4# you may not use this file except in compliance with the License.
  5# You may obtain a copy of the License at
  6#
  7#      http://www.apache.org/licenses/LICENSE-2.0
  8#
  9# Unless required by applicable law or agreed to in writing, software
 10# distributed under the License is distributed on an "AS IS" BASIS,
 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12# See the License for the specific language governing permissions and
 13# limitations under the License.
 14
 15
 16"""Service details and instances for the Docs service using GData 3.0.
 17
 18Some use cases:
 19Upload a document:
 20  docs upload --folder "Some folder" path_to_doc
 21
 22Edit a document in your word editor:
 23  docs edit --title "Grocery List" --editor vim (editor also set in prefs)
 24
 25Download docs:
 26  docs get --folder "Some folder"
 27
 28"""
 29from __future__ import with_statement
 30
 31__author__ = 'tom.h.miller@gmail.com (Tom Miller)'
 32import ConfigParser
 33import logging
 34import os
 35import shlex
 36import shutil
 37import sys
 38import googlecl
 39from googlecl.docs import SECTION_HEADER
 40
 41# Renamed here to reduce verbosity in other sections
 42safe_encode = googlecl.safe_encode
 43safe_decode = googlecl.safe_decode
 44
 45
 46LOG = logging.getLogger(googlecl.docs.LOGGER_NAME + '.base')
 47
 48# For to_safe_filename
 49if sys.platform == 'win32':
 50  UNSAFE_FILE_CHARS = '\\/:*?"<>|'
 51else:
 52  UNSAFE_FILE_CHARS = '/'
 53
 54
 55class DocsError(googlecl.base.Error):
 56  """Base error for Docs errors."""
 57  pass
 58
 59
 60class DocsBaseCL(object):
 61
 62  """Class meant to be inherited by either DocsClientCL or DocsServiceCL."""
 63
 64  # Marked with leading underscore because people should use the method
 65  # for creating folders appropriate to the superclass.
 66  def _create_folder(folder_name, folder_or_uri=None):
 67    raise NotImplementedError('_modify_entry must be defined!')
 68
 69  def edit_doc(self, doc_entry_or_title, editor, file_ext,
 70               folder_entry_or_path=None):
 71    """Edit a document.
 72
 73    Keyword arguments:
 74      doc_entry_or_title: DocEntry of the existing document to edit,
 75                          or title of the document to create.
 76      editor: Name of the editor to use. Should be executable from the user's
 77              working directory.
 78      file_ext: Suffix of the file to download.
 79                For example, "txt", "csv", "xcl".
 80      folder_entry_or_path: Entry or string representing folder to upload into.
 81                   If a string, a new set of folders will ALWAYS be created.
 82                   For example, 'my_folder' to upload to my_folder,
 83                   'foo/bar' to upload into subfolder bar under folder foo.
 84                   Default None for root folder.
 85
 86    """
 87    import subprocess
 88    import tempfile
 89
 90    try:
 91      doc_title = safe_decode(doc_entry_or_title.title.text)
 92      new_doc = False
 93    except AttributeError:
 94      doc_title = doc_entry_or_title
 95      new_doc = True
 96
 97    temp_dir = tempfile.mkdtemp()
 98    # If we're creating a new document and not given a folder entry
 99    if new_doc and isinstance(folder_entry_or_path, basestring):
100      folder_path = os.path.normpath(folder_entry_or_path)
101      # Some systems allow more than one path separator
102      if os.altsep:
103        folder_path.replace(os.altsep, os.sep)
104      base_folder = folder_path.split(os.sep)[0]
105      # Define the base path such that upload_docs will create a folder
106      # named base_folder
107      base_path = os.path.join(temp_dir, base_folder)
108      total_basename = os.path.join(temp_dir, folder_path)
109      os.makedirs(total_basename)
110      path = os.path.join(total_basename,
111                          self.to_safe_filename(doc_title) + '.' + file_ext)
112    else:
113      path = os.path.join(temp_dir,
114                          self.to_safe_filename(doc_title) + '.' + file_ext)
115      base_path = path
116
117    if not new_doc:
118      # This used to be the following, passing just the URL instead of the
119      # entry object (which it's guaranteed to be since not new_doc).
120      # Both client and service seem happy with it, so it was probably
121      # unnecessary to reduce it to a URL first.
122      # self.Export(doc_entry_or_title.content.src, path)
123      self.Export(doc_entry_or_title, path)
124      file_hash = _md5_hash_file(path)
125    else:
126      file_hash = None
127
128    command_args = shlex.split(safe_encode(editor)) + [path]
129    subprocess.call(command_args)
130    impatient_editors = self.config.lazy_get(SECTION_HEADER,
131                                             'impatient_editors',
132                                             default='')
133    if impatient_editors:
134      impatient_editors = impatient_editors.split(',')
135      if command_args[0] in impatient_editors:
136        LOG.info('I noticed you are using an application that will not wait for '
137                 'you to finish editing your file.')
138        LOG.info('Hit enter in this shell when you finished editing and saved '
139                 'your work.')
140        raw_input('')
141    if file_hash and file_hash == _md5_hash_file(path):
142      LOG.info('No modifications to file, not uploading.')
143      return None
144    elif not os.path.exists(path):
145      LOG.info('No file written, not uploading.')
146      return None
147
148    if new_doc:
149      if isinstance(folder_entry_or_path, basestring):
150        # Let code in upload_docs handle the creation of new folder(s)
151        self.upload_docs([base_path], doc_title)
152      else:
153        # folder_entry_or_path is None or a GDataEntry.
154        doc_entry = self.upload_single_doc(path,
155                                           folder_entry=folder_entry_or_path)
156    else:
157      try:
158        doc_entry = self._modify_entry(doc_entry_or_title, path, file_ext)
159      except self.request_error, err:
160        LOG.error(err)
161        new_path = safe_move(path, '.')
162        LOG.info(safe_encode('Moved edited document to ' +
163                             safe_decode(new_path)))
164        return None
165
166    try:
167      # Good faith effort to keep the temp directory clean.
168      shutil.rmtree(temp_dir)
169    except OSError:
170      # Only seen errors on Windows, but catch the more general OSError.
171      pass
172    return doc_entry
173
174  EditDoc = edit_doc
175
176  def get_docs(self, base_path, entries, file_ext=None, grid_id=None):
177    """Download documents.
178
179    Keyword arguments:
180      base_path: The path to download files to. This plus an entry's title plus
181                 its format-specific extension will form the complete path.
182      entries: List of DocEntry items representing the files to download.
183      file_ext: Suffix to give the file(s) when downloading.
184                For example, "txt", "csv", "xcl". Default None to let
185                get_extension_from_doctype decide the extension. Ignored
186                when downloading arbitrary files.
187
188    """
189    if not os.path.isdir(base_path):
190      if len(entries) > 1:
191        raise DocsError(safe_encode(u'Specified multiple source files, but ' +
192                                    u'destination "' + base_path +
193                                    u'" is not a directory'))
194      format_from_filename = googlecl.get_extension_from_path(base_path)
195      if format_from_filename and not file_ext:
196        # Strip the extension off here if it exists. Don't want to double up
197        # on extension in for loop. (+1 for '.')
198        base_path = base_path[:-(len(format_from_filename)+1)]
199        # We can just set the file_ext here, since there's only one file.
200        file_ext = format_from_filename
201    for entry in entries:
202      # Don't set file_ext if we cannot do export.
203      # get_extension_from_doctype will check the config file for 'format'
204      # which will set an undesired entry_file_ext for
205      # unconverted downloads
206      if not file_ext and can_export(entry):
207        entry_file_ext = googlecl.docs.get_extension_from_doctype(
208                                         googlecl.docs.get_document_type(entry),
209                                         self.config)
210      else:
211        entry_file_ext = file_ext
212      if entry_file_ext:
213        LOG.debug('Decided file_ext is ' + entry_file_ext)
214        extension = '.' + entry_file_ext
215      else:
216        LOG.debug('Could not (or would not) set file_ext')
217        if can_export(entry):
218          extension = '.txt'
219        else:
220          # Files that cannot be exported typically have a file extension
221          # in their name / title.
222          extension = ''
223
224      entry_title = safe_decode(entry.title.text)
225      if os.path.isdir(base_path):
226        entry_title_safe = self.to_safe_filename(entry_title)
227        path = os.path.join(base_path, entry_title_safe + extension)
228      else:
229        path = base_path + extension
230      LOG.info(safe_encode('Downloading ' + entry_title + ' to ' + path))
231      try:
232        if can_export(entry):
233          self.Export(entry, path, grid_id)
234        else:
235          if hasattr(self, 'Download'):
236            self.Download(entry, path)
237          else:
238            self.DownloadResource(entry, path)
239      except self.request_error, err:
240        LOG.error(safe_encode('Download of ' + entry_title + ' failed: ' +
241                              unicode(err)))
242      except EnvironmentError, err:
243        LOG.error(err)
244        LOG.info('Does your destination filename contain invalid characters?')
245
246  GetDocs = get_docs
247
248  def _modify_entry(doc_entry, path_to_new_content, file_ext):
249    """Modify the file data associated with a document entry."""
250    raise NotImplementedError('_modify_entry must be defined!')
251
252  def to_safe_filename(self, text):
253    """Translate string to something that can be safely used as a filename.
254
255    Behavior of this function depends on the operating system.
256
257    Args:
258      text: Text to check for invalid characters
259    Returns:
260      Parameter with unsafe characters escaped or removed.
261      Type (unicode vs string)will match that of the parameter.
262    """
263    sub = self.config.lazy_get(SECTION_HEADER, 'invalid_filename_character_sub',
264                               default='')
265    sub = safe_decode(sub)
266    return ''.join([sub if c in UNSAFE_FILE_CHARS else c for c in text])
267
268  def upload_docs(self, paths, title=None, folder_entry=None,
269                  file_ext=None, **kwargs):
270    """Upload a list of documents or directories.
271
272    For each item in paths:
273      if item is a directory, upload all files found in the directory
274        in a manner roughly equivalent to "cp -R directory/ <docs_folder>"
275      if item is a file, upload that file to <docs_folder>
276
277    Keyword arguments:
278      paths: List of file paths and/or directories to upload.
279      title: Title to give the files once uploaded.
280             Default None for the names of the files.
281      folder_entry: GDataEntry of the folder to treat as the new root for
282                    directories/files.
283                    Default None for no folder (the Google Docs root).
284      file_ext: Replace (or specify) the extension on the file when figuring
285              out the upload format. For example, 'txt' will upload the
286              file as if it was plain text. Default None for the file's
287              extension (which defaults to 'txt' if there is none).
288      kwargs: Typically contains 'convert', indicates if we should convert the
289              file on upload. False will only be honored if the user is a Google
290              Apps Premier account.
291
292    Returns:
293      Dictionary mapping filenames to where they can be accessed online.
294    """
295    doc_entries = {}
296    for path in paths:
297      folder_root = folder_entry
298      if os.path.isdir(path):
299        folder_entries = {}
300        # final '/' sets folder_name to '' which causes
301        # 503 "Service Unavailable".
302        path = path.rstrip(os.path.sep)
303        for dirpath, dirnames, filenames in os.walk(path):
304          directory = os.path.dirname(dirpath)
305          folder_name = os.path.basename(dirpath)
306          if directory in folder_entries:
307            fentry = self._create_folder(folder_name, folder_entries[directory])
308          else:
309            fentry = self._create_folder(folder_name, folder_root)
310          folder_entries[dirpath] = fentry
311          LOG.debug('Created folder ' + dirpath + ' ' + folder_name)
312          for fname in filenames:
313            doc = self.upload_single_doc(os.path.join(dirpath, fname),
314                                         folder_entry=fentry)
315            if doc:
316              doc_entries[fname] = doc
317      else:
318        doc = self.upload_single_doc(path, title=title,
319                                     folder_entry=folder_entry,
320                                     file_ext=file_ext,
321                                     **kwargs)
322        if doc:
323          doc_entries[os.path.basename(path)] = doc
324    return doc_entries
325
326  UploadDocs = upload_docs
327
328  def upload_single_doc(self, path, title=None, folder_entry=None,
329                        file_ext=None, **kwargs):
330    """Upload one file to Google Docs.
331
332    Args:
333      path: str Path to file to upload.
334      title: str (optional) Title to give the upload. Defaults to the filename.
335      folder_entry: DocsEntry (optional) (sub)Folder to upload into.
336      file_ext: str (optional) Extension used to determine MIME type of
337          upload. If not specified, uses mimetypes module to guess it.
338      kwargs: Should contain value for 'convert', either True or False.
339          Indicates if upload should be converted. Only Apps Premier users can
340          specify False.
341
342    Returns:
343      Entry corresponding to the document on Google Docs
344    """
345    filename = os.path.basename(path)
346
347    try:
348      convert = kwargs['convert']
349    except KeyError:
350      convert = True
351
352    if not file_ext:
353      file_ext = googlecl.get_extension_from_path(filename)
354      file_title = filename.split('.')[0]
355    else:
356      file_title = filename
357
358    content_type = self._determine_content_type(file_ext)
359    if not content_type:
360      LOG.debug('Could not find content type using gdata, trying mimetypes')
361      import mimetypes
362      content_type = mimetypes.guess_type(path)[0]
363      if not content_type:
364        if convert:
365          content_type = 'text/plain'
366        else:
367          content_type = 'application/octet-stream'
368        entry_title = title or filename
369      else:
370        entry_title = title or file_title
371    else:
372      entry_title = title or file_title
373
374    LOG.debug('Uploading with content type %s', content_type)
375    LOG.info('Loading %s', path)
376
377    if folder_entry:
378      post_uri = folder_entry.content.src
379    else:
380      post_uri = self.DOCLIST_FEED_URI
381
382    if not convert:
383      post_uri += '?convert=false'
384    
385    try:
386      new_entry = self._transmit_doc(path, entry_title, post_uri, content_type,
387                                     file_ext)
388    except self.request_error, err:
389      LOG.error('Failed to upload %s: %s', path, err)
390      if (str(err).find('ServiceForbiddenException') != -1 or
391          str(err).find('Unsupported Media Type') != -1):
392        # Attempt to catch older gdata users and warn them when they try to upload
393        # unsupported file types
394        print "\n\nYour version of python-gdata may not support this action. "
395        print "Please see the wiki page for more details: "
396        print "http://code.google.com/p/googlecl/wiki/UploadingGoogleDocs\n\n"
397        if convert:
398          LOG.info('You may have to specify a format with --format. Try ' +
399                   '--format=txt')
400      return None
401    else:
402      LOG.info('Upload success! Direct link: %s',
403               new_entry.GetAlternateLink().href)
404    return new_entry
405
406  UploadSingleDoc = upload_single_doc
407
408# Read size is 128*20 for no good reason.
409# Just want to avoid reading in the whole file, and read in a multiple of 128.
410def _md5_hash_file(path, read_size=2560):
411  """Return a binary md5 checksum of file at path."""
412  import hashlib
413  hash_function = hashlib.md5()
414  with open(path, 'r') as my_file:
415    data = my_file.read(read_size)
416    while data:
417      hash_function.update(data)
418      data = my_file.read(read_size)
419  return hash_function.digest()
420
421
def can_export(entry_or_url):
  """Tell whether an entry (or its content URL) supports export.

  Mirrors the check done in gdata.docs.client.DocsClient.export: the
  content URL is exportable when it contains '/Export?'.

  Args:
    entry_or_url: Either a URL string, or an entry whose content.src
        attribute holds the URL.

  Returns:
    True if entry can be exported to a specific format (can use client.export)
    False if not (must use client.Download)

  """
  if isinstance(entry_or_url, basestring):
    location = entry_or_url
  else:
    location = entry_or_url.content.src
  return location.find('/Export?') >= 0
438
439
def safe_move(src, dst):
  """Move file from src to dst.

  If file with same name already exists at dst, rename the new file
  while preserving the extension: 'name.ext' becomes 'name-1.ext',
  'name-2.ext', ... using the first number that is not taken.

  Args:
    src: Path of the file to move.
    dst: Directory to move the file into.

  Returns:
    path to new file.

  """
  new_dir = os.path.abspath(dst)
  # splitext removes exactly the extension suffix. str.rstrip treats its
  # argument as a set of characters, so 'test.txt'.rstrip('.txt') is 'tes'.
  filename, dotted_ext = os.path.splitext(os.path.basename(src))
  new_path = os.path.join(new_dir, filename + dotted_ext)
  rename_num = 1
  while os.path.exists(new_path):
    new_filename = filename + '-' + str(rename_num) + dotted_ext
    new_path = os.path.join(new_dir, new_filename)
    # Advance the counter, otherwise an existing 'name-1.ext' loops forever.
    rename_num += 1
  shutil.move(src, new_path)
  return new_path