PageRenderTime 207ms CodeModel.GetById 81ms app.highlight 66ms RepoModel.GetById 54ms app.codeStats 1ms

/src/googlecl/docs/client.py

http://googlecl.googlecode.com/
Python | 392 lines | 355 code | 4 blank | 33 comment | 2 complexity | 9c79e7cc09664accebb2f163a039d72d MD5 | raw file
  1# Copyright (C) 2010 Google Inc.
  2#
  3# Licensed under the Apache License, Version 2.0 (the "License");
  4# you may not use this file except in compliance with the License.
  5# You may obtain a copy of the License at
  6#
  7#      http://www.apache.org/licenses/LICENSE-2.0
  8#
  9# Unless required by applicable law or agreed to in writing, software
 10# distributed under the License is distributed on an "AS IS" BASIS,
 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12# See the License for the specific language governing permissions and
 13# limitations under the License.
 14
 15
 16"""Service details and instances for the Docs service using GData 3.0.
 17
 18Some use cases:
 19Upload a document:
 20  docs upload --folder "Some folder" path_to_doc
 21
 22Edit a document in your word editor:
 23  docs edit --title "Grocery List" --editor vim (editor also set in prefs)
 24
 25Download docs:
 26  docs get --folder "Some folder"
 27
 28"""
 29from __future__ import with_statement
 30
 31__author__ = 'tom.h.miller@gmail.com (Tom Miller)'
 32import gdata.docs.client
 33import logging
 34import os
 35import re
 36import shutil
 37import googlecl
 38import googlecl.client
 39from googlecl.docs import SECTION_HEADER
 40import googlecl.docs.base
 41import atom.data
 42
# Module-level logger for this file; its name is googlecl.docs.LOGGER_NAME
# with a '.client' suffix appended.
LOG = logging.getLogger(googlecl.docs.LOGGER_NAME + '.client')
 44
 45
 46class DocsClientCL(gdata.docs.client.DocsClient,
 47                   googlecl.docs.base.DocsBaseCL,
 48                   googlecl.client.BaseClientCL):
 49
 50  """Extends gdata.docs.client.DocsClient for the command line.
 51
 52  This class adds some features focused on using Google Docs via an installed
 53  app with a command line interface.
 54
 55  """
 56
 57  # Versions 2.0.5-2.0.14 of python gdata included a DOCLIST_FEED_URI variable,
 58  # but 2.0.15 removed it, so we hard code it here.
 59  DOCLIST_FEED_URI = '/feeds/default/private/full'
 60
 61  # Another casualty in 2.0.15.
 62  FILE_EXT_PATTERN = re.compile('.*\.([a-zA-Z]{3,}$)')
 63
  # Maps file extensions to the MIME type used for that format.
  # These seem to have disappeared in python-gdata 2.0.15 and 2.0.16, so here
  # they are given explicitly.
  # Keys are upper-case: the lookups in this class upper-case the extension
  # before indexing, so new entries must be added in upper case as well.
  MIMETYPES = {
    'CSV': 'text/csv',
    'TSV': 'text/tab-separated-values',
    'TAB': 'text/tab-separated-values',
    'DOC': 'application/msword',
    'DOCX': ('application/vnd.openxmlformats-officedocument.'
             'wordprocessingml.document'),
    'ODS': 'application/x-vnd.oasis.opendocument.spreadsheet',
    'ODT': 'application/vnd.oasis.opendocument.text',
    'RTF': 'application/rtf',
    'SXW': 'application/vnd.sun.xml.writer',
    'TXT': 'text/plain',
    'XLS': 'application/vnd.ms-excel',
    'XLSX': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    'PDF': 'application/pdf',
    'PNG': 'image/png',
    'PPT': 'application/vnd.ms-powerpoint',
    'PPS': 'application/vnd.ms-powerpoint',
    'HTM': 'text/html',
    'HTML': 'text/html',
    'ZIP': 'application/zip',
    'SWF': 'application/x-shockwave-flash'
  }
 90
 91  def __init__(self, config):
 92    """Constructor."""
 93    gdata.docs.client.DocsClient.__init__(self, source='GoogleCL')
 94    googlecl.client.BaseClientCL.__init__(self, SECTION_HEADER, config)
 95
 96  # Python gdata 2.0.15 drastically changed the API, including renaming
 97  # gdata.docs.data.DocList to ResourceFeed.
 98  def _doclist_class(self):
 99    if (hasattr(gdata.docs.data, 'ResourceFeed')):
100      return gdata.docs.data.ResourceFeed
101    else:
102      return gdata.docs.data.DocList
103
104  def _create_folder(self, title, folder_or_uri):
105    """Wrapper function to mesh with DocsBaseCL.upload_docs()."""
106    return self.create(gdata.docs.data.FOLDER_LABEL, title,
107                       folder_or_uri)
108
109  def _determine_content_type(self, file_ext):
110    if file_ext is None:
111        LOG.info('No supported filetype found as the extension is not provided')
112        return None
113    try:
114      return DocsClientCL.MIMETYPES[file_ext.upper()]
115    except KeyError:
116      LOG.info('No supported filetype found for extension %s', file_ext)
117      return None
118
119  def _download_file(self, uri, file_path, auth_token=None, **kwargs):
120    """Downloads a file, optionally decoding from UTF-8.
121
122    Overridden from gdata.docs.client to support decoding.
123
124    Args:
125      uri: string The full Export URL to download the file from.
126      file_path: string The full path to save the file to.
127      auth_token: (optional) gdata.gauth.ClientLoginToken, AuthSubToken, or
128          OAuthToken which authorizes this client to edit the user's data.
129      decode: bool (default False) Whether or not to decode UTF-8.
130      kwargs: Other parameters to pass to self.get_file_content().
131
132    Raises:
133      RequestError: on error response from server.
134
135    """
136    # More undocumented changes in python gdata 2.0.15
137    if hasattr(self, 'get_file_content'):
138      response_string = self.get_file_content(uri, auth_token=auth_token)
139    else:
140      response_string = self._get_content(uri, None);
141      
142    response_string = response_string.replace("\r\n\r\n", "\r\n")
143
144    if googlecl.docs.base.can_export(uri) and\
145       self.config.lazy_get(SECTION_HEADER, 'decode_utf_8', False, bool):
146      try:
147        file_string = response_string.decode('utf-8-sig')
148      except UnicodeError, err:
149        LOG.debug('Could not decode: ' + str(err))
150        file_string = response_string
151    else:
152      file_string = response_string
153    
154    with open(file_path, 'wb') as download_file:
155      download_file.write(file_string)
156      download_file.flush()
157
158  def export(self, entry, file_path, gid=None, auth_token=None,
159             **kwargs):
160    """Exports a document from the Document List in a different format.
161
162    Overloaded from docs.client.DocsClient to fix "always-download-as-pdf"
163    issue
164
165    Args:
166      entry: An entry object specifying the document to be exported.
167          Formerly, this was entry_or_id_or_url: a
168          gdata.data.GDEntry or string representing a
169          resource id or URL to download the document from (such as the content
170          src link).  But that wreaks havoc in python gdata >2.0.15, and it was
171          easy to ensure we only call with an actual Entry.
172      file_path: str The full path to save the file to.  The export
173          format is inferred from the the file extension.
174      gid: str (optional) grid id for downloading a single grid of a
175          spreadsheet. The param should only be used for .csv and .tsv
176          spreadsheet exports.
177      auth_token: (optional) gdata.gauth.ClientLoginToken, AuthSubToken, or
178          OAuthToken which authorizes this client to edit the user's data.
179      kwargs: Other parameters to pass to self.download().
180
181    Raises:
182      gdata.client.RequestError if the download URL is malformed or the server's
183      response was not successful.
184    """
185    extra_params = {}
186
187    match = DocsClientCL.FILE_EXT_PATTERN.match(file_path)
188    if match:
189      export_format = match.group(1)
190      # Hack for apps-api-issues Issue 2294
191      if export_format.lower() == 'html':
192        export_format = '102'
193      extra_params['exportFormat'] = export_format
194      # Fix issue with new-style docs always downloading to PDF
195      # (gdata-issues Issue 2157)
196      extra_params['format'] = export_format
197
198    if gid is not None:
199      extra_params['gid'] = gid
200
201    if not hasattr(entry, 'content'):
202      LOG.fatal("This shouldn't happen.  Export called with invalid entry")
203
204    # Sigh, more changes in python gdata 2.0.15. Has download_resource but not
205    # download.
206    if hasattr(self, 'download'):
207      self.download(entry, file_path, extra_params,
208                    auth_token=auth_token, **kwargs)
209    elif hasattr(self, 'download_resource'):
210      self.download_resource(entry, file_path, extra_params,
211                             **kwargs)
212    else:
213      LOG.fatal("Something is screwed up with python gdata.")
214
215  Export = export
216
217  def get_doclist(self, titles=None, folder_entry_list=None):
218    """Get a list of document entries from a feed.
219
220    Keyword arguments:
221      titles: list or string Title(s) of entries to return. Will be compared
222             to entry.title.text, using regular expressions if self.use_regex.
223             Default None for all entries from feed.
224      folder_entry_list: List of GDataEntry's of folders to get from.
225             Only files found in these folders will be returned.
226             Default None for all folders.
227
228    Returns:
229      List of entries.
230
231    """
232    if folder_entry_list:
233      entries = []
234      for folder in folder_entry_list:
235        # folder.content.src is the uri to query for documents in that folder.
236        entries.extend(self.GetEntries(folder.content.src,
237                                       titles,
238                                       desired_class=self._doclist_class()))
239    else:
240      entries = self.GetEntries(DocsClientCL.DOCLIST_FEED_URI,
241                                titles,
242                                desired_class=self._doclist_class())
243
244    return entries
245
246  def get_single_doc(self, title=None, folder_entry_list=None):
247    """Return exactly one doc_entry.
248
249    Keyword arguments:
250      title: Title to match on for document. Default None for any title.
251      folder_entry_list: GDataEntry of folders to look in.
252                         Default None for any folder.
253
254    Returns:
255      None if there were no matches, or one entry matching the given title.
256
257    """
258    if folder_entry_list:
259      if len(folder_entry_list) == 1:
260        return self.GetSingleEntry(folder_entry_list[0].content.src,
261                                   title,
262                                   desired_class=self._doclist_class())
263      else:
264        entries = self.get_doclist(title, folder_entry_list)
265        # Technically don't need the desired_class for this call
266        # because we have the entries.
267        return self.GetSingleEntry(entries, title)
268    else:
269      return self.GetSingleEntry(DocsClientCL.DOCLIST_FEED_URI,
270                                 title,
271                                 desired_class=self._doclist_class())
272
273  GetSingleDoc = get_single_doc
274
275  def get_folder(self, title):
276    """Return entries for one or more folders.
277
278    Keyword arguments:
279      title: Title of the folder.
280
281    Returns:
282      GDataEntry representing a folder, or None of title is None.
283
284    """
285    if title:
286      uri = DocsClientCL.DOCLIST_FEED_URI + '/-/folder'
287      folder_entries = self.GetEntries(uri, title)
288      if not folder_entries:
289        LOG.warning('No folder found that matches ' + title)
290      return folder_entries
291    else:
292      return None
293
294  GetFolder = get_folder
295
296  def is_token_valid(self, test_uri=None):
297    """Check that the token being used is valid."""
298    if not test_uri:
299      docs_uri = DocsClientCL.DOCLIST_FEED_URI
300      sheets_uri = ('https://spreadsheets.google.com/feeds/spreadsheets'
301                    '/private/full')
302    docs_test = googlecl.client.BaseClientCL.IsTokenValid(self, docs_uri)
303    sheets_test = googlecl.client.BaseClientCL.IsTokenValid(self, sheets_uri)
304    return docs_test and sheets_test
305
306  IsTokenValid = is_token_valid
307
308  def _modify_entry(self, doc_entry, path_to_new_content, file_ext):
309    """Replace content of a DocEntry.
310
311    Args:
312      doc_entry: DocEntry whose content will be replaced.
313      path_to_new_content: str Path to file that has new content.
314      file_ext: str Extension to use to determine MIME type of upload
315                (e.g. 'txt', 'doc')
316
317    """
318    try:
319      content_type = DocsClientCL.MIMETYPES[file_ext.upper()]
320    except KeyError:
321      print 'Could not find mimetype for ' + file_ext
322      while file_ext not in DocsClientCL.MIMETYPES.keys():
323        file_ext = raw_input('Please enter one of ' +
324                                DocsClientCL.MIMETYPES.keys() +
325                                ' to determine the content type to upload as.')
326      content_type = DocsClientCL.MIMETYPES[file_ext.upper()]
327    mediasource = gdata.data.MediaSource(file_path=path_to_new_content,
328                                         content_type=content_type)
329    return self.Update(doc_entry, media_source=mediasource)
330
331  def request_access(self, domain, display_name, scopes=None, browser=None):
332    """Request access as in BaseClientCL, but specify scopes."""
333    # When people use docs (writely), they expect access to
334    # spreadsheets as well (wise).
335    if not scopes:
336      scopes = gdata.gauth.AUTH_SCOPES['writely'] +\
337               gdata.gauth.AUTH_SCOPES['wise']
338    return googlecl.client.BaseClientCL.request_access(self, domain,
339                                                       display_name,
340                                                       scopes=scopes,
341                                                       browser=browser)
342
343  RequestAccess = request_access
344
345  def _transmit_doc(self, path, entry_title, post_uri, content_type, file_ext):
346    """Upload a document.
347
348    The final step in uploading a document. The process differs between versions
349    of the gdata python client library, hence its definition here.
350
351    Args:
352      path: Path to the file to upload.
353      entry_title: Name of the document.
354      post_uri: URI to make request to.
355      content_type: MIME type of request.
356      file_ext: File extension that determined the content_type.
357
358    Returns:
359      Entry representing the document uploaded.
360    """
361
362    # GoogleCL that uses gdata-2.0.0 through 2.0.4 won't ever see this code.
363    # If it uses gdata-2.0.5 through 2.0.7, it would otherwise give an error
364    # about a resumable uploader that it doesn't have. This avoids that error.
365    # If it uses gdata-2.0.8, 2.0.9, or 2.0.11 it can't upload docs due to an SSL error.
366    # If it uses gdata-2.0.10, 2.0.12, or later, this should allow it to
367    # upload all allowable file types.
368
369    if hasattr(gdata.client,"ResumableUploader"):
370      f = open(path)
371      file_size = os.path.getsize(f.name)
372      uploader = gdata.client.ResumableUploader(
373          self, f, content_type, file_size, chunk_size=1048576,
374          desired_class=gdata.data.GDEntry)
375
376      # Set metadata for our upload.
377      entry = gdata.data.GDEntry(title=atom.data.Title(text=entry_title))
378      new_entry = uploader.UploadFile('/feeds/upload/create-session/default/private/full', entry=entry)
379      # These might be useful for a verbose debug statement:
380      # print 'Document uploaded: ' + new_entry.title.text
381      # print 'Quota used: %s' % new_entry.quota_bytes_used.text
382      f.close()
383
384      return new_entry
385
386    else:
387      # If we have reached this point, we must be in gdata-2.0.5 through 2.0.7
388      # The upload is guaranteed to fail, so the self.upload call is here to
389      # return whatever the caller wanted.
390      return self.upload(path, entry_title, post_uri, content_type)
391
# Module-level name bound to the client class defined above.
# NOTE(review): presumably the rest of googlecl looks this name up to find the
# Docs service implementation -- confirm against the caller.
SERVICE_CLASS = DocsClientCL