PageRenderTime 64ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/src/googlecl/docs/client.py

http://googlecl.googlecode.com/
Python | 392 lines | 355 code | 4 blank | 33 comment | 1 complexity | 9c79e7cc09664accebb2f163a039d72d MD5 | raw file
  1. # Copyright (C) 2010 Google Inc.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """Service details and instances for the Docs service using GData 3.0.
  15. Some use cases:
  16. Upload a document:
  17. docs upload --folder "Some folder" path_to_doc
  18. Edit a document in your word editor:
  19. docs edit --title "Grocery List" --editor vim (editor also set in prefs)
  20. Download docs:
  21. docs get --folder "Some folder"
  22. """
  23. from __future__ import with_statement
  24. __author__ = 'tom.h.miller@gmail.com (Tom Miller)'
  25. import gdata.docs.client
  26. import logging
  27. import os
  28. import re
  29. import shutil
  30. import googlecl
  31. import googlecl.client
  32. from googlecl.docs import SECTION_HEADER
  33. import googlecl.docs.base
  34. import atom.data
  35. LOG = logging.getLogger(googlecl.docs.LOGGER_NAME + '.client')
  36. class DocsClientCL(gdata.docs.client.DocsClient,
  37. googlecl.docs.base.DocsBaseCL,
  38. googlecl.client.BaseClientCL):
  39. """Extends gdata.docs.client.DocsClient for the command line.
  40. This class adds some features focused on using Google Docs via an installed
  41. app with a command line interface.
  42. """
  43. # Versions 2.0.5-2.0.14 of python gdata included a DOCLIST_FEED_URI variable,
  44. # but 2.0.15 removed it, so we hard code it here.
  45. DOCLIST_FEED_URI = '/feeds/default/private/full'
  46. # Another casualty in 2.0.15.
  47. FILE_EXT_PATTERN = re.compile('.*\.([a-zA-Z]{3,}$)')
  48. # File extension/mimetype pairs of common format.
  49. # These seem to have disappeared in python-gdata 2.0.15 and 2.0.16, so here
  50. # they are given explicitly.
  51. MIMETYPES = {
  52. 'CSV': 'text/csv',
  53. 'TSV': 'text/tab-separated-values',
  54. 'TAB': 'text/tab-separated-values',
  55. 'DOC': 'application/msword',
  56. 'DOCX': ('application/vnd.openxmlformats-officedocument.'
  57. 'wordprocessingml.document'),
  58. 'ODS': 'application/x-vnd.oasis.opendocument.spreadsheet',
  59. 'ODT': 'application/vnd.oasis.opendocument.text',
  60. 'RTF': 'application/rtf',
  61. 'SXW': 'application/vnd.sun.xml.writer',
  62. 'TXT': 'text/plain',
  63. 'XLS': 'application/vnd.ms-excel',
  64. 'XLSX': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
  65. 'PDF': 'application/pdf',
  66. 'PNG': 'image/png',
  67. 'PPT': 'application/vnd.ms-powerpoint',
  68. 'PPS': 'application/vnd.ms-powerpoint',
  69. 'HTM': 'text/html',
  70. 'HTML': 'text/html',
  71. 'ZIP': 'application/zip',
  72. 'SWF': 'application/x-shockwave-flash'
  73. }
  def __init__(self, config):
    """Initialize the gdata client first, then the GoogleCL client layer.

    Args:
      config: Configuration object passed, together with SECTION_HEADER, to
          googlecl.client.BaseClientCL.__init__.
    """
    gdata_base = gdata.docs.client.DocsClient
    gdata_base.__init__(self, source='GoogleCL')
    googlecl_base = googlecl.client.BaseClientCL
    googlecl_base.__init__(self, SECTION_HEADER, config)
  # Python gdata 2.0.15 drastically changed the API, including renaming
  # gdata.docs.data.DocList to ResourceFeed.
  def _doclist_class(self):
    """Return the document-list feed class of the installed gdata.

    Returns:
      gdata.docs.data.ResourceFeed when that attribute exists, otherwise
      gdata.docs.data.DocList.
    """
    data_module = gdata.docs.data
    if not hasattr(data_module, 'ResourceFeed'):
      return data_module.DocList
    return data_module.ResourceFeed
  def _create_folder(self, title, folder_or_uri):
    """Wrapper function to mesh with DocsBaseCL.upload_docs().

    Args:
      title: Title for the new folder.
      folder_or_uri: Passed unchanged as the third argument of self.create().

    Returns:
      Whatever self.create() returns.
    """
    folder_label = gdata.docs.data.FOLDER_LABEL
    return self.create(folder_label, title, folder_or_uri)
  def _determine_content_type(self, file_ext):
    """Look up the MIME type registered for a file extension.

    Args:
      file_ext: Extension without the dot (case-insensitive), or None.

    Returns:
      The matching value from DocsClientCL.MIMETYPES, or None when file_ext
      is None or has no entry. Both misses are logged at INFO level.
    """
    if file_ext is None:
      LOG.info('No supported filetype found as the extension is not provided')
      return None
    lookup_key = file_ext.upper()
    if lookup_key in DocsClientCL.MIMETYPES:
      return DocsClientCL.MIMETYPES[lookup_key]
    LOG.info('No supported filetype found for extension %s', file_ext)
    return None
  98. def _download_file(self, uri, file_path, auth_token=None, **kwargs):
  99. """Downloads a file, optionally decoding from UTF-8.
  100. Overridden from gdata.docs.client to support decoding.
  101. Args:
  102. uri: string The full Export URL to download the file from.
  103. file_path: string The full path to save the file to.
  104. auth_token: (optional) gdata.gauth.ClientLoginToken, AuthSubToken, or
  105. OAuthToken which authorizes this client to edit the user's data.
  106. decode: bool (default False) Whether or not to decode UTF-8.
  107. kwargs: Other parameters to pass to self.get_file_content().
  108. Raises:
  109. RequestError: on error response from server.
  110. """
  111. # More undocumented changes in python gdata 2.0.15
  112. if hasattr(self, 'get_file_content'):
  113. response_string = self.get_file_content(uri, auth_token=auth_token)
  114. else:
  115. response_string = self._get_content(uri, None);
  116. response_string = response_string.replace("\r\n\r\n", "\r\n")
  117. if googlecl.docs.base.can_export(uri) and\
  118. self.config.lazy_get(SECTION_HEADER, 'decode_utf_8', False, bool):
  119. try:
  120. file_string = response_string.decode('utf-8-sig')
  121. except UnicodeError, err:
  122. LOG.debug('Could not decode: ' + str(err))
  123. file_string = response_string
  124. else:
  125. file_string = response_string
  126. with open(file_path, 'wb') as download_file:
  127. download_file.write(file_string)
  128. download_file.flush()
  def export(self, entry, file_path, gid=None, auth_token=None,
             **kwargs):
    """Exports a document from the Document List in a different format.

    Overloaded from docs.client.DocsClient to fix the
    "always-download-as-pdf" issue.

    Args:
      entry: An entry object specifying the document to be exported.
          Formerly, this was entry_or_id_or_url: a gdata.data.GDEntry or
          string representing a resource id or URL to download the document
          from (such as the content src link). But that wreaks havoc in
          python gdata >2.0.15, and it was easy to ensure we only call with
          an actual Entry.
      file_path: str The full path to save the file to. The export format is
          inferred from the file extension.
      gid: str (optional) grid id for downloading a single grid of a
          spreadsheet. The param should only be used for .csv and .tsv
          spreadsheet exports.
      auth_token: (optional) gdata.gauth.ClientLoginToken, AuthSubToken, or
          OAuthToken which authorizes this client to edit the user's data.
          Only forwarded when self.download is used.
      kwargs: Other parameters to pass to self.download() or
          self.download_resource().

    Raises:
      gdata.client.RequestError if the download URL is malformed or the
      server's response was not successful.
    """
    query_params = {}
    ext_match = DocsClientCL.FILE_EXT_PATTERN.match(file_path)
    if ext_match:
      fmt = ext_match.group(1)
      # Hack for apps-api-issues Issue 2294
      if fmt.lower() == 'html':
        fmt = '102'
      # 'format' is sent in addition to 'exportFormat' to fix new-style docs
      # always downloading to PDF (gdata-issues Issue 2157).
      query_params.update({'exportFormat': fmt, 'format': fmt})
    if gid is not None:
      query_params['gid'] = gid

    # Only logged; the download below is still attempted.
    if not hasattr(entry, 'content'):
      LOG.fatal("This shouldn't happen. Export called with invalid entry")

    # Sigh, more changes in python gdata 2.0.15. Has download_resource but not
    # download.
    if hasattr(self, 'download'):
      self.download(entry, file_path, query_params,
                    auth_token=auth_token, **kwargs)
    elif hasattr(self, 'download_resource'):
      self.download_resource(entry, file_path, query_params, **kwargs)
    else:
      LOG.fatal("Something is screwed up with python gdata.")

  Export = export
  def get_doclist(self, titles=None, folder_entry_list=None):
    """Get a list of document entries from a feed.

    Keyword arguments:
      titles: list or string Title(s) of entries to return. Forwarded to
          self.GetEntries, which performs the matching.
          Default None for all entries from feed.
      folder_entry_list: List of GDataEntry's of folders to get from.
          Only files found in these folders will be returned.
          Default None for all folders.

    Returns:
      List of entries.
    """
    if not folder_entry_list:
      return self.GetEntries(DocsClientCL.DOCLIST_FEED_URI,
                             titles,
                             desired_class=self._doclist_class())

    collected = []
    for folder_entry in folder_entry_list:
      # folder_entry.content.src is the uri to query for documents in that
      # folder.
      folder_docs = self.GetEntries(folder_entry.content.src,
                                    titles,
                                    desired_class=self._doclist_class())
      collected.extend(folder_docs)
    return collected
  def get_single_doc(self, title=None, folder_entry_list=None):
    """Return exactly one doc_entry.

    Keyword arguments:
      title: Title to match on for document. Default None for any title.
      folder_entry_list: GDataEntry of folders to look in.
          Default None for any folder.

    Returns:
      None if there were no matches, or one entry matching the given title.
    """
    if not folder_entry_list:
      return self.GetSingleEntry(DocsClientCL.DOCLIST_FEED_URI,
                                 title,
                                 desired_class=self._doclist_class())

    if len(folder_entry_list) == 1:
      only_folder = folder_entry_list[0]
      return self.GetSingleEntry(only_folder.content.src,
                                 title,
                                 desired_class=self._doclist_class())

    # Several folders: gather the candidates first. Technically don't need
    # the desired_class for the final call because we already have entries.
    candidates = self.get_doclist(title, folder_entry_list)
    return self.GetSingleEntry(candidates, title)

  GetSingleDoc = get_single_doc
  def get_folder(self, title):
    """Return entries for one or more folders.

    Keyword arguments:
      title: Title of the folder.

    Returns:
      Whatever self.GetEntries returns for the folder feed and title, or None
      if title is empty or None. A warning is logged when nothing matched.
    """
    if not title:
      return None
    folder_feed_uri = DocsClientCL.DOCLIST_FEED_URI + '/-/folder'
    matches = self.GetEntries(folder_feed_uri, title)
    if not matches:
      LOG.warning('No folder found that matches ' + title)
    return matches

  GetFolder = get_folder
  def is_token_valid(self, test_uri=None):
    """Check that the token being used is valid.

    Args:
      test_uri: (optional) URI to validate the token against. When omitted,
          the token is checked against both the document list feed and the
          spreadsheets feed, because this client is used for both services.

    Returns:
      The result of BaseClientCL.IsTokenValid for test_uri when it is given;
      otherwise the docs result and-ed with the spreadsheets result.
    """
    check_token = googlecl.client.BaseClientCL.IsTokenValid
    if test_uri:
      # Previously a caller-supplied URI left docs_uri/sheets_uri unset, so
      # the URI was never actually tested.
      return check_token(self, test_uri)

    docs_uri = DocsClientCL.DOCLIST_FEED_URI
    sheets_uri = ('https://spreadsheets.google.com/feeds/spreadsheets'
                  '/private/full')
    # Both checks always run, as before (no short-circuit on the first).
    docs_test = check_token(self, docs_uri)
    sheets_test = check_token(self, sheets_uri)
    return docs_test and sheets_test

  IsTokenValid = is_token_valid
  def _modify_entry(self, doc_entry, path_to_new_content, file_ext):
    """Replace content of a DocEntry.

    If file_ext has no entry in DocsClientCL.MIMETYPES, the user is prompted
    until a known extension is entered.

    Args:
      doc_entry: DocEntry whose content will be replaced.
      path_to_new_content: str Path to file that has new content.
      file_ext: str Extension to use to determine MIME type of upload
          (e.g. 'txt', 'doc'). Case-insensitive; None is treated as unknown.

    Returns:
      Whatever self.Update() returns for the modified entry.
    """
    mimetypes = DocsClientCL.MIMETYPES
    # Normalise once so that both the lookup and the prompt loop compare
    # against the upper-case keys of MIMETYPES.
    ext_key = (file_ext or '').upper()
    if ext_key not in mimetypes:
      print('Could not find mimetype for ' + str(file_ext))
      # Join the keys into text; concatenating the key list itself to a
      # string raises TypeError.
      choices = ', '.join(sorted(mimetypes))
      while ext_key not in mimetypes:
        answer = raw_input('Please enter one of ' + choices +
                           ' to determine the content type to upload as.')
        ext_key = answer.strip().upper()
    content_type = mimetypes[ext_key]
    mediasource = gdata.data.MediaSource(file_path=path_to_new_content,
                                         content_type=content_type)
    return self.Update(doc_entry, media_source=mediasource)
  def request_access(self, domain, display_name, scopes=None, browser=None):
    """Request access as in BaseClientCL, but specify scopes.

    Args:
      domain: Forwarded to BaseClientCL.request_access.
      display_name: Forwarded to BaseClientCL.request_access.
      scopes: (optional) Scopes to request. When empty or None, the
          'writely' and 'wise' entries of gdata.gauth.AUTH_SCOPES are used.
      browser: (optional) Forwarded to BaseClientCL.request_access.

    Returns:
      Whatever BaseClientCL.request_access returns.
    """
    if not scopes:
      # When people use docs (writely), they expect access to
      # spreadsheets as well (wise).
      known_scopes = gdata.gauth.AUTH_SCOPES
      scopes = known_scopes['writely'] + known_scopes['wise']
    base_request = googlecl.client.BaseClientCL.request_access
    return base_request(self, domain, display_name,
                        scopes=scopes, browser=browser)

  RequestAccess = request_access
  def _transmit_doc(self, path, entry_title, post_uri, content_type, file_ext):
    """Upload a document.

    The final step in uploading a document. The process differs between
    versions of the gdata python client library, hence its definition here.

    Args:
      path: Path to the file to upload.
      entry_title: Name of the document.
      post_uri: URI to make request to. Only used by the non-resumable
          fallback; the resumable upload always posts to the fixed
          create-session URI.
      content_type: MIME type of request.
      file_ext: File extension that determined the content_type. Not used
          here.

    Returns:
      Entry representing the document uploaded.
    """
    # GoogleCL that uses gdata-2.0.0 through 2.0.4 won't ever see this code.
    # If it uses gdata-2.0.5 through 2.0.7, it would otherwise give an error
    # about a resumable uploader that it doesn't have. This avoids that error.
    # If it uses gdata-2.0.8, 2.0.9, or 2.0.11 it can't upload docs due to an
    # SSL error.
    # If it uses gdata-2.0.10, 2.0.12, or later, this should allow it to
    # upload all allowable file types.
    if not hasattr(gdata.client, "ResumableUploader"):
      # If we have reached this point, we must be in gdata-2.0.5 through
      # 2.0.7. The upload is guaranteed to fail, so the self.upload call is
      # here to return whatever the caller wanted.
      return self.upload(path, entry_title, post_uri, content_type)

    # Binary mode keeps the uploaded bytes identical to the file on every
    # platform; the with-block closes the file even if the upload raises.
    with open(path, 'rb') as upload_file:
      file_size = os.path.getsize(path)
      uploader = gdata.client.ResumableUploader(
          self, upload_file, content_type, file_size, chunk_size=1048576,
          desired_class=gdata.data.GDEntry)
      # Set metadata for our upload.
      entry = gdata.data.GDEntry(title=atom.data.Title(text=entry_title))
      new_entry = uploader.UploadFile(
          '/feeds/upload/create-session/default/private/full', entry=entry)
    return new_entry
  323. SERVICE_CLASS = DocsClientCL