PageRenderTime 56ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/src/gdata/docs/service.py

https://gitlab.com/karambir/gdata
Python | 618 lines | 484 code | 23 blank | 111 comment | 12 complexity | a7e65cab9350e509a8865888c2d88260 MD5 | raw file
  1. #!/usr/bin/python
  2. #
  3. # Copyright 2009 Google Inc. All Rights Reserved.
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. """DocsService extends the GDataService to streamline Google Documents
  17. operations.
  18. DocsService: Provides methods to query feeds and manipulate items.
  19. Extends GDataService.
  20. DocumentQuery: Queries a Google Document list feed.
  21. DocumentAclQuery: Queries a Google Document Acl feed.
  22. """
  23. __author__ = ('api.jfisher (Jeff Fisher), '
  24. 'e.bidelman (Eric Bidelman)')
  25. import re
  26. import atom
  27. import gdata.service
  28. import gdata.docs
  29. import urllib
  # XML namespaces used in Google Documents entities.
  DATA_KIND_SCHEME = gdata.GDATA_NAMESPACE + '#kind'
  LABEL_SCHEME = gdata.GDATA_NAMESPACE + '/labels'
  FOLDERS_SCHEME_PREFIX = gdata.docs.DOCUMENTS_NAMESPACE + '/folders/'

  # Kind labels; combined with the documents namespace to build the term of a
  # "kind" category.
  DOCUMENT_LABEL = 'document'
  SPREADSHEET_LABEL = 'spreadsheet'
  PRESENTATION_LABEL = 'presentation'
  FOLDER_LABEL = 'folder'
  PDF_LABEL = 'pdf'

  # Category terms built on top of LABEL_SCHEME.
  STARRED_LABEL_TERM = LABEL_SCHEME + '#starred'
  TRASHED_LABEL_TERM = LABEL_SCHEME + '#trashed'
  HIDDEN_LABEL_TERM = LABEL_SCHEME + '#hidden'
  MINE_LABEL_TERM = LABEL_SCHEME + '#mine'
  PRIVATE_LABEL_TERM = LABEL_SCHEME + '#private'
  SHARED_WITH_DOMAIN_LABEL_TERM = LABEL_SCHEME + '#shared-with-domain'
  VIEWED_LABEL_TERM = LABEL_SCHEME + '#viewed'

  # File extensions of documents that are permitted to be uploaded or
  # downloaded, mapped from the upper-case extension to its MIME type.
  SUPPORTED_FILETYPES = {
      'CSV': 'text/csv',
      'TSV': 'text/tab-separated-values',
      'TAB': 'text/tab-separated-values',
      'DOC': 'application/msword',
      'DOCX': ('application/vnd.openxmlformats-officedocument'
               '.wordprocessingml.document'),
      'ODS': 'application/x-vnd.oasis.opendocument.spreadsheet',
      'ODT': 'application/vnd.oasis.opendocument.text',
      'RTF': 'application/rtf',
      'SXW': 'application/vnd.sun.xml.writer',
      'TXT': 'text/plain',
      'XLS': 'application/vnd.ms-excel',
      'XLSX': ('application/vnd.openxmlformats-officedocument'
               '.spreadsheetml.sheet'),
      'PDF': 'application/pdf',
      'PNG': 'image/png',
      'PPT': 'application/vnd.ms-powerpoint',
      'PPS': 'application/vnd.ms-powerpoint',
      'HTM': 'text/html',
      'HTML': 'text/html',
      'ZIP': 'application/zip',
      'SWF': 'application/x-shockwave-flash',
  }
  class DocsService(gdata.service.GDataService):
    """Client extension for the Google Documents service Document List feed."""

    # Captures a trailing file extension made of three or more letters.
    __FILE_EXT_PATTERN = re.compile(r'.*\.([a-zA-Z]{3,}$)')
    # Splits a resource id into (label, separator, document id). The separator
    # is either a literal ':' or its URL-encoded form '%3A'.
    __RESOURCE_ID_PATTERN = re.compile(r'^([a-z]*)(:|%3A)([\w-]*)$')

    def __init__(self, email=None, password=None, source=None,
                 server='docs.google.com', additional_headers=None, **kwargs):
      """Creates a client for the Google Documents service.

      Args:
        email: string (optional) The user's email address, used for
            authentication.
        password: string (optional) The user's password.
        source: string (optional) The name of the user's application.
        server: string (optional) The name of the server to which a connection
            will be opened. Default value: 'docs.google.com'.
        additional_headers: (optional) Passed through unchanged to the
            gdata.service.GDataService constructor.
        **kwargs: The other parameters to pass to gdata.service.GDataService
            constructor.
      """
      gdata.service.GDataService.__init__(
          self, email=email, password=password, service='writely',
          source=source, server=server, additional_headers=additional_headers,
          **kwargs)
      # NOTE(review): presumably makes the base service use HTTPS -- confirm
      # against gdata.service.GDataService.
      self.ssl = True

    def _MakeKindCategory(self, label):
      """Builds the atom.Category describing a document kind.

      Args:
        label: string or None. One of the *_LABEL constants of this module.

      Returns:
        None when label is None, otherwise an atom.Category whose scheme is
        DATA_KIND_SCHEME and whose term is the documents namespace followed by
        '#' and the label.
      """
      if label is None:
        return None
      return atom.Category(scheme=DATA_KIND_SCHEME,
                           term=gdata.docs.DOCUMENTS_NAMESPACE + '#' + label,
                           label=label)

    def _MakeContentLinkFromId(self, resource_id):
      """Builds the export URL for a document, presentation or spreadsheet.

      Args:
        resource_id: string of the form '<label>:<id>' or '<label>%3A<id>'.

      Returns:
        A string: a server-relative export path for documents and
        presentations, or an absolute spreadsheets.google.com URL for
        spreadsheets.

      Raises:
        ValueError: if resource_id is malformed or its label is not a
            document, presentation or spreadsheet.
      """
      match = self.__RESOURCE_ID_PATTERN.match(resource_id)
      if match is None:
        # Report malformed ids the same way as unsupported labels instead of
        # failing with AttributeError on match.group().
        raise ValueError('Invalid resource id: %s' % resource_id)
      label = match.group(1)
      doc_id = match.group(3)
      if label == DOCUMENT_LABEL:
        return '/feeds/download/documents/Export?docId=%s' % doc_id
      if label == PRESENTATION_LABEL:
        return '/feeds/download/presentations/Export?docId=%s' % doc_id
      if label == SPREADSHEET_LABEL:
        return ('https://spreadsheets.google.com/feeds/download/spreadsheets/'
                'Export?key=%s' % doc_id)
      raise ValueError('Invalid resource id: %s' % resource_id)

    def _GetFolderUri(self, folder_or_uri):
      """Resolves the URI that new entries should be posted to.

      Args:
        folder_or_uri: DocumentListEntry, string or None. An object exposing
            content.src, a folder URI, or a false value.

      Returns:
        folder_or_uri.content.src when that attribute chain exists,
        folder_or_uri itself when it does not, or the default
        '/feeds/documents/private/full' when folder_or_uri is false.
      """
      if not folder_or_uri:
        return '/feeds/documents/private/full'
      try:
        return folder_or_uri.content.src
      except AttributeError:
        # Not an entry-like object; treat the value as the URI itself.
        return folder_or_uri

    def _AppendQueryString(self, url, query):
      """Appends an already-encoded query fragment to url.

      Uses '?' when url has no query string yet and '&' otherwise, so the
      result is well formed in both cases.
      """
      if url.find('?') == -1:
        return url + '?' + query
      return url + '&' + query

    def _UploadFile(self, media_source, title, category, folder_or_uri=None):
      """Uploads a file to the Document List feed.

      Args:
        media_source: A gdata.MediaSource object containing the file to be
            uploaded.
        title: string The title of the document on the server after being
            uploaded.
        category: An atom.Category object specifying the appropriate document
            type, or None to post the entry without a category.
        folder_or_uri: DocumentListEntry or string (optional) An object with a
            link to a folder or a uri to a folder to upload to.
            Note: A valid uri for a folder is of the form:
                  /feeds/folders/private/full/folder%3Afolder_id

      Returns:
        A DocumentListEntry containing information about the document created
        on the Google Documents service.
      """
      uri = self._GetFolderUri(folder_or_uri)

      entry = gdata.docs.DocumentListEntry()
      entry.title = atom.Title(text=title)
      if category is not None:
        entry.category.append(category)
      # The media source's file name is sent as the Slug header.
      entry = self.Post(entry, uri, media_source=media_source,
                        extra_headers={'Slug': media_source.file_name},
                        converter=gdata.docs.DocumentListEntryFromString)
      return entry

    def _DownloadFile(self, uri, file_path):
      """Downloads a file and writes it to file_path.

      Up to five 302 redirects are followed. The whole response body is read
      into memory before it is written to disk.

      Args:
        uri: string The full Export URL to download the file from.
        file_path: string The full path to save the file to.

      Raises:
        RequestError: on error response from server.
      """
      server_response = self.request('GET', uri)
      response_body = server_response.read()
      redirects_left = 5
      while server_response.status == 302 and redirects_left > 0:
        server_response = self.request('GET',
                                       server_response.getheader('Location'))
        response_body = server_response.read()
        redirects_left -= 1
      if server_response.status != 200:
        raise gdata.service.RequestError({'status': server_response.status,
                                          'reason': server_response.reason,
                                          'body': response_body})
      f = open(file_path, 'wb')
      try:
        f.write(response_body)
        f.flush()
      finally:
        # Release the handle even when the write fails.
        f.close()

    def MoveIntoFolder(self, source_entry, folder_entry):
      """Moves a document into a folder in the Document List Feed.

      Posts a new entry carrying the id of source_entry to the content src of
      folder_entry.

      Args:
        source_entry: DocumentListEntry An object representing the source
            document/folder.
        folder_entry: DocumentListEntry An object with a link to the
            destination folder.

      Returns:
        A DocumentListEntry containing information about the document created
        on the Google Documents service.
      """
      entry = gdata.docs.DocumentListEntry()
      entry.id = source_entry.id
      entry = self.Post(entry, folder_entry.content.src,
                        converter=gdata.docs.DocumentListEntryFromString)
      return entry

    def Query(self, uri, converter=gdata.docs.DocumentListFeedFromString):
      """Queries the Document List feed and returns the resulting feed of
      entries.

      Args:
        uri: string The full URI to be queried. This can contain query
            parameters, a hostname, or simply the relative path to a Document
            List feed. The DocumentQuery object is useful when constructing
            query parameters.
        converter: func (optional) A function which will be executed on the
            retrieved item, generally to render it into a Python object.
            By default the DocumentListFeedFromString function is used to
            return a DocumentListFeed object. This is because most feed
            queries will result in a feed and not a single entry.

      Returns:
        Whatever self.Get returns for the given uri and converter.
      """
      return self.Get(uri, converter=converter)

    def QueryDocumentListFeed(self, uri):
      """Retrieves a DocumentListFeed by retrieving a URI based off the
      Document List feed, including any query parameters. A DocumentQuery
      object can be used to construct these parameters.

      Args:
        uri: string The URI of the feed being retrieved possibly with query
            parameters.

      Returns:
        A DocumentListFeed object representing the feed returned by the
        server.
      """
      return self.Get(uri, converter=gdata.docs.DocumentListFeedFromString)

    def GetDocumentListEntry(self, uri):
      """Retrieves a particular DocumentListEntry by its unique URI.

      Args:
        uri: string The unique URI of an entry in a Document List feed.

      Returns:
        A DocumentListEntry object representing the retrieved entry.
      """
      return self.Get(uri, converter=gdata.docs.DocumentListEntryFromString)

    def GetDocumentListFeed(self, uri=None):
      """Retrieves a feed containing all of a user's documents.

      Args:
        uri: string (optional) A full URI to query the Document List feed.
            When omitted, the URI of a default DocumentQuery is used.

      Returns:
        The result of QueryDocumentListFeed for the chosen URI.
      """
      if not uri:
        uri = gdata.docs.service.DocumentQuery().ToUri()
      return self.QueryDocumentListFeed(uri)

    def GetDocumentListAclEntry(self, uri):
      """Retrieves a particular DocumentListAclEntry by its unique URI.

      Args:
        uri: string The unique URI of an entry in a Document List feed.

      Returns:
        A DocumentListAclEntry object representing the retrieved entry.
      """
      return self.Get(uri,
                      converter=gdata.docs.DocumentListAclEntryFromString)

    def GetDocumentListAclFeed(self, uri):
      """Retrieves the ACL feed of a document.

      Args:
        uri: string The URI of a document's Acl feed to retrieve.

      Returns:
        A DocumentListAclFeed object representing the ACL feed
        returned by the server.
      """
      return self.Get(uri, converter=gdata.docs.DocumentListAclFeedFromString)

    def Upload(self, media_source, title, folder_or_uri=None, label=None):
      """Uploads a document inside of a MediaSource object to the Document List
      feed with the given title.

      Args:
        media_source: MediaSource The gdata.MediaSource object containing a
            document file to be uploaded.
        title: string The title of the document on the server after being
            uploaded.
        folder_or_uri: DocumentListEntry or string (optional) An object with a
            link to a folder or a uri to a folder to upload to.
            Note: A valid uri for a folder is of the form:
                  /feeds/folders/private/full/folder%3Afolder_id
        label: optional label describing the type of the document to be
            created.

      Returns:
        A DocumentListEntry containing information about the document created
        on the Google Documents service.
      """
      return self._UploadFile(media_source, title,
                              self._MakeKindCategory(label), folder_or_uri)

    def Download(self, entry_or_id_or_url, file_path, export_format=None,
                 gid=None, extra_params=None):
      """Downloads a document from the Document List.

      Args:
        entry_or_id_or_url: a DocumentListEntry, or the resource id of an
            entry, or a url to download from (such as the content src).
        file_path: string The full path to save the file to.
        export_format: the format to convert to, if conversion is required.
        gid: grid id, for downloading a single grid of a spreadsheet
        extra_params: a map of any further parameters to control how the
            document is downloaded

      Raises:
        gdata.service.Error: if export_format is given but the URL is not an
            Export URL, or gid is given but the URL is not a spreadsheets URL.
        RequestError if the service does not respond with success
      """
      if isinstance(entry_or_id_or_url, gdata.docs.DocumentListEntry):
        url = entry_or_id_or_url.content.src
      elif self.__RESOURCE_ID_PATTERN.match(entry_or_id_or_url):
        url = self._MakeContentLinkFromId(entry_or_id_or_url)
      else:
        url = entry_or_id_or_url

      if export_format is not None:
        if url.find('/Export?') == -1:
          raise gdata.service.Error('This entry cannot be exported '
                                    'as a different format')
        url = self._AppendQueryString(url, 'exportFormat=%s' % export_format)

      if gid is not None:
        if url.find('spreadsheets') == -1:
          raise gdata.service.Error(
              'grid id param is not valid for this entry')
        url = self._AppendQueryString(url, 'gid=%s' % gid)

      if extra_params:
        url = self._AppendQueryString(url, urllib.urlencode(extra_params))

      self._DownloadFile(url, file_path)

    def Export(self, entry_or_id_or_url, file_path, gid=None,
               extra_params=None):
      """Downloads a document from the Document List in a different format.

      Args:
        entry_or_id_or_url: a DocumentListEntry, or the resource id of an
            entry, or a url to download from (such as the content src).
        file_path: string The full path to save the file to. The export
            format is inferred from the file extension; when the path has no
            extension of three or more letters, no export format is passed.
        gid: grid id, for downloading a single grid of a spreadsheet
        extra_params: a map of any further parameters to control how the
            document is downloaded

      Raises:
        RequestError if the service does not respond with success
      """
      ext = None
      match = self.__FILE_EXT_PATTERN.match(file_path)
      if match:
        ext = match.group(1)
      self.Download(entry_or_id_or_url, file_path, ext, gid, extra_params)

    def CreateFolder(self, title, folder_or_uri=None):
      """Creates a folder in the Document List feed.

      Args:
        title: string The title of the folder on the server after being
            created.
        folder_or_uri: DocumentListEntry or string (optional) An object with a
            link to a folder or a uri to a folder to upload to.
            Note: A valid uri for a folder is of the form:
                  /feeds/folders/private/full/folder%3Afolder_id

      Returns:
        A DocumentListEntry containing information about the folder created on
        the Google Documents service.
      """
      uri = self._GetFolderUri(folder_or_uri)

      folder_entry = gdata.docs.DocumentListEntry()
      folder_entry.title = atom.Title(text=title)
      folder_entry.category.append(self._MakeKindCategory(FOLDER_LABEL))
      folder_entry = self.Post(
          folder_entry, uri, converter=gdata.docs.DocumentListEntryFromString)
      return folder_entry

    def MoveOutOfFolder(self, source_entry):
      """Moves a document out of a folder in the Document List Feed.

      Issues a delete against the edit link of source_entry.

      Args:
        source_entry: DocumentListEntry An object representing the source
            document/folder.

      Returns:
        The return value of self.Delete; the original documentation describes
        it as True if the entry was moved out.
      """
      return self.Delete(source_entry.GetEditLink().href)

    # Deprecated methods

    #@atom.deprecated('Please use Upload instead')
    def UploadPresentation(self, media_source, title, folder_or_uri=None):
      """Uploads a presentation inside of a MediaSource object to the Document
      List feed with the given title.

      This method is deprecated, use Upload instead.

      Args:
        media_source: MediaSource The MediaSource object containing a
            presentation file to be uploaded.
        title: string The title of the presentation on the server after being
            uploaded.
        folder_or_uri: DocumentListEntry or string (optional) An object with a
            link to a folder or a uri to a folder to upload to.
            Note: A valid uri for a folder is of the form:
                  /feeds/folders/private/full/folder%3Afolder_id

      Returns:
        A DocumentListEntry containing information about the presentation
        created on the Google Documents service.
      """
      return self._UploadFile(
          media_source, title, self._MakeKindCategory(PRESENTATION_LABEL),
          folder_or_uri=folder_or_uri)

    UploadPresentation = atom.deprecated('Please use Upload instead')(
        UploadPresentation)

    #@atom.deprecated('Please use Upload instead')
    def UploadSpreadsheet(self, media_source, title, folder_or_uri=None):
      """Uploads a spreadsheet inside of a MediaSource object to the Document
      List feed with the given title.

      This method is deprecated, use Upload instead.

      Args:
        media_source: MediaSource The MediaSource object containing a
            spreadsheet file to be uploaded.
        title: string The title of the spreadsheet on the server after being
            uploaded.
        folder_or_uri: DocumentListEntry or string (optional) An object with a
            link to a folder or a uri to a folder to upload to.
            Note: A valid uri for a folder is of the form:
                  /feeds/folders/private/full/folder%3Afolder_id

      Returns:
        A DocumentListEntry containing information about the spreadsheet
        created on the Google Documents service.
      """
      return self._UploadFile(
          media_source, title, self._MakeKindCategory(SPREADSHEET_LABEL),
          folder_or_uri=folder_or_uri)

    UploadSpreadsheet = atom.deprecated('Please use Upload instead')(
        UploadSpreadsheet)

    #@atom.deprecated('Please use Upload instead')
    def UploadDocument(self, media_source, title, folder_or_uri=None):
      """Uploads a document inside of a MediaSource object to the Document List
      feed with the given title.

      This method is deprecated, use Upload instead.

      Args:
        media_source: MediaSource The gdata.MediaSource object containing a
            document file to be uploaded.
        title: string The title of the document on the server after being
            uploaded.
        folder_or_uri: DocumentListEntry or string (optional) An object with a
            link to a folder or a uri to a folder to upload to.
            Note: A valid uri for a folder is of the form:
                  /feeds/folders/private/full/folder%3Afolder_id

      Returns:
        A DocumentListEntry containing information about the document created
        on the Google Documents service.
      """
      return self._UploadFile(
          media_source, title, self._MakeKindCategory(DOCUMENT_LABEL),
          folder_or_uri=folder_or_uri)

    UploadDocument = atom.deprecated('Please use Upload instead')(
        UploadDocument)

    # Calling any of these functions is the same as calling Export.
    DownloadDocument = atom.deprecated('Please use Export instead')(Export)
    DownloadPresentation = atom.deprecated('Please use Export instead')(Export)
    DownloadSpreadsheet = atom.deprecated('Please use Export instead')(Export)

    # Calling any of these functions is the same as calling MoveIntoFolder.
    MoveDocumentIntoFolder = atom.deprecated(
        'Please use MoveIntoFolder instead')(MoveIntoFolder)
    MovePresentationIntoFolder = atom.deprecated(
        'Please use MoveIntoFolder instead')(MoveIntoFolder)
    MoveSpreadsheetIntoFolder = atom.deprecated(
        'Please use MoveIntoFolder instead')(MoveIntoFolder)
    MoveFolderIntoFolder = atom.deprecated(
        'Please use MoveIntoFolder instead')(MoveIntoFolder)
  class DocumentQuery(gdata.service.Query):
    """Object used to construct a URI to query the Google Document List feed"""

    def __init__(self, feed='/feeds/documents', visibility='private',
                 projection='full', text_query=None, params=None,
                 categories=None):
      """Constructor for Document List Query

      Args:
        feed: string (optional) The path for the feed.
            (e.g. '/feeds/documents')
        visibility: string (optional) The visibility chosen for the current
            feed.
        projection: string (optional) The projection chosen for the current
            feed.
        text_query: string (optional) The contents of the q query parameter.
            This string is URL escaped upon conversion to a URI.
        params: dict (optional) Parameter value string pairs which become URL
            params when translated to a URI. These parameters are added to
            the query's items.
        categories: list (optional) List of category strings which should be
            included as query categories. See gdata.service.Query for
            additional documentation.
      """
      self.visibility = visibility
      self.projection = projection
      gdata.service.Query.__init__(self, feed, text_query, params, categories)

    def ToUri(self):
      """Generates a URI from the query parameters set in the object.

      The feed path is temporarily extended with the visibility and projection
      while the base class builds the URI, then restored.

      Returns:
        A string containing the URI used to retrieve entries from the Document
        List feed.
      """
      old_feed = self.feed
      self.feed = '/'.join([old_feed, self.visibility, self.projection])
      try:
        new_feed = gdata.service.Query.ToUri(self)
      finally:
        # Restore the feed even if URI generation fails, so that a later call
        # does not append the visibility and projection a second time.
        self.feed = old_feed
      return new_feed

    def AddNamedFolder(self, email, folder_name):
      """Adds a named folder category, qualified by a schema.

      This function lets you query for documents that are contained inside a
      named folder without fear of collision with other categories.

      Args:
        email: string The email of the user who owns the folder.
        folder_name: string The name of the folder.

      Returns:
        The string of the category that was added to the object.
      """
      category = '{%s%s}%s' % (FOLDERS_SCHEME_PREFIX, email, folder_name)
      self.categories.append(category)
      return category

    def RemoveNamedFolder(self, email, folder_name):
      """Removes a named folder category, qualified by a schema.

      Args:
        email: string The email of the user who owns the folder.
        folder_name: string The name of the folder.

      Returns:
        The string of the category that was removed from the object.

      Raises:
        ValueError: if self.categories is a list and does not contain the
            category (raised by the remove call).
      """
      category = '{%s%s}%s' % (FOLDERS_SCHEME_PREFIX, email, folder_name)
      self.categories.remove(category)
      return category
  class DocumentAclQuery(gdata.service.Query):
    """Builds the URI used to query the ACL feed of a single document."""

    def __init__(self, resource_id, feed='/feeds/acl/private/full'):
      """Creates a query for one document's ACL feed.

      Args:
        resource_id: string The resource id. (e.g. 'document%3Adocument_id',
            'spreadsheet%3Aspreadsheet_id', etc.)
        feed: string (optional) The path for the feed.
            (e.g. '/feeds/acl/private/full')
      """
      self.resource_id = resource_id
      gdata.service.Query.__init__(self, feed)

    def ToUri(self):
      """Generates a URI from the query parameters set in the object.

      Returns:
        A string: the URI produced by gdata.service.Query.ToUri followed by
        '/' and the resource id.
      """
      base_uri = gdata.service.Query.ToUri(self)
      return '%s/%s' % (base_uri, self.resource_id)