/gdata/docs/service.py
Python | 611 lines | 477 code | 23 blank | 111 comment | 7 complexity | b60eb6616bbd94bb7e61d9a0aa532ab6 MD5 | raw file
#!/usr/bin/python
#
# Copyright 2009 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""DocsService extends the GDataService to streamline Google Documents
  operations.

  DocsService: Provides methods to query feeds and manipulate items.
               Extends GDataService.

  DocumentQuery: Queries a Google Document list feed.

  DocumentAclQuery: Queries a Google Document Acl feed.
"""


__author__ = ('api.jfisher (Jeff Fisher), '
              'e.bidelman (Eric Bidelman)')

import re
import atom
import gdata.service
import gdata.docs
import urllib

# XML Namespaces used in Google Documents entities.
DATA_KIND_SCHEME = gdata.GDATA_NAMESPACE + '#kind'
DOCUMENT_LABEL = 'document'
SPREADSHEET_LABEL = 'spreadsheet'
PRESENTATION_LABEL = 'presentation'
FOLDER_LABEL = 'folder'
PDF_LABEL = 'pdf'

# Category terms for the per-document labels; each is LABEL_SCHEME plus a
# '#<label>' fragment.
LABEL_SCHEME = gdata.GDATA_NAMESPACE + '/labels'
STARRED_LABEL_TERM = LABEL_SCHEME + '#starred'
TRASHED_LABEL_TERM = LABEL_SCHEME + '#trashed'
HIDDEN_LABEL_TERM = LABEL_SCHEME + '#hidden'
MINE_LABEL_TERM = LABEL_SCHEME + '#mine'
PRIVATE_LABEL_TERM = LABEL_SCHEME + '#private'
SHARED_WITH_DOMAIN_LABEL_TERM = LABEL_SCHEME + '#shared-with-domain'
VIEWED_LABEL_TERM = LABEL_SCHEME + '#viewed'

# Prefix of the category scheme used to qualify named folders; the owner's
# email is appended to it (see DocumentQuery.AddNamedFolder).
FOLDERS_SCHEME_PREFIX = gdata.docs.DOCUMENTS_NAMESPACE + '/folders/'

# File extensions of documents that are permitted to be uploaded or downloaded.
SUPPORTED_FILETYPES = {
  'CSV': 'text/csv',
  'TSV': 'text/tab-separated-values',
  'TAB': 'text/tab-separated-values',
  'DOC': 'application/msword',
  'DOCX': ('application/vnd.openxmlformats-officedocument.'
           'wordprocessingml.document'),
  'ODS': 'application/x-vnd.oasis.opendocument.spreadsheet',
  'ODT': 'application/vnd.oasis.opendocument.text',
  'RTF': 'application/rtf',
  'SXW': 'application/vnd.sun.xml.writer',
  'TXT': 'text/plain',
  'XLS': 'application/vnd.ms-excel',
  'XLSX': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
  'PDF': 'application/pdf',
  'PNG': 'image/png',
  'PPT': 'application/vnd.ms-powerpoint',
  'PPS': 'application/vnd.ms-powerpoint',
  'HTM': 'text/html',
  'HTML': 'text/html',
  'ZIP': 'application/zip',
  'SWF': 'application/x-shockwave-flash'
}


def _AppendQuery(url, query):
  """Appends an already-encoded query fragment to a URL.

  Args:
    url: string The URL to extend.
    query: string The 'name=value[&name=value...]' fragment to add.

  Returns:
    The URL with the fragment attached using '?' when the URL has no query
    string yet and '&' otherwise.
  """
  # Written as a plain if/else (no conditional expression) because the rest
  # of the file avoids newer syntax, e.g. it applies decorators by hand.
  if url.find('?') == -1:
    return url + '?' + query
  return url + '&' + query


class DocsService(gdata.service.GDataService):

  """Client extension for the Google Documents service Document List feed."""

  # Captures a trailing alphabetic file extension of three or more letters.
  __FILE_EXT_PATTERN = re.compile(r'.*\.([a-zA-Z]{3,}$)')
  # Splits a resource id into (type label, ':' or '%3A', document id).
  __RESOURCE_ID_PATTERN = re.compile(r'^([a-z]*)(:|%3A)([\w-]*)$')

  def __init__(self, email=None, password=None, source=None,
               server='docs.google.com', additional_headers=None, **kwargs):
    """Creates a client for the Google Documents service.

    Args:
      email: string (optional) The user's email address, used for
          authentication.
      password: string (optional) The user's password.
      source: string (optional) The name of the user's application.
      server: string (optional) The name of the server to which a connection
          will be opened. Default value: 'docs.google.com'.
      additional_headers: (optional) Passed through unchanged to the
          gdata.service.GDataService constructor.
      **kwargs: The other parameters to pass to gdata.service.GDataService
          constructor.
    """
    gdata.service.GDataService.__init__(
        self, email=email, password=password, service='writely', source=source,
        server=server, additional_headers=additional_headers, **kwargs)

  def _MakeKindCategory(self, label):
    """Builds the atom.Category that tags an entry with a document kind.

    Args:
      label: string One of the *_LABEL constants, or None.

    Returns:
      An atom.Category in the DATA_KIND_SCHEME scheme, or None when label is
      None.
    """
    if label is None:
      return None
    return atom.Category(scheme=DATA_KIND_SCHEME,
        term=gdata.docs.DOCUMENTS_NAMESPACE + '#' + label, label=label)

  def _MakeContentLinkFromId(self, resource_id):
    """Builds the Export URL for a document, presentation or spreadsheet id.

    Args:
      resource_id: string A resource id such as 'document:abc' or
          'spreadsheet%3Aabc'.

    Returns:
      A string holding the export link for the resource.

    Raises:
      ValueError: if resource_id is malformed or is not a document,
          presentation or spreadsheet id.
    """
    match = self.__RESOURCE_ID_PATTERN.match(resource_id)
    if match is None:
      # Without this guard a malformed id failed with AttributeError on
      # match.group() instead of the ValueError raised below.
      raise ValueError('Invalid resource id: %s' % resource_id)
    label = match.group(1)
    doc_id = match.group(3)
    if label == DOCUMENT_LABEL:
      return '/feeds/download/documents/Export?docId=%s' % doc_id
    if label == PRESENTATION_LABEL:
      return '/feeds/download/presentations/Export?docId=%s' % doc_id
    if label == SPREADSHEET_LABEL:
      return ('http://spreadsheets.google.com/feeds/download/spreadsheets/'
              'Export?key=%s' % doc_id)
    raise ValueError('Invalid resource id: %s' % resource_id)

  def _ResolveFolderUri(self, folder_or_uri):
    """Returns the feed URI that new entries should be posted to.

    Args:
      folder_or_uri: DocumentListEntry or string (optional) An object with a
          link to a folder or a uri to a folder.

    Returns:
      The object's content.src when it has one, the argument itself when it
      does not, or the default document feed when the argument is empty.
    """
    if not folder_or_uri:
      return '/feeds/documents/private/full'
    try:
      return folder_or_uri.content.src
    except AttributeError:
      # Not an entry-like object; treat it as the uri itself.
      return folder_or_uri

  def _UploadFile(self, media_source, title, category, folder_or_uri=None):
    """Uploads a file to the Document List feed.

    Args:
      media_source: A gdata.MediaSource object containing the file to be
          uploaded.
      title: string The title of the document on the server after being
          uploaded.
      category: An atom.Category object specifying the appropriate document
          type, or None to send no category.
      folder_or_uri: DocumentListEntry or string (optional) An object with a
          link to a folder or a uri to a folder to upload to.
          Note: A valid uri for a folder is of the form:
                /feeds/folders/private/full/folder%3Afolder_id

    Returns:
      A DocumentListEntry containing information about the document created on
      the Google Documents service.
    """
    uri = self._ResolveFolderUri(folder_or_uri)

    entry = gdata.docs.DocumentListEntry()
    entry.title = atom.Title(text=title)
    if category is not None:
      entry.category.append(category)
    entry = self.Post(entry, uri, media_source=media_source,
                      extra_headers={'Slug': media_source.file_name},
                      converter=gdata.docs.DocumentListEntryFromString)
    return entry

  def _DownloadFile(self, uri, file_path):
    """Downloads a file.

    Args:
      uri: string The full Export URL to download the file from.
      file_path: string The full path to save the file to.

    Raises:
      RequestError: on error response from server.
    """
    server_response = self.request('GET', uri)
    response_body = server_response.read()
    if server_response.status != 200:
      raise gdata.service.RequestError({'status': server_response.status,
                                        'reason': server_response.reason,
                                        'body': response_body})
    f = open(file_path, 'wb')
    try:
      f.write(response_body)
    finally:
      # Close (and thereby flush) the file even when the write fails.
      f.close()

  def MoveIntoFolder(self, source_entry, folder_entry):
    """Moves a document into a folder in the Document List Feed.

    Args:
      source_entry: DocumentListEntry An object representing the source
          document/folder.
      folder_entry: DocumentListEntry An object with a link to the destination
          folder.

    Returns:
      A DocumentListEntry containing information about the document created on
      the Google Documents service.
    """
    # Only the id is sent; it is posted to the destination folder's feed.
    entry = gdata.docs.DocumentListEntry()
    entry.id = source_entry.id
    entry = self.Post(entry, folder_entry.content.src,
                      converter=gdata.docs.DocumentListEntryFromString)
    return entry

  def Query(self, uri, converter=gdata.docs.DocumentListFeedFromString):
    """Queries the Document List feed and returns the resulting feed of
       entries.

    Args:
      uri: string The full URI to be queried. This can contain query
          parameters, a hostname, or simply the relative path to a Document
          List feed. The DocumentQuery object is useful when constructing
          query parameters.
      converter: func (optional) A function which will be executed on the
          retrieved item, generally to render it into a Python object.
          By default the DocumentListFeedFromString function is used to
          return a DocumentListFeed object. This is because most feed
          queries will result in a feed and not a single entry.

    Returns:
      Whatever self.Get returns for the uri and converter.
    """
    return self.Get(uri, converter=converter)

  def QueryDocumentListFeed(self, uri):
    """Retrieves a DocumentListFeed by retrieving a URI based off the Document
       List feed, including any query parameters. A DocumentQuery object can
       be used to construct these parameters.

    Args:
      uri: string The URI of the feed being retrieved possibly with query
          parameters.

    Returns:
      A DocumentListFeed object representing the feed returned by the server.
    """
    return self.Get(uri, converter=gdata.docs.DocumentListFeedFromString)

  def GetDocumentListEntry(self, uri):
    """Retrieves a particular DocumentListEntry by its unique URI.

    Args:
      uri: string The unique URI of an entry in a Document List feed.

    Returns:
      A DocumentListEntry object representing the retrieved entry.
    """
    return self.Get(uri, converter=gdata.docs.DocumentListEntryFromString)

  def GetDocumentListFeed(self, uri=None):
    """Retrieves a feed containing all of a user's documents.

    Args:
      uri: string (optional) A full URI to query the Document List feed.
          When omitted, the URI of a default DocumentQuery is used.

    Returns:
      A DocumentListFeed object representing the feed returned by the server.
    """
    if not uri:
      # Use the module-local class; this file never imports
      # gdata.docs.service, so that dotted path is not guaranteed to resolve.
      uri = DocumentQuery().ToUri()
    return self.QueryDocumentListFeed(uri)

  def GetDocumentListAclEntry(self, uri):
    """Retrieves a particular DocumentListAclEntry by its unique URI.

    Args:
      uri: string The unique URI of an entry in a Document List feed.

    Returns:
      A DocumentListAclEntry object representing the retrieved entry.
    """
    return self.Get(uri, converter=gdata.docs.DocumentListAclEntryFromString)

  def GetDocumentListAclFeed(self, uri):
    """Retrieves the ACL feed of a document.

    Args:
      uri: string The URI of a document's Acl feed to retrieve.

    Returns:
      A DocumentListAclFeed object representing the ACL feed
      returned by the server.
    """
    return self.Get(uri, converter=gdata.docs.DocumentListAclFeedFromString)

  def Upload(self, media_source, title, folder_or_uri=None, label=None):
    """Uploads a document inside of a MediaSource object to the Document List
       feed with the given title.

    Args:
      media_source: MediaSource The gdata.MediaSource object containing a
          document file to be uploaded.
      title: string The title of the document on the server after being
          uploaded.
      folder_or_uri: DocumentListEntry or string (optional) An object with a
          link to a folder or a uri to a folder to upload to.
          Note: A valid uri for a folder is of the form:
                /feeds/folders/private/full/folder%3Afolder_id
      label: optional label describing the type of the document to be created.

    Returns:
      A DocumentListEntry containing information about the document created
      on the Google Documents service.
    """
    return self._UploadFile(media_source, title, self._MakeKindCategory(label),
                            folder_or_uri)

  def Download(self, entry_or_id_or_url, file_path, export_format=None,
               gid=None, extra_params=None):
    """Downloads a document from the Document List.

    Args:
      entry_or_id_or_url: a DocumentListEntry, or the resource id of an entry,
          or a url to download from (such as the content src).
      file_path: string The full path to save the file to.
      export_format: the format to convert to, if conversion is required.
      gid: grid id, for downloading a single grid of a spreadsheet
      extra_params: a map of any further parameters to control how the document
          is downloaded

    Raises:
      gdata.service.Error if export_format is given for a URL that is not an
          Export URL, or gid is given for a URL without 'spreadsheets' in it.
      RequestError if the service does not respond with success
    """
    if isinstance(entry_or_id_or_url, gdata.docs.DocumentListEntry):
      url = entry_or_id_or_url.content.src
    else:
      if self.__RESOURCE_ID_PATTERN.match(entry_or_id_or_url):
        url = self._MakeContentLinkFromId(entry_or_id_or_url)
      else:
        url = entry_or_id_or_url

    if export_format is not None:
      if url.find('/Export?') == -1:
        raise gdata.service.Error('This entry cannot be exported '
                                  'as a different format')
      # The check above guarantees a '?' is present, so '&' is correct here.
      url += '&exportFormat=%s' % export_format

    if gid is not None:
      if url.find('spreadsheets') == -1:
        raise gdata.service.Error('grid id param is not valid for this entry')
      # A caller-supplied URL may have no query string yet.
      url = _AppendQuery(url, 'gid=%s' % gid)

    if extra_params:
      url = _AppendQuery(url, urllib.urlencode(extra_params))

    self._DownloadFile(url, file_path)

  def Export(self, entry_or_id_or_url, file_path, gid=None, extra_params=None):
    """Downloads a document from the Document List in a different format.

    Args:
      entry_or_id_or_url: a DocumentListEntry, or the resource id of an entry,
          or a url to download from (such as the content src).
      file_path: string The full path to save the file to.  The export
          format is inferred from the file extension.
      gid: grid id, for downloading a single grid of a spreadsheet
      extra_params: a map of any further parameters to control how the document
          is downloaded

    Raises:
      RequestError if the service does not respond with success
    """
    ext = None
    match = self.__FILE_EXT_PATTERN.match(file_path)
    if match:
      ext = match.group(1)
    self.Download(entry_or_id_or_url, file_path, ext, gid, extra_params)

  def CreateFolder(self, title, folder_or_uri=None):
    """Creates a folder in the Document List feed.

    Args:
      title: string The title of the folder on the server after being created.
      folder_or_uri: DocumentListEntry or string (optional) An object with a
          link to a folder or a uri to a folder to upload to.
          Note: A valid uri for a folder is of the form:
                /feeds/folders/private/full/folder%3Afolder_id

    Returns:
      A DocumentListEntry containing information about the folder created on
      the Google Documents service.
    """
    uri = self._ResolveFolderUri(folder_or_uri)

    folder_entry = gdata.docs.DocumentListEntry()
    folder_entry.title = atom.Title(text=title)
    folder_entry.category.append(self._MakeKindCategory(FOLDER_LABEL))
    folder_entry = self.Post(folder_entry, uri,
                             converter=gdata.docs.DocumentListEntryFromString)

    return folder_entry

  def MoveOutOfFolder(self, source_entry):
    """Moves a document out of a folder in the Document List Feed.

    This issues a Delete on the entry's edit link.

    Args:
      source_entry: DocumentListEntry An object representing the source
          document/folder.

    Returns:
      True if the entry was moved out.
    """
    return self.Delete(source_entry.GetEditLink().href)

  # Deprecated methods
  #
  # atom.deprecated is applied by explicit call after each definition rather
  # than with decorator syntax; the commented-out '@' lines show the
  # equivalent decorator form.

  #@atom.deprecated('Please use Upload instead')
  def UploadPresentation(self, media_source, title, folder_or_uri=None):
    """Uploads a presentation inside of a MediaSource object to the Document
       List feed with the given title.

    This method is deprecated, use Upload instead.

    Args:
      media_source: MediaSource The MediaSource object containing a
          presentation file to be uploaded.
      title: string The title of the presentation on the server after being
          uploaded.
      folder_or_uri: DocumentListEntry or string (optional) An object with a
          link to a folder or a uri to a folder to upload to.
          Note: A valid uri for a folder is of the form:
                /feeds/folders/private/full/folder%3Afolder_id

    Returns:
      A DocumentListEntry containing information about the presentation created
      on the Google Documents service.
    """
    return self._UploadFile(
        media_source, title, self._MakeKindCategory(PRESENTATION_LABEL),
        folder_or_uri=folder_or_uri)

  UploadPresentation = atom.deprecated('Please use Upload instead')(
      UploadPresentation)

  #@atom.deprecated('Please use Upload instead')
  def UploadSpreadsheet(self, media_source, title, folder_or_uri=None):
    """Uploads a spreadsheet inside of a MediaSource object to the Document
       List feed with the given title.

    This method is deprecated, use Upload instead.

    Args:
      media_source: MediaSource The MediaSource object containing a spreadsheet
          file to be uploaded.
      title: string The title of the spreadsheet on the server after being
          uploaded.
      folder_or_uri: DocumentListEntry or string (optional) An object with a
          link to a folder or a uri to a folder to upload to.
          Note: A valid uri for a folder is of the form:
                /feeds/folders/private/full/folder%3Afolder_id

    Returns:
      A DocumentListEntry containing information about the spreadsheet created
      on the Google Documents service.
    """
    return self._UploadFile(
        media_source, title, self._MakeKindCategory(SPREADSHEET_LABEL),
        folder_or_uri=folder_or_uri)

  UploadSpreadsheet = atom.deprecated('Please use Upload instead')(
      UploadSpreadsheet)

  #@atom.deprecated('Please use Upload instead')
  def UploadDocument(self, media_source, title, folder_or_uri=None):
    """Uploads a document inside of a MediaSource object to the Document List
       feed with the given title.

    This method is deprecated, use Upload instead.

    Args:
      media_source: MediaSource The gdata.MediaSource object containing a
          document file to be uploaded.
      title: string The title of the document on the server after being
          uploaded.
      folder_or_uri: DocumentListEntry or string (optional) An object with a
          link to a folder or a uri to a folder to upload to.
          Note: A valid uri for a folder is of the form:
                /feeds/folders/private/full/folder%3Afolder_id

    Returns:
      A DocumentListEntry containing information about the document created
      on the Google Documents service.
    """
    return self._UploadFile(
        media_source, title, self._MakeKindCategory(DOCUMENT_LABEL),
        folder_or_uri=folder_or_uri)

  UploadDocument = atom.deprecated('Please use Upload instead')(
      UploadDocument)

  # Calling any of these functions is the same as calling Export.
  DownloadDocument = atom.deprecated('Please use Export instead')(Export)
  DownloadPresentation = atom.deprecated('Please use Export instead')(Export)
  DownloadSpreadsheet = atom.deprecated('Please use Export instead')(Export)

  # Calling any of these functions is the same as calling MoveIntoFolder.
  MoveDocumentIntoFolder = atom.deprecated(
      'Please use MoveIntoFolder instead')(MoveIntoFolder)
  MovePresentationIntoFolder = atom.deprecated(
      'Please use MoveIntoFolder instead')(MoveIntoFolder)
  MoveSpreadsheetIntoFolder = atom.deprecated(
      'Please use MoveIntoFolder instead')(MoveIntoFolder)
  MoveFolderIntoFolder = atom.deprecated(
      'Please use MoveIntoFolder instead')(MoveIntoFolder)


class DocumentQuery(gdata.service.Query):

  """Object used to construct a URI to query the Google Document List feed"""

  def __init__(self, feed='/feeds/documents', visibility='private',
               projection='full', text_query=None, params=None,
               categories=None):
    """Constructor for Document List Query

    Builds a DocumentQuery object used to construct a URI based on the
    Document List feed.

    Args:
      feed: string (optional) The path for the feed. (e.g. '/feeds/documents')
      visibility: string (optional) The visibility chosen for the current feed.
      projection: string (optional) The projection chosen for the current feed.
      text_query: string (optional) The contents of the q query parameter. This
          string is URL escaped upon conversion to a URI.
      params: dict (optional) Parameter value string pairs which become URL
          params when translated to a URI. These parameters are added to
          the query's items.
      categories: list (optional) List of category strings which should be
          included as query categories. See gdata.service.Query for
          additional documentation.
    """
    self.visibility = visibility
    self.projection = projection
    gdata.service.Query.__init__(self, feed, text_query, params, categories)

  def ToUri(self):
    """Generates a URI from the query parameters set in the object.

    Returns:
      A string containing the URI used to retrieve entries from the Document
      List feed.
    """
    # The base class builds the URI from self.feed, so the visibility and
    # projection are spliced in temporarily and the original feed is always
    # put back, even if the base ToUri raises.
    old_feed = self.feed
    self.feed = '/'.join([old_feed, self.visibility, self.projection])
    try:
      new_feed = gdata.service.Query.ToUri(self)
    finally:
      self.feed = old_feed
    return new_feed

  def AddNamedFolder(self, email, folder_name):
    """Adds a named folder category, qualified by a schema.

    This function lets you query for documents that are contained inside a
    named folder without fear of collision with other categories.

    Args:
      email: string The email of the user who owns the folder.
      folder_name: string The name of the folder.

    Returns:
      The string of the category that was added to the object.
    """
    category = '{%s%s}%s' % (FOLDERS_SCHEME_PREFIX, email, folder_name)
    self.categories.append(category)
    return category

  def RemoveNamedFolder(self, email, folder_name):
    """Removes a named folder category, qualified by a schema.

    Args:
      email: string The email of the user who owns the folder.
      folder_name: string The name of the folder.

    Returns:
      The string of the category that was removed from the object.
    """
    category = '{%s%s}%s' % (FOLDERS_SCHEME_PREFIX, email, folder_name)
    self.categories.remove(category)
    return category


class DocumentAclQuery(gdata.service.Query):

  """Object used to construct a URI to query a Document's ACL feed"""

  def __init__(self, resource_id, feed='/feeds/acl/private/full'):
    """Constructor for Document ACL Query

    Builds a DocumentAclQuery object used to construct a URI based on the
    Document ACL feed.

    Args:
      resource_id: string The resource id. (e.g. 'document%3Adocument_id',
          'spreadsheet%3Aspreadsheet_id', etc.)
      feed: string (optional) The path for the feed.
          (e.g. '/feeds/acl/private/full')
    """
    self.resource_id = resource_id
    gdata.service.Query.__init__(self, feed)

  def ToUri(self):
    """Generates a URI from the query parameters set in the object.

    Returns:
      A string containing the URI used to retrieve entries from the Document
      ACL feed.
    """
    return '%s/%s' % (gdata.service.Query.ToUri(self), self.resource_id)