PageRenderTime 559ms CodeModel.GetById 537ms RepoModel.GetById 0ms app.codeStats 0ms

/src/googlecl/docs/base.py

http://googlecl.googlecode.com/
Python | 463 lines | 402 code | 7 blank | 54 comment | 24 complexity | 80e13104aa682144663a4a0cf7c6f11e MD5 | raw file
  1. # Copyright (C) 2010 Google Inc.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """Service details and instances for the Docs service using GData 3.0.
  15. Some use cases:
  16. Upload a document:
  17. docs upload --folder "Some folder" path_to_doc
  18. Edit a document in your word editor:
  19. docs edit --title "Grocery List" --editor vim (editor also set in prefs)
  20. Download docs:
  21. docs get --folder "Some folder"
  22. """
  23. from __future__ import with_statement
  24. __author__ = 'tom.h.miller@gmail.com (Tom Miller)'
  25. import ConfigParser
  26. import logging
  27. import os
  28. import shlex
  29. import shutil
  30. import sys
  31. import googlecl
  32. from googlecl.docs import SECTION_HEADER
  33. # Renamed here to reduce verbosity in other sections
  34. safe_encode = googlecl.safe_encode
  35. safe_decode = googlecl.safe_decode
  36. LOG = logging.getLogger(googlecl.docs.LOGGER_NAME + '.base')
  37. # For to_safe_filename
  38. if sys.platform == 'win32':
  39. UNSAFE_FILE_CHARS = '\\/:*?"<>|'
  40. else:
  41. UNSAFE_FILE_CHARS = '/'
  42. class DocsError(googlecl.base.Error):
  43. """Base error for Docs errors."""
  44. pass
  45. class DocsBaseCL(object):
  46. """Class meant to be inherited by either DocsClientCL or DocsServiceCL."""
  47. # Marked with leading underscore because people should use the method
  48. # for creating folders appropriate to the superclass.
  49. def _create_folder(folder_name, folder_or_uri=None):
  50. raise NotImplementedError('_modify_entry must be defined!')
  51. def edit_doc(self, doc_entry_or_title, editor, file_ext,
  52. folder_entry_or_path=None):
  53. """Edit a document.
  54. Keyword arguments:
  55. doc_entry_or_title: DocEntry of the existing document to edit,
  56. or title of the document to create.
  57. editor: Name of the editor to use. Should be executable from the user's
  58. working directory.
  59. file_ext: Suffix of the file to download.
  60. For example, "txt", "csv", "xcl".
  61. folder_entry_or_path: Entry or string representing folder to upload into.
  62. If a string, a new set of folders will ALWAYS be created.
  63. For example, 'my_folder' to upload to my_folder,
  64. 'foo/bar' to upload into subfolder bar under folder foo.
  65. Default None for root folder.
  66. """
  67. import subprocess
  68. import tempfile
  69. try:
  70. doc_title = safe_decode(doc_entry_or_title.title.text)
  71. new_doc = False
  72. except AttributeError:
  73. doc_title = doc_entry_or_title
  74. new_doc = True
  75. temp_dir = tempfile.mkdtemp()
  76. # If we're creating a new document and not given a folder entry
  77. if new_doc and isinstance(folder_entry_or_path, basestring):
  78. folder_path = os.path.normpath(folder_entry_or_path)
  79. # Some systems allow more than one path separator
  80. if os.altsep:
  81. folder_path.replace(os.altsep, os.sep)
  82. base_folder = folder_path.split(os.sep)[0]
  83. # Define the base path such that upload_docs will create a folder
  84. # named base_folder
  85. base_path = os.path.join(temp_dir, base_folder)
  86. total_basename = os.path.join(temp_dir, folder_path)
  87. os.makedirs(total_basename)
  88. path = os.path.join(total_basename,
  89. self.to_safe_filename(doc_title) + '.' + file_ext)
  90. else:
  91. path = os.path.join(temp_dir,
  92. self.to_safe_filename(doc_title) + '.' + file_ext)
  93. base_path = path
  94. if not new_doc:
  95. # This used to be the following, passing just the URL instead of the
  96. # entry object (which it's guaranteed to be since not new_doc).
  97. # Both client and service seem happy with it, so it was probably
  98. # unnecessary to reduce it to a URL first.
  99. # self.Export(doc_entry_or_title.content.src, path)
  100. self.Export(doc_entry_or_title, path)
  101. file_hash = _md5_hash_file(path)
  102. else:
  103. file_hash = None
  104. command_args = shlex.split(safe_encode(editor)) + [path]
  105. subprocess.call(command_args)
  106. impatient_editors = self.config.lazy_get(SECTION_HEADER,
  107. 'impatient_editors',
  108. default='')
  109. if impatient_editors:
  110. impatient_editors = impatient_editors.split(',')
  111. if command_args[0] in impatient_editors:
  112. LOG.info('I noticed you are using an application that will not wait for '
  113. 'you to finish editing your file.')
  114. LOG.info('Hit enter in this shell when you finished editing and saved '
  115. 'your work.')
  116. raw_input('')
  117. if file_hash and file_hash == _md5_hash_file(path):
  118. LOG.info('No modifications to file, not uploading.')
  119. return None
  120. elif not os.path.exists(path):
  121. LOG.info('No file written, not uploading.')
  122. return None
  123. if new_doc:
  124. if isinstance(folder_entry_or_path, basestring):
  125. # Let code in upload_docs handle the creation of new folder(s)
  126. self.upload_docs([base_path], doc_title)
  127. else:
  128. # folder_entry_or_path is None or a GDataEntry.
  129. doc_entry = self.upload_single_doc(path,
  130. folder_entry=folder_entry_or_path)
  131. else:
  132. try:
  133. doc_entry = self._modify_entry(doc_entry_or_title, path, file_ext)
  134. except self.request_error, err:
  135. LOG.error(err)
  136. new_path = safe_move(path, '.')
  137. LOG.info(safe_encode('Moved edited document to ' +
  138. safe_decode(new_path)))
  139. return None
  140. try:
  141. # Good faith effort to keep the temp directory clean.
  142. shutil.rmtree(temp_dir)
  143. except OSError:
  144. # Only seen errors on Windows, but catch the more general OSError.
  145. pass
  146. return doc_entry
  147. EditDoc = edit_doc
  148. def get_docs(self, base_path, entries, file_ext=None, grid_id=None):
  149. """Download documents.
  150. Keyword arguments:
  151. base_path: The path to download files to. This plus an entry's title plus
  152. its format-specific extension will form the complete path.
  153. entries: List of DocEntry items representing the files to download.
  154. file_ext: Suffix to give the file(s) when downloading.
  155. For example, "txt", "csv", "xcl". Default None to let
  156. get_extension_from_doctype decide the extension. Ignored
  157. when downloading arbitrary files.
  158. """
  159. if not os.path.isdir(base_path):
  160. if len(entries) > 1:
  161. raise DocsError(safe_encode(u'Specified multiple source files, but ' +
  162. u'destination "' + base_path +
  163. u'" is not a directory'))
  164. format_from_filename = googlecl.get_extension_from_path(base_path)
  165. if format_from_filename and not file_ext:
  166. # Strip the extension off here if it exists. Don't want to double up
  167. # on extension in for loop. (+1 for '.')
  168. base_path = base_path[:-(len(format_from_filename)+1)]
  169. # We can just set the file_ext here, since there's only one file.
  170. file_ext = format_from_filename
  171. for entry in entries:
  172. # Don't set file_ext if we cannot do export.
  173. # get_extension_from_doctype will check the config file for 'format'
  174. # which will set an undesired entry_file_ext for
  175. # unconverted downloads
  176. if not file_ext and can_export(entry):
  177. entry_file_ext = googlecl.docs.get_extension_from_doctype(
  178. googlecl.docs.get_document_type(entry),
  179. self.config)
  180. else:
  181. entry_file_ext = file_ext
  182. if entry_file_ext:
  183. LOG.debug('Decided file_ext is ' + entry_file_ext)
  184. extension = '.' + entry_file_ext
  185. else:
  186. LOG.debug('Could not (or would not) set file_ext')
  187. if can_export(entry):
  188. extension = '.txt'
  189. else:
  190. # Files that cannot be exported typically have a file extension
  191. # in their name / title.
  192. extension = ''
  193. entry_title = safe_decode(entry.title.text)
  194. if os.path.isdir(base_path):
  195. entry_title_safe = self.to_safe_filename(entry_title)
  196. path = os.path.join(base_path, entry_title_safe + extension)
  197. else:
  198. path = base_path + extension
  199. LOG.info(safe_encode('Downloading ' + entry_title + ' to ' + path))
  200. try:
  201. if can_export(entry):
  202. self.Export(entry, path, grid_id)
  203. else:
  204. if hasattr(self, 'Download'):
  205. self.Download(entry, path)
  206. else:
  207. self.DownloadResource(entry, path)
  208. except self.request_error, err:
  209. LOG.error(safe_encode('Download of ' + entry_title + ' failed: ' +
  210. unicode(err)))
  211. except EnvironmentError, err:
  212. LOG.error(err)
  213. LOG.info('Does your destination filename contain invalid characters?')
  214. GetDocs = get_docs
  215. def _modify_entry(doc_entry, path_to_new_content, file_ext):
  216. """Modify the file data associated with a document entry."""
  217. raise NotImplementedError('_modify_entry must be defined!')
  def to_safe_filename(self, text):
    """Replace characters that are unsafe in a filename.

    Which characters count as unsafe depends on the operating system (see
    UNSAFE_FILE_CHARS). Each one is replaced by the value of the
    'invalid_filename_character_sub' config option, which defaults to the
    empty string, i.e. the character is removed.

    Args:
      text: Text to check for invalid characters.

    Returns:
      text with every unsafe character substituted.
    """
    replacement = safe_decode(
        self.config.lazy_get(SECTION_HEADER, 'invalid_filename_character_sub',
                             default=''))
    pieces = []
    for char in text:
      if char in UNSAFE_FILE_CHARS:
        pieces.append(replacement)
      else:
        pieces.append(char)
    return ''.join(pieces)
  def upload_docs(self, paths, title=None, folder_entry=None,
                  file_ext=None, **kwargs):
    """Upload a list of documents and/or directories.

    A directory is uploaded recursively (roughly "cp -R directory/ folder"):
    a Docs folder is created for it and for each of its subdirectories, and
    every file found is uploaded into the matching folder. A plain file is
    uploaded directly into folder_entry.

    Note that title, file_ext and kwargs are only applied to items of paths
    that are files; files found inside directories are uploaded with the
    defaults of upload_single_doc.

    Keyword arguments:
      paths: List of file paths and/or directories to upload.
      title: Title to give the files once uploaded.
             Default None for the names of the files.
      folder_entry: GDataEntry of the folder to treat as the new root for
                    directories/files.
                    Default None for no folder (the Google Docs root).
      file_ext: Replace (or specify) the extension on the file when figuring
              out the upload format. For example, 'txt' will upload the
              file as if it was plain text. Default None for the file's
              extension (which defaults to 'txt' if there is none).
      kwargs: Typically contains 'convert', indicates if we should convert the
              file on upload. False will only be honored if the user is a Google
              Apps Premier account.

    Returns:
      Dictionary mapping filenames to the entries of the uploaded documents.
      Files whose upload failed are left out.
    """
    uploaded = {}
    for path in paths:
      if not os.path.isdir(path):
        doc = self.upload_single_doc(path, title=title,
                                     folder_entry=folder_entry,
                                     file_ext=file_ext,
                                     **kwargs)
        if doc:
          uploaded[os.path.basename(path)] = doc
        continue

      # Maps each local directory walked so far to the folder created for it.
      created_folders = {}
      # A trailing separator would make folder_name '' for the top directory,
      # which causes 503 "Service Unavailable".
      path = path.rstrip(os.path.sep)
      for dirpath, _subdirs, filenames in os.walk(path):
        parent_dir = os.path.dirname(dirpath)
        folder_name = os.path.basename(dirpath)
        # The top directory has no created parent and goes under folder_entry.
        parent_entry = created_folders.get(parent_dir, folder_entry)
        fentry = self._create_folder(folder_name, parent_entry)
        created_folders[dirpath] = fentry
        LOG.debug('Created folder ' + dirpath + ' ' + folder_name)
        for fname in filenames:
          doc = self.upload_single_doc(os.path.join(dirpath, fname),
                                       folder_entry=fentry)
          if doc:
            uploaded[fname] = doc
    return uploaded

  UploadDocs = upload_docs
  286. def upload_single_doc(self, path, title=None, folder_entry=None,
  287. file_ext=None, **kwargs):
  288. """Upload one file to Google Docs.
  289. Args:
  290. path: str Path to file to upload.
  291. title: str (optional) Title to give the upload. Defaults to the filename.
  292. folder_entry: DocsEntry (optional) (sub)Folder to upload into.
  293. file_ext: str (optional) Extension used to determine MIME type of
  294. upload. If not specified, uses mimetypes module to guess it.
  295. kwargs: Should contain value for 'convert', either True or False.
  296. Indicates if upload should be converted. Only Apps Premier users can
  297. specify False.
  298. Returns:
  299. Entry corresponding to the document on Google Docs
  300. """
  301. filename = os.path.basename(path)
  302. try:
  303. convert = kwargs['convert']
  304. except KeyError:
  305. convert = True
  306. if not file_ext:
  307. file_ext = googlecl.get_extension_from_path(filename)
  308. file_title = filename.split('.')[0]
  309. else:
  310. file_title = filename
  311. content_type = self._determine_content_type(file_ext)
  312. if not content_type:
  313. LOG.debug('Could not find content type using gdata, trying mimetypes')
  314. import mimetypes
  315. content_type = mimetypes.guess_type(path)[0]
  316. if not content_type:
  317. if convert:
  318. content_type = 'text/plain'
  319. else:
  320. content_type = 'application/octet-stream'
  321. entry_title = title or filename
  322. else:
  323. entry_title = title or file_title
  324. else:
  325. entry_title = title or file_title
  326. LOG.debug('Uploading with content type %s', content_type)
  327. LOG.info('Loading %s', path)
  328. if folder_entry:
  329. post_uri = folder_entry.content.src
  330. else:
  331. post_uri = self.DOCLIST_FEED_URI
  332. if not convert:
  333. post_uri += '?convert=false'
  334. try:
  335. new_entry = self._transmit_doc(path, entry_title, post_uri, content_type,
  336. file_ext)
  337. except self.request_error, err:
  338. LOG.error('Failed to upload %s: %s', path, err)
  339. if (str(err).find('ServiceForbiddenException') != -1 or
  340. str(err).find('Unsupported Media Type') != -1):
  341. # Attempt to catch older gdata users and warn them when they try to upload
  342. # unsupported file types
  343. print "\n\nYour version of python-gdata may not support this action. "
  344. print "Please see the wiki page for more details: "
  345. print "http://code.google.com/p/googlecl/wiki/UploadingGoogleDocs\n\n"
  346. if convert:
  347. LOG.info('You may have to specify a format with --format. Try ' +
  348. '--format=txt')
  349. return None
  350. else:
  351. LOG.info('Upload success! Direct link: %s',
  352. new_entry.GetAlternateLink().href)
  353. return new_entry
  354. UploadSingleDoc = upload_single_doc
  355. # Read size is 128*20 for no good reason.
  356. # Just want to avoid reading in the whole file, and read in a multiple of 128.
  357. def _md5_hash_file(path, read_size=2560):
  358. """Return a binary md5 checksum of file at path."""
  359. import hashlib
  360. hash_function = hashlib.md5()
  361. with open(path, 'r') as my_file:
  362. data = my_file.read(read_size)
  363. while data:
  364. hash_function.update(data)
  365. data = my_file.read(read_size)
  366. return hash_function.digest()
  367. def can_export(entry_or_url):
  368. """See if the given entry can be exported.
  369. Based off check done in gdata.docs.client.DocsClient.export
  370. Returns:
  371. True if entry can be exported to a specific format (can use client.export)
  372. False if not (must use client.Download)
  373. """
  374. if isinstance(entry_or_url, (str, unicode)):
  375. url = entry_or_url
  376. else:
  377. url = entry_or_url.content.src
  378. can_export = url.find('/Export?') != -1
  379. return can_export
  380. def safe_move(src, dst):
  381. """Move file from src to dst.
  382. If file with same name already exists at dst, rename the new file
  383. while preserving the extension.
  384. Returns:
  385. path to new file.
  386. """
  387. new_dir = os.path.abspath(dst)
  388. ext = googlecl.get_extension_from_path(src)
  389. if not ext:
  390. dotted_ext = ''
  391. else:
  392. dotted_ext = '.' + ext
  393. filename = os.path.basename(src).rstrip(dotted_ext)
  394. rename_num = 1
  395. new_path = os.path.join(new_dir, filename + dotted_ext)
  396. while os.path.exists(new_path):
  397. new_filename = filename + '-' + str(rename_num) + dotted_ext
  398. new_path = os.path.join(new_dir, new_filename)
  399. shutil.move(src, new_path)
  400. return new_path