PageRenderTime 1591ms CodeModel.GetById 32ms RepoModel.GetById 0ms app.codeStats 0ms

/web/lib/cloudstorage/common.py

https://gitlab.com/adam.lukaitis/muzei
Python | 397 lines | 376 code | 13 blank | 8 comment | 5 complexity | 262b8ce58970c55b2f57a991852657c4 MD5 | raw file
  1. # Copyright 2012 Google Inc. All Rights Reserved.
  2. """Helpers shared by cloudstorage_stub and cloudstorage_api."""
  3. __all__ = ['CS_XML_NS',
  4. 'CSFileStat',
  5. 'dt_str_to_posix',
  6. 'local_api_url',
  7. 'LOCAL_GCS_ENDPOINT',
  8. 'local_run',
  9. 'get_access_token',
  10. 'get_metadata',
  11. 'GCSFileStat',
  12. 'http_time_to_posix',
  13. 'memory_usage',
  14. 'posix_time_to_http',
  15. 'posix_to_dt_str',
  16. 'set_access_token',
  17. 'validate_options',
  18. 'validate_bucket_name',
  19. 'validate_bucket_path',
  20. 'validate_file_path',
  21. ]
  22. import calendar
  23. import datetime
  24. from email import utils as email_utils
  25. import logging
  26. import os
  27. import re
  28. try:
  29. from google.appengine.api import runtime
  30. except ImportError:
  31. from google.appengine.api import runtime
  32. _GCS_BUCKET_REGEX_BASE = r'[a-z0-9\.\-_]{3,63}'
  33. _GCS_BUCKET_REGEX = re.compile(_GCS_BUCKET_REGEX_BASE + r'$')
  34. _GCS_BUCKET_PATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'$')
  35. _GCS_PATH_PREFIX_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'.*')
  36. _GCS_FULLPATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'/.*')
  37. _GCS_METADATA = ['x-goog-meta-',
  38. 'content-disposition',
  39. 'cache-control',
  40. 'content-encoding']
  41. _GCS_OPTIONS = _GCS_METADATA + ['x-goog-acl']
  42. CS_XML_NS = 'http://doc.s3.amazonaws.com/2006-03-01'
  43. LOCAL_GCS_ENDPOINT = '/_ah/gcs'
  44. _access_token = ''
  45. _MAX_GET_BUCKET_RESULT = 1000
  46. def set_access_token(access_token):
  47. """Set the shared access token to authenticate with Google Cloud Storage.
  48. When set, the library will always attempt to communicate with the
  49. real Google Cloud Storage with this token even when running on dev appserver.
  50. Note the token could expire so it's up to you to renew it.
  51. When absent, the library will automatically request and refresh a token
  52. on appserver, or when on dev appserver, talk to a Google Cloud Storage
  53. stub.
  54. Args:
  55. access_token: you can get one by run 'gsutil -d ls' and copy the
  56. str after 'Bearer'.
  57. """
  58. global _access_token
  59. _access_token = access_token
  60. def get_access_token():
  61. """Returns the shared access token."""
  62. return _access_token
  63. class GCSFileStat(object):
  64. """Container for GCS file stat."""
  65. def __init__(self,
  66. filename,
  67. st_size,
  68. etag,
  69. st_ctime,
  70. content_type=None,
  71. metadata=None,
  72. is_dir=False):
  73. """Initialize.
  74. For files, the non optional arguments are always set.
  75. For directories, only filename and is_dir is set.
  76. Args:
  77. filename: a Google Cloud Storage filename of form '/bucket/filename'.
  78. st_size: file size in bytes. long compatible.
  79. etag: hex digest of the md5 hash of the file's content. str.
  80. st_ctime: posix file creation time. float compatible.
  81. content_type: content type. str.
  82. metadata: a str->str dict of user specified options when creating
  83. the file. Possible keys are x-goog-meta-, content-disposition,
  84. content-encoding, and cache-control.
  85. is_dir: True if this represents a directory. False if this is a real file.
  86. """
  87. self.filename = filename
  88. self.is_dir = is_dir
  89. self.st_size = None
  90. self.st_ctime = None
  91. self.etag = None
  92. self.content_type = content_type
  93. self.metadata = metadata
  94. if not is_dir:
  95. self.st_size = long(st_size)
  96. self.st_ctime = float(st_ctime)
  97. if etag[0] == '"' and etag[-1] == '"':
  98. etag = etag[1:-1]
  99. self.etag = etag
  100. def __repr__(self):
  101. if self.is_dir:
  102. return '(directory: %s)' % self.filename
  103. return (
  104. '(filename: %(filename)s, st_size: %(st_size)s, '
  105. 'st_ctime: %(st_ctime)s, etag: %(etag)s, '
  106. 'content_type: %(content_type)s, '
  107. 'metadata: %(metadata)s)' %
  108. dict(filename=self.filename,
  109. st_size=self.st_size,
  110. st_ctime=self.st_ctime,
  111. etag=self.etag,
  112. content_type=self.content_type,
  113. metadata=self.metadata))
  114. def __cmp__(self, other):
  115. if not isinstance(other, self.__class__):
  116. raise ValueError('Argument to cmp must have the same type. '
  117. 'Expect %s, got %s', self.__class__.__name__,
  118. other.__class__.__name__)
  119. if self.filename > other.filename:
  120. return 1
  121. elif self.filename < other.filename:
  122. return -1
  123. return 0
  124. def __hash__(self):
  125. if self.etag:
  126. return hash(self.etag)
  127. return hash(self.filename)
  128. CSFileStat = GCSFileStat
  129. def get_metadata(headers):
  130. """Get user defined options from HTTP response headers."""
  131. return dict((k, v) for k, v in headers.iteritems()
  132. if any(k.lower().startswith(valid) for valid in _GCS_METADATA))
  133. def validate_bucket_name(name):
  134. """Validate a Google Storage bucket name.
  135. Args:
  136. name: a Google Storage bucket name with no prefix or suffix.
  137. Raises:
  138. ValueError: if name is invalid.
  139. """
  140. _validate_path(name)
  141. if not _GCS_BUCKET_REGEX.match(name):
  142. raise ValueError('Bucket should be 3-63 characters long using only a-z,'
  143. '0-9, underscore, dash or dot but got %s' % name)
  144. def validate_bucket_path(path):
  145. """Validate a Google Cloud Storage bucket path.
  146. Args:
  147. path: a Google Storage bucket path. It should have form '/bucket'.
  148. Raises:
  149. ValueError: if path is invalid.
  150. """
  151. _validate_path(path)
  152. if not _GCS_BUCKET_PATH_REGEX.match(path):
  153. raise ValueError('Bucket should have format /bucket '
  154. 'but got %s' % path)
  155. def validate_file_path(path):
  156. """Validate a Google Cloud Storage file path.
  157. Args:
  158. path: a Google Storage file path. It should have form '/bucket/filename'.
  159. Raises:
  160. ValueError: if path is invalid.
  161. """
  162. _validate_path(path)
  163. if not _GCS_FULLPATH_REGEX.match(path):
  164. raise ValueError('Path should have format /bucket/filename '
  165. 'but got %s' % path)
  166. def _process_path_prefix(path_prefix):
  167. """Validate and process a Google Cloud Stoarge path prefix.
  168. Args:
  169. path_prefix: a Google Cloud Storage path prefix of format '/bucket/prefix'
  170. or '/bucket/' or '/bucket'.
  171. Raises:
  172. ValueError: if path is invalid.
  173. Returns:
  174. a tuple of /bucket and prefix. prefix can be None.
  175. """
  176. _validate_path(path_prefix)
  177. if not _GCS_PATH_PREFIX_REGEX.match(path_prefix):
  178. raise ValueError('Path prefix should have format /bucket, /bucket/, '
  179. 'or /bucket/prefix but got %s.' % path_prefix)
  180. bucket_name_end = path_prefix.find('/', 1)
  181. bucket = path_prefix
  182. prefix = None
  183. if bucket_name_end != -1:
  184. bucket = path_prefix[:bucket_name_end]
  185. prefix = path_prefix[bucket_name_end + 1:] or None
  186. return bucket, prefix
  187. def _validate_path(path):
  188. """Basic validation of Google Storage paths.
  189. Args:
  190. path: a Google Storage path. It should have form '/bucket/filename'
  191. or '/bucket'.
  192. Raises:
  193. ValueError: if path is invalid.
  194. TypeError: if path is not of type basestring.
  195. """
  196. if not path:
  197. raise ValueError('Path is empty')
  198. if not isinstance(path, basestring):
  199. raise TypeError('Path should be a string but is %s (%s).' %
  200. (path.__class__, path))
  201. def validate_options(options):
  202. """Validate Google Cloud Storage options.
  203. Args:
  204. options: a str->basestring dict of options to pass to Google Cloud Storage.
  205. Raises:
  206. ValueError: if option is not supported.
  207. TypeError: if option is not of type str or value of an option
  208. is not of type basestring.
  209. """
  210. if not options:
  211. return
  212. for k, v in options.iteritems():
  213. if not isinstance(k, str):
  214. raise TypeError('option %r should be a str.' % k)
  215. if not any(k.lower().startswith(valid) for valid in _GCS_OPTIONS):
  216. raise ValueError('option %s is not supported.' % k)
  217. if not isinstance(v, basestring):
  218. raise TypeError('value %r for option %s should be of type basestring.' %
  219. v, k)
  220. def http_time_to_posix(http_time):
  221. """Convert HTTP time format to posix time.
  222. See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1
  223. for http time format.
  224. Args:
  225. http_time: time in RFC 2616 format. e.g.
  226. "Mon, 20 Nov 1995 19:12:08 GMT".
  227. Returns:
  228. A float of secs from unix epoch.
  229. """
  230. if http_time is not None:
  231. return email_utils.mktime_tz(email_utils.parsedate_tz(http_time))
  232. def posix_time_to_http(posix_time):
  233. """Convert posix time to HTML header time format.
  234. Args:
  235. posix_time: unix time.
  236. Returns:
  237. A datatime str in RFC 2616 format.
  238. """
  239. if posix_time:
  240. return email_utils.formatdate(posix_time, usegmt=True)
  241. _DT_FORMAT = '%Y-%m-%dT%H:%M:%S'
  242. def dt_str_to_posix(dt_str):
  243. """format str to posix.
  244. datetime str is of format %Y-%m-%dT%H:%M:%S.%fZ,
  245. e.g. 2013-04-12T00:22:27.978Z. According to ISO 8601, T is a separator
  246. between date and time when they are on the same line.
  247. Z indicates UTC (zero meridian).
  248. A pointer: http://www.cl.cam.ac.uk/~mgk25/iso-time.html
  249. This is used to parse LastModified node from GCS's GET bucket XML response.
  250. Args:
  251. dt_str: A datetime str.
  252. Returns:
  253. A float of secs from unix epoch. By posix definition, epoch is midnight
  254. 1970/1/1 UTC.
  255. """
  256. parsable, _ = dt_str.split('.')
  257. dt = datetime.datetime.strptime(parsable, _DT_FORMAT)
  258. return calendar.timegm(dt.utctimetuple())
  259. def posix_to_dt_str(posix):
  260. """Reverse of str_to_datetime.
  261. This is used by GCS stub to generate GET bucket XML response.
  262. Args:
  263. posix: A float of secs from unix epoch.
  264. Returns:
  265. A datetime str.
  266. """
  267. dt = datetime.datetime.utcfromtimestamp(posix)
  268. dt_str = dt.strftime(_DT_FORMAT)
  269. return dt_str + '.000Z'
  270. def local_run():
  271. """Whether we should hit GCS dev appserver stub."""
  272. server_software = os.environ.get('SERVER_SOFTWARE')
  273. if server_software is None:
  274. return True
  275. if 'remote_api' in server_software:
  276. return False
  277. if server_software.startswith(('Development', 'testutil')):
  278. return True
  279. return False
  280. def local_api_url():
  281. """Return URL for GCS emulation on dev appserver."""
  282. return 'http://%s%s' % (os.environ.get('HTTP_HOST'), LOCAL_GCS_ENDPOINT)
  283. def memory_usage(method):
  284. """Log memory usage before and after a method."""
  285. def wrapper(*args, **kwargs):
  286. logging.info('Memory before method %s is %s.',
  287. method.__name__, runtime.memory_usage().current())
  288. result = method(*args, **kwargs)
  289. logging.info('Memory after method %s is %s',
  290. method.__name__, runtime.memory_usage().current())
  291. return result
  292. return wrapper
  293. def _add_ns(tagname):
  294. return '{%(ns)s}%(tag)s' % {'ns': CS_XML_NS,
  295. 'tag': tagname}
  296. _T_CONTENTS = _add_ns('Contents')
  297. _T_LAST_MODIFIED = _add_ns('LastModified')
  298. _T_ETAG = _add_ns('ETag')
  299. _T_KEY = _add_ns('Key')
  300. _T_SIZE = _add_ns('Size')
  301. _T_PREFIX = _add_ns('Prefix')
  302. _T_COMMON_PREFIXES = _add_ns('CommonPrefixes')
  303. _T_NEXT_MARKER = _add_ns('NextMarker')
  304. _T_IS_TRUNCATED = _add_ns('IsTruncated')