
/web/lib/cloudstorage/common.py

https://gitlab.com/adam.lukaitis/muzei
# Copyright 2012 Google Inc. All Rights Reserved.

"""Helpers shared by cloudstorage_stub and cloudstorage_api."""


__all__ = ['CS_XML_NS',
           'CSFileStat',
           'dt_str_to_posix',
           'local_api_url',
           'LOCAL_GCS_ENDPOINT',
           'local_run',
           'get_access_token',
           'get_metadata',
           'GCSFileStat',
           'http_time_to_posix',
           'memory_usage',
           'posix_time_to_http',
           'posix_to_dt_str',
           'set_access_token',
           'validate_options',
           'validate_bucket_name',
           'validate_bucket_path',
           'validate_file_path',
          ]


import calendar
import datetime
from email import utils as email_utils
import logging
import os
import re

try:
  from google.appengine.api import runtime
except ImportError:
  from google.appengine import runtime


_GCS_BUCKET_REGEX_BASE = r'[a-z0-9\.\-_]{3,63}'
_GCS_BUCKET_REGEX = re.compile(_GCS_BUCKET_REGEX_BASE + r'$')
_GCS_BUCKET_PATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'$')
_GCS_PATH_PREFIX_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'.*')
_GCS_FULLPATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'/.*')
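# Illustrative examples derived from the patterns above (bucket names here
# are made up):
#   _GCS_BUCKET_REGEX matches a bare bucket name, e.g. 'my_bucket-1'.
#   _GCS_BUCKET_PATH_REGEX matches '/my_bucket-1' (bucket only, no object).
#   _GCS_FULLPATH_REGEX matches '/my_bucket-1/some/object.txt'.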
_GCS_METADATA = ['x-goog-meta-',
                 'content-disposition',
                 'cache-control',
                 'content-encoding']
_GCS_OPTIONS = _GCS_METADATA + ['x-goog-acl']
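# These prefixes are matched case-insensitively against header/option names;
# e.g. a 'Content-Encoding' response header or an 'x-goog-meta-owner' option
# both qualify (see get_metadata and validate_options below).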
CS_XML_NS = 'http://doc.s3.amazonaws.com/2006-03-01'
LOCAL_GCS_ENDPOINT = '/_ah/gcs'
_access_token = ''


_MAX_GET_BUCKET_RESULT = 1000
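# Upper bound on the number of entries returned by a single GET bucket (list)
# request; the XML API caps results per request at 1000.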


def set_access_token(access_token):
  """Set the shared access token to authenticate with Google Cloud Storage.

  When set, the library will always attempt to communicate with the
  real Google Cloud Storage with this token even when running on dev appserver.
  Note the token could expire, so it's up to you to renew it.

  When absent, the library will automatically request and refresh a token
  on appserver, or, when on dev appserver, talk to a Google Cloud Storage
  stub.

  Args:
    access_token: you can get one by running 'gsutil -d ls' and copying the
      str after 'Bearer'.
  """
  global _access_token
  _access_token = access_token


def get_access_token():
  """Returns the shared access token."""
  return _access_token

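# Usage sketch (the token below is a made-up placeholder, not a real credential):
#   set_access_token('ya29.EXAMPLE_TOKEN')
#   assert get_access_token() == 'ya29.EXAMPLE_TOKEN'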

class GCSFileStat(object):
  """Container for GCS file stat."""

  def __init__(self,
               filename,
               st_size,
               etag,
               st_ctime,
               content_type=None,
               metadata=None,
               is_dir=False):
    """Initialize.

    For files, the non-optional arguments are always set.
    For directories, only filename and is_dir are set.

    Args:
      filename: a Google Cloud Storage filename of form '/bucket/filename'.
      st_size: file size in bytes. long compatible.
      etag: hex digest of the md5 hash of the file's content. str.
      st_ctime: posix file creation time. float compatible.
      content_type: content type. str.
      metadata: a str->str dict of user specified options when creating
        the file. Possible keys are x-goog-meta-, content-disposition,
        content-encoding, and cache-control.
      is_dir: True if this represents a directory. False if this is a real file.
    """
    self.filename = filename
    self.is_dir = is_dir
    self.st_size = None
    self.st_ctime = None
    self.etag = None
    self.content_type = content_type
    self.metadata = metadata

    if not is_dir:
      self.st_size = long(st_size)
      self.st_ctime = float(st_ctime)
      if etag[0] == '"' and etag[-1] == '"':
        etag = etag[1:-1]
      self.etag = etag

  def __repr__(self):
    if self.is_dir:
      return '(directory: %s)' % self.filename

    return (
        '(filename: %(filename)s, st_size: %(st_size)s, '
        'st_ctime: %(st_ctime)s, etag: %(etag)s, '
        'content_type: %(content_type)s, '
        'metadata: %(metadata)s)' %
        dict(filename=self.filename,
             st_size=self.st_size,
             st_ctime=self.st_ctime,
             etag=self.etag,
             content_type=self.content_type,
             metadata=self.metadata))

  def __cmp__(self, other):
    if not isinstance(other, self.__class__):
      raise ValueError('Argument to cmp must have the same type. '
                       'Expect %s, got %s.' %
                       (self.__class__.__name__, other.__class__.__name__))
    if self.filename > other.filename:
      return 1
    elif self.filename < other.filename:
      return -1
    return 0

  def __hash__(self):
    if self.etag:
      return hash(self.etag)
    return hash(self.filename)


CSFileStat = GCSFileStat

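# Illustrative only (values below are made up): a stat for a regular object
# might be constructed as
#   GCSFileStat('/my_bucket/file.txt', st_size=1024,
#               etag='"d41d8cd98f00b204e9800998ecf8427e"', st_ctime=1365726147)
# and the surrounding quotes are stripped from the etag on init.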

def get_metadata(headers):
  """Get user defined options from HTTP response headers."""
  return dict((k, v) for k, v in headers.iteritems()
              if any(k.lower().startswith(valid) for valid in _GCS_METADATA))

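# For example (hypothetical headers), given
#   {'x-goog-meta-owner': 'alice', 'Content-Type': 'text/plain',
#    'cache-control': 'no-cache'}
# this returns {'x-goog-meta-owner': 'alice', 'cache-control': 'no-cache'};
# 'Content-Type' is dropped because it is not user-specified metadata.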

def validate_bucket_name(name):
  """Validate a Google Storage bucket name.

  Args:
    name: a Google Storage bucket name with no prefix or suffix.

  Raises:
    ValueError: if name is invalid.
  """
  _validate_path(name)
  if not _GCS_BUCKET_REGEX.match(name):
    raise ValueError('Bucket should be 3-63 characters long using only a-z, '
                     '0-9, underscore, dash or dot but got %s' % name)


def validate_bucket_path(path):
  """Validate a Google Cloud Storage bucket path.

  Args:
    path: a Google Storage bucket path. It should have form '/bucket'.

  Raises:
    ValueError: if path is invalid.
  """
  _validate_path(path)
  if not _GCS_BUCKET_PATH_REGEX.match(path):
    raise ValueError('Bucket should have format /bucket '
                     'but got %s' % path)


def validate_file_path(path):
  """Validate a Google Cloud Storage file path.

  Args:
    path: a Google Storage file path. It should have form '/bucket/filename'.

  Raises:
    ValueError: if path is invalid.
  """
  _validate_path(path)
  if not _GCS_FULLPATH_REGEX.match(path):
    raise ValueError('Path should have format /bucket/filename '
                     'but got %s' % path)


def _process_path_prefix(path_prefix):
  """Validate and process a Google Cloud Storage path prefix.

  Args:
    path_prefix: a Google Cloud Storage path prefix of format '/bucket/prefix'
      or '/bucket/' or '/bucket'.

  Raises:
    ValueError: if path is invalid.

  Returns:
    a tuple of /bucket and prefix. prefix can be None.
  """
  _validate_path(path_prefix)
  if not _GCS_PATH_PREFIX_REGEX.match(path_prefix):
    raise ValueError('Path prefix should have format /bucket, /bucket/, '
                     'or /bucket/prefix but got %s.' % path_prefix)
  bucket_name_end = path_prefix.find('/', 1)
  bucket = path_prefix
  prefix = None
  if bucket_name_end != -1:
    bucket = path_prefix[:bucket_name_end]
    prefix = path_prefix[bucket_name_end + 1:] or None
  return bucket, prefix

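# For example (bucket name is made up):
#   _process_path_prefix('/my_bucket/images/')  -> ('/my_bucket', 'images/')
#   _process_path_prefix('/my_bucket/')         -> ('/my_bucket', None)
#   _process_path_prefix('/my_bucket')          -> ('/my_bucket', None)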

def _validate_path(path):
  """Basic validation of Google Storage paths.

  Args:
    path: a Google Storage path. It should have form '/bucket/filename'
      or '/bucket'.

  Raises:
    ValueError: if path is empty.
    TypeError: if path is not of type basestring.
  """
  if not path:
    raise ValueError('Path is empty')
  if not isinstance(path, basestring):
    raise TypeError('Path should be a string but is %s (%s).' %
                    (path.__class__, path))


def validate_options(options):
  """Validate Google Cloud Storage options.

  Args:
    options: a str->basestring dict of options to pass to Google Cloud Storage.

  Raises:
    ValueError: if option is not supported.
    TypeError: if option is not of type str or value of an option
      is not of type basestring.
  """
  if not options:
    return

  for k, v in options.iteritems():
    if not isinstance(k, str):
      raise TypeError('option %r should be a str.' % k)
    if not any(k.lower().startswith(valid) for valid in _GCS_OPTIONS):
      raise ValueError('option %s is not supported.' % k)
    if not isinstance(v, basestring):
      raise TypeError('value %r for option %s should be of type basestring.' %
                      (v, k))

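# For example, a well-formed options dict might look like (keys and values are
# illustrative):
#   {'x-goog-acl': 'public-read',
#    'x-goog-meta-owner': 'alice',
#    'content-disposition': 'attachment; filename=report.pdf'}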

def http_time_to_posix(http_time):
  """Convert HTTP time format to posix time.

  See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1
  for http time format.

  Args:
    http_time: time in RFC 2616 format. e.g.
      "Mon, 20 Nov 1995 19:12:08 GMT".

  Returns:
    A float of secs from unix epoch.
  """
  if http_time is not None:
    return email_utils.mktime_tz(email_utils.parsedate_tz(http_time))


def posix_time_to_http(posix_time):
  """Convert posix time to HTTP header time format.

  Args:
    posix_time: unix time.

  Returns:
    A datetime str in RFC 2616 format.
  """
  if posix_time:
    return email_utils.formatdate(posix_time, usegmt=True)


_DT_FORMAT = '%Y-%m-%dT%H:%M:%S'


def dt_str_to_posix(dt_str):
  """Convert a datetime str to posix time.

  The datetime str is of format %Y-%m-%dT%H:%M:%S.%fZ,
  e.g. 2013-04-12T00:22:27.978Z. According to ISO 8601, T is a separator
  between date and time when they are on the same line.
  Z indicates UTC (zero meridian).

  A pointer: http://www.cl.cam.ac.uk/~mgk25/iso-time.html

  This is used to parse the LastModified node from GCS's GET bucket XML
  response.

  Args:
    dt_str: A datetime str.

  Returns:
    A float of secs from unix epoch. By posix definition, epoch is midnight
    1970/1/1 UTC.
  """
  parsable, _ = dt_str.split('.')
  dt = datetime.datetime.strptime(parsable, _DT_FORMAT)
  return calendar.timegm(dt.utctimetuple())

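# For example:
#   dt_str_to_posix('2013-04-12T00:22:27.978Z')  -> 1365726147
# (the fractional '.978' part is discarded before parsing).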

def posix_to_dt_str(posix):
  """Reverse of dt_str_to_posix.

  This is used by the GCS stub to generate the GET bucket XML response.

  Args:
    posix: A float of secs from unix epoch.

  Returns:
    A datetime str.
  """
  dt = datetime.datetime.utcfromtimestamp(posix)
  dt_str = dt.strftime(_DT_FORMAT)
  return dt_str + '.000Z'


def local_run():
  """Whether we should hit GCS dev appserver stub."""
  server_software = os.environ.get('SERVER_SOFTWARE')
  if server_software is None:
    return True
  if 'remote_api' in server_software:
    return False
  if server_software.startswith(('Development', 'testutil')):
    return True
  return False


def local_api_url():
  """Return URL for GCS emulation on dev appserver."""
  return 'http://%s%s' % (os.environ.get('HTTP_HOST'), LOCAL_GCS_ENDPOINT)


def memory_usage(method):
  """Log memory usage before and after a method."""
  def wrapper(*args, **kwargs):
    logging.info('Memory before method %s is %s.',
                 method.__name__, runtime.memory_usage().current())
    result = method(*args, **kwargs)
    logging.info('Memory after method %s is %s',
                 method.__name__, runtime.memory_usage().current())
    return result
  return wrapper

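# Usage sketch (decorating a hypothetical handler method):
#   @memory_usage
#   def process(request):
#     ...
# Note: runtime.memory_usage() comes from the App Engine runtime API imported
# above, so the decorator is only useful where that API is available.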

def _add_ns(tagname):
  """Prepend the GCS XML namespace to a tag name, in ElementTree notation."""
  return '{%(ns)s}%(tag)s' % {'ns': CS_XML_NS,
                              'tag': tagname}


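# Namespaced element tags used when parsing a GET bucket (list) XML response,
# e.g. _add_ns('Contents') == '{http://doc.s3.amazonaws.com/2006-03-01}Contents'.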
_T_CONTENTS = _add_ns('Contents')
_T_LAST_MODIFIED = _add_ns('LastModified')
_T_ETAG = _add_ns('ETag')
_T_KEY = _add_ns('Key')
_T_SIZE = _add_ns('Size')
_T_PREFIX = _add_ns('Prefix')
_T_COMMON_PREFIXES = _add_ns('CommonPrefixes')
_T_NEXT_MARKER = _add_ns('NextMarker')
_T_IS_TRUNCATED = _add_ns('IsTruncated')