PageRenderTime 95ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/django_extensions/management/commands/sync_media_s3.py

https://github.com/mpdaugherty/django-extensions
Python | 282 lines | 252 code | 11 blank | 19 comment | 29 complexity | 5269a4ea3fa127d93a5bf2bcb57c5716 MD5 | raw file
  1. """
  2. Sync Media to S3
  3. ================
  4. Django command that scans all files in your settings.MEDIA_ROOT folder and
  5. uploads them to S3 with the same directory structure.
  6. This command can optionally do the following but it is off by default:
  7. * gzip compress any CSS and Javascript files it finds and adds the appropriate
  8. 'Content-Encoding' header.
  9. * set a far future 'Expires' header for optimal caching.
  10. Note: This script requires the Python boto library and valid Amazon Web
  11. Services API keys.
  12. Required settings.py variables:
  13. AWS_ACCESS_KEY_ID = ''
  14. AWS_SECRET_ACCESS_KEY = ''
  15. AWS_BUCKET_NAME = ''
  16. Command options are:
  17. -p PREFIX, --prefix=PREFIX
  18. The prefix to prepend to the path on S3.
  19. --gzip Enables gzipping CSS and Javascript files.
  20. --expires Enables setting a far future expires header.
  21. --force Skip the file mtime check to force upload of all
  22. files.
  23. --filter-list Override default directory and file exclusion
  24. filters. (enter as comma separated line)
  25. --renamegzip Enables renaming of gzipped files by appending '.gz'
  26. to the original file name. This way your original assets
  27. will not be replaced by the gzipped ones if you don't want
  28. them to be.
  29. TODO:
  30. * Use fnmatch (or regex) to allow more complex FILTER_LIST rules.
  31. """
  32. import datetime
  33. import email
  34. import mimetypes
  35. import optparse
  36. import os
  37. import sys
  38. import time
  39. from django.conf import settings
  40. from django.core.management.base import BaseCommand, CommandError
  41. # Make sure boto is available
  42. try:
  43. import boto
  44. import boto.exception
  45. except ImportError:
  46. raise ImportError, "The boto Python library is not installed."
  47. class Command(BaseCommand):
  48. # Extra variables to avoid passing these around
  49. AWS_ACCESS_KEY_ID = ''
  50. AWS_SECRET_ACCESS_KEY = ''
  51. AWS_BUCKET_NAME = ''
  52. DIRECTORY = ''
  53. FILTER_LIST = ['.DS_Store', '.svn', '.hg', '.git', 'Thumbs.db']
  54. GZIP_CONTENT_TYPES = (
  55. 'text/css',
  56. 'application/javascript',
  57. 'application/x-javascript',
  58. 'text/javascript'
  59. )
  60. upload_count = 0
  61. skip_count = 0
  62. option_list = BaseCommand.option_list + (
  63. optparse.make_option('-p', '--prefix',
  64. dest='prefix',
  65. default=getattr(settings, 'SYNC_MEDIA_S3_PREFIX', ''),
  66. help="The prefix to prepend to the path on S3."),
  67. optparse.make_option('-d', '--dir',
  68. dest='dir', default=settings.MEDIA_ROOT,
  69. help="The root directory to use instead of your MEDIA_ROOT"),
  70. optparse.make_option('--gzip',
  71. action='store_true', dest='gzip', default=False,
  72. help="Enables gzipping CSS and Javascript files."),
  73. optparse.make_option('--renamegzip',
  74. action='store_true', dest='renamegzip', default=False,
  75. help="Enables renaming of gzipped assets to have '.gz' appended to the filename."),
  76. optparse.make_option('--expires',
  77. action='store_true', dest='expires', default=False,
  78. help="Enables setting a far future expires header."),
  79. optparse.make_option('--force',
  80. action='store_true', dest='force', default=False,
  81. help="Skip the file mtime check to force upload of all files."),
  82. optparse.make_option('--filter-list', dest='filter_list',
  83. action='store', default='',
  84. help="Override default directory and file exclusion filters. (enter as comma seperated line)"),
  85. )
  86. help = 'Syncs the complete MEDIA_ROOT structure and files to S3 into the given bucket name.'
  87. args = 'bucket_name'
  88. can_import_settings = True
  89. def handle(self, *args, **options):
  90. # Check for AWS keys in settings
  91. if not hasattr(settings, 'AWS_ACCESS_KEY_ID') or \
  92. not hasattr(settings, 'AWS_SECRET_ACCESS_KEY'):
  93. raise CommandError('Missing AWS keys from settings file. Please' +
  94. 'supply both AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.')
  95. else:
  96. self.AWS_ACCESS_KEY_ID = settings.AWS_ACCESS_KEY_ID
  97. self.AWS_SECRET_ACCESS_KEY = settings.AWS_SECRET_ACCESS_KEY
  98. if not hasattr(settings, 'AWS_BUCKET_NAME'):
  99. raise CommandError('Missing bucket name from settings file. Please' +
  100. ' add the AWS_BUCKET_NAME to your settings file.')
  101. else:
  102. if not settings.AWS_BUCKET_NAME:
  103. raise CommandError('AWS_BUCKET_NAME cannot be empty.')
  104. self.AWS_BUCKET_NAME = settings.AWS_BUCKET_NAME
  105. if not hasattr(settings, 'MEDIA_ROOT'):
  106. raise CommandError('MEDIA_ROOT must be set in your settings.')
  107. else:
  108. if not settings.MEDIA_ROOT:
  109. raise CommandError('MEDIA_ROOT must be set in your settings.')
  110. self.verbosity = int(options.get('verbosity'))
  111. self.prefix = options.get('prefix')
  112. self.do_gzip = options.get('gzip')
  113. self.rename_gzip = options.get('renamegzip')
  114. self.do_expires = options.get('expires')
  115. self.do_force = options.get('force')
  116. self.DIRECTORY = options.get('dir')
  117. self.FILTER_LIST = getattr(settings, 'FILTER_LIST', self.FILTER_LIST)
  118. filter_list = options.get('filter_list')
  119. if filter_list:
  120. # command line option overrides default filter_list and
  121. # settings.filter_list
  122. self.FILTER_LIST = filter_list.split(',')
  123. # Now call the syncing method to walk the MEDIA_ROOT directory and
  124. # upload all files found.
  125. self.sync_s3()
  126. print
  127. print "%d files uploaded." % (self.upload_count)
  128. print "%d files skipped." % (self.skip_count)
  129. def sync_s3(self):
  130. """
  131. Walks the media directory and syncs files to S3
  132. """
  133. bucket, key = self.open_s3()
  134. os.path.walk(self.DIRECTORY, self.upload_s3,
  135. (bucket, key, self.AWS_BUCKET_NAME, self.DIRECTORY))
  136. def compress_string(self, s):
  137. """Gzip a given string."""
  138. import gzip
  139. try:
  140. from cStringIO import StringIO
  141. except ImportError:
  142. from StringIO import StringIO
  143. zbuf = StringIO()
  144. zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf)
  145. zfile.write(s)
  146. zfile.close()
  147. return zbuf.getvalue()
  148. def open_s3(self):
  149. """
  150. Opens connection to S3 returning bucket and key
  151. """
  152. conn = boto.connect_s3(self.AWS_ACCESS_KEY_ID, self.AWS_SECRET_ACCESS_KEY)
  153. try:
  154. bucket = conn.get_bucket(self.AWS_BUCKET_NAME)
  155. except boto.exception.S3ResponseError:
  156. bucket = conn.create_bucket(self.AWS_BUCKET_NAME)
  157. return bucket, boto.s3.key.Key(bucket)
  158. def upload_s3(self, arg, dirname, names):
  159. """
  160. This is the callback to os.path.walk and where much of the work happens
  161. """
  162. bucket, key, bucket_name, root_dir = arg
  163. # Skip directories we don't want to sync
  164. if os.path.basename(dirname) in self.FILTER_LIST:
  165. # prevent walk from processing subfiles/subdirs below the ignored one
  166. del names[:]
  167. return
  168. # Later we assume the MEDIA_ROOT ends with a trailing slash
  169. if not root_dir.endswith(os.path.sep):
  170. root_dir = root_dir + os.path.sep
  171. for file in names:
  172. headers = {}
  173. if file in self.FILTER_LIST:
  174. continue # Skip files we don't want to sync
  175. filename = os.path.join(dirname, file)
  176. if os.path.isdir(filename):
  177. continue # Don't try to upload directories
  178. file_key = filename[len(root_dir):]
  179. if self.prefix:
  180. file_key = '%s/%s' % (self.prefix, file_key)
  181. # Check if file on S3 is older than local file, if so, upload
  182. if not self.do_force:
  183. s3_key = bucket.get_key(file_key)
  184. if s3_key:
  185. s3_datetime = datetime.datetime(*time.strptime(
  186. s3_key.last_modified, '%a, %d %b %Y %H:%M:%S %Z')[0:6])
  187. local_datetime = datetime.datetime.utcfromtimestamp(
  188. os.stat(filename).st_mtime)
  189. if local_datetime < s3_datetime:
  190. self.skip_count += 1
  191. if self.verbosity > 1:
  192. print "File %s hasn't been modified since last " \
  193. "being uploaded" % (file_key)
  194. continue
  195. # File is newer, let's process and upload
  196. if self.verbosity > 0:
  197. print "Uploading %s..." % (file_key)
  198. content_type = mimetypes.guess_type(filename)[0]
  199. if content_type:
  200. headers['Content-Type'] = content_type
  201. file_obj = open(filename, 'rb')
  202. file_size = os.fstat(file_obj.fileno()).st_size
  203. filedata = file_obj.read()
  204. if self.do_gzip:
  205. # Gzipping only if file is large enough (>1K is recommended)
  206. # and only if file is a common text type (not a binary file)
  207. if file_size > 1024 and content_type in self.GZIP_CONTENT_TYPES:
  208. filedata = self.compress_string(filedata)
  209. if self.rename_gzip:
  210. #If rename_gzip is True, then rename the file by appending '.gz' to original filename
  211. file_key = '%s.gz' % (file_key)
  212. headers['Content-Encoding'] = 'gzip'
  213. if self.verbosity > 1:
  214. print "\tgzipped: %dk to %dk" % \
  215. (file_size / 1024, len(filedata) / 1024)
  216. if self.do_expires:
  217. # HTTP/1.0
  218. headers['Expires'] = '%s GMT' % (email.Utils.formatdate(
  219. time.mktime((datetime.datetime.now() +
  220. datetime.timedelta(days=365 * 2)).timetuple())))
  221. # HTTP/1.1
  222. headers['Cache-Control'] = 'max-age %d' % (3600 * 24 * 365 * 2)
  223. if self.verbosity > 1:
  224. print "\texpires: %s" % (headers['Expires'])
  225. print "\tcache-control: %s" % (headers['Cache-Control'])
  226. try:
  227. key.name = file_key
  228. key.set_contents_from_string(filedata, headers, replace=True)
  229. key.set_acl('public-read')
  230. except boto.exception.S3CreateError, e:
  231. print "Failed: %s" % e
  232. except Exception, e:
  233. print e
  234. raise
  235. else:
  236. self.upload_count += 1
  237. file_obj.close()
  238. # Backwards compatibility for Django r9110
  239. if not [opt for opt in Command.option_list if opt.dest == 'verbosity']:
  240. Command.option_list += (
  241. optparse.make_option('-v', '--verbosity',
  242. dest='verbosity', default=1, action='count',
  243. help="Verbose mode. Multiple -v options increase the verbosity."),
  244. )