PageRenderTime 81ms CodeModel.GetById 37ms RepoModel.GetById 0ms app.codeStats 0ms

/carrier/processor.py

https://github.com/pombredanne/carrier
Python | 195 lines | 132 code | 53 blank | 10 comment | 36 complexity | eb0654f8b7aed27c2eff0e11e9dcfe1b MD5 | raw file
Possible License(s): BSD-2-Clause
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

import calendar
import collections
import datetime
import hashlib
import logging
import re
import time

try:
    import urlparse
except ImportError:  # Python 3: urljoin lives in urllib.parse
    import urllib.parse as urlparse

from .pypi import Package
  12. logger = logging.getLogger(__name__)
  13. class Processor(object):
  14. def __init__(self, warehouse, pypi, store, *args, **kwargs):
  15. super(Processor, self).__init__(*args, **kwargs)
  16. self.warehouse = warehouse
  17. self.pypi = pypi
  18. self.store = store
  19. def get_and_update_or_create_version(self, release, project):
  20. version_data = release.serialize()
  21. version_data.update({"project": project})
  22. version, created = self.warehouse.versions.objects.get_or_create(project__name=project.name, version=release.version, show_yanked=True, defaults=version_data)
  23. if not created:
  24. version.classifiers = sorted(version.classifiers)
  25. changed = False
  26. for k, v in version_data.iteritems():
  27. if getattr(version, k, None) != v:
  28. changed = True
  29. setattr(version, k, v)
  30. if changed:
  31. version.save()
  32. return version
  33. def get_and_update_or_create_file(self, release, version, distribution):
  34. file_data = distribution.serialize()
  35. file_data.update({"version": version})
  36. vfile, created = self.warehouse.files.objects.get_or_create(filename=file_data["filename"], show_yanked=True, defaults=file_data)
  37. if not created:
  38. changed = False
  39. for k, v in file_data.iteritems():
  40. if getattr(vfile, k, None) != v:
  41. changed = True
  42. setattr(vfile, k, v)
  43. if changed:
  44. vfile.save()
  45. return vfile
  46. def update_files(self, release, version):
  47. # Determine if any files need to be deleted
  48. warehouse_files = set([f.filename for f in version.files])
  49. local_files = set([x.filename for x in release.files])
  50. deleted = warehouse_files - local_files
  51. # Delete any files that need to be deleted
  52. if deleted:
  53. for filename in deleted:
  54. logger.info("Deleting the file '%s' from '%s' version '%s'", filename, release.name, release.version)
  55. self.warehouse.files.objects.filter(filename__in=deleted).delete()
  56. return [self.get_and_update_or_create_file(release, version, distribution) for distribution in release.files]
  57. def update(self, name, version=None, timestamp=None, action=None, matches=None, force=False):
  58. package = Package(self.pypi, name, version)
  59. # Process the Name
  60. project, _ = self.warehouse.projects.objects.get_or_create(name=name)
  61. for release in package.releases():
  62. if "/" in release.version:
  63. # We cannot accept versions with a / in it.
  64. logger.error("Skipping '%s' version '%s' because it contains a '/'", release.name, release.version)
  65. continue
  66. if not release.changed(self.store.get("pypi:process:%s:%s" % (release.name, release.version))) and not force:
  67. logger.info("Skipping '%s' version '%s' because it has not changed", release.name, release.version)
  68. continue
  69. logger.info("Syncing '%s' version '%s'", release.name, release.version)
  70. version = self.get_and_update_or_create_version(release, project)
  71. self.update_files(release, version)
  72. self.store.set("pypi:process:%s:%s" % (release.name, release.version), release.hash())
  73. def delete(self, name, version, timestamp, action, matches):
  74. filename = None
  75. if action == "remove":
  76. if version is None:
  77. obj = self.warehouse.projects.objects.filter(name=name)
  78. logger.info("Deleting '%s'", name)
  79. else:
  80. obj = self.warehouse.versions.objects.filter(project__name=name, version=version)
  81. logger.info("Deleting '%s' version '%s'", name, version)
  82. elif action.startswith("remove file"):
  83. filename = matches.groups()[0]
  84. obj = self.warehouse.files.objects.filter(filename=filename)
  85. logger.info("Deleting '%s' version '%s' filename '%s'", name, version, filename)
  86. else:
  87. raise RuntimeError("Unknown Action passed to delete()")
  88. try:
  89. obj = obj.get()
  90. except obj.resource.DoesNotExist:
  91. return
  92. if version is None:
  93. key_pattern = "pypi:process:%s:*" % name
  94. keys = self.store.keys(key_pattern)
  95. else:
  96. keys = ["pypi:process:%s:%s" % (name, version)]
  97. for key in keys:
  98. self.store.delete(key)
  99. obj.delete()
  100. def process(self):
  101. logger.info("Starting changed projects synchronization")
  102. if not self.store.get("pypi:since"):
  103. # This is the first time we've ran so we need to do a bulk import
  104. raise RuntimeError(" Cannot process changes with no value for the last successful run.")
  105. current = datetime.datetime.utcnow().replace(microsecond=0)
  106. since = int(float(self.store.get("pypi:since"))) - 10
  107. dispatch = collections.OrderedDict([
  108. (re.compile("^create$"), self.update),
  109. (re.compile("^new release$"), self.update),
  110. (re.compile("^add [\w\d\.]+ file .+$"), self.update),
  111. (re.compile("^remove$"), self.delete),
  112. (re.compile("^remove file (.+)$"), self.delete),
  113. (re.compile("^update [\w]+(, [\w]+)*$"), self.update),
  114. #(re.compile("^docupdate$"), docupdate), # @@@ Do Something
  115. #(re.compile("^add (Owner|Maintainer) .+$"), add_user_role), # @@@ Do Something
  116. #(re.compile("^remove (Owner|Maintainer) .+$"), remove_user_role), # @@@ Do Something
  117. ])
  118. changes = self.pypi.changelog(since)
  119. if changes:
  120. if isinstance(changes[0], basestring):
  121. changes = [changes]
  122. for name, version, timestamp, action in changes:
  123. action_hash = hashlib.sha512(u":".join([unicode(x) for x in [name, version, timestamp, action]]).encode("utf-8")).hexdigest()[:32]
  124. action_key = "pypi:changelog:%s" % action_hash
  125. logdata = {"action": action, "name": name, "version": version, "timestamp": timestamp}
  126. if not self.store.exists(action_key):
  127. logger.debug(u"Processing %(name)s %(version)s %(timestamp)s %(action)s" % logdata)
  128. # Dispatch Based on the action
  129. for pattern, func in dispatch.iteritems():
  130. matches = pattern.search(action)
  131. if matches is not None:
  132. func(name, version, timestamp, action, matches)
  133. break
  134. self.store.setex(action_key, 2592000, "1")
  135. else:
  136. logger.debug(u"Skipping %(name)s %(version)s %(timestamp)s %(action)s" % logdata)
  137. # Hijack the warehouse session and url
  138. last_modified_url = urlparse.urljoin(self.warehouse.url, "/last-modified")
  139. resp = self.warehouse.session.post(last_modified_url, {"date": current.isoformat()})
  140. resp.raise_for_status()
  141. self.store.set("pypi:since", time.mktime(current.timetuple()))
  142. logger.info("Finished changed projects synchronization")