PageRenderTime 57ms CodeModel.GetById 1ms RepoModel.GetById 0ms app.codeStats 0ms

/chrome/common/extensions/docs/server2/new_github_file_system.py

https://gitlab.com/jonnialva90/iridium-browser
Python | 294 lines | 249 code | 13 blank | 32 comment | 13 complexity | c008c3aa4cac9c9493dafebfaf23e02b MD5 | raw file
  1. # Copyright 2013 The Chromium Authors. All rights reserved.
  2. # Use of this source code is governed by a BSD-style license that can be
  3. # found in the LICENSE file.
  4. import json
  5. import logging
  6. from cStringIO import StringIO
  7. import posixpath
  8. import traceback
  9. from zipfile import ZipFile
  10. import appengine_blobstore as blobstore
  11. from appengine_url_fetcher import AppEngineUrlFetcher
  12. from appengine_wrappers import urlfetch
  13. from docs_server_utils import StringIdentity
  14. from file_system import FileNotFoundError, FileSystem, FileSystemError, StatInfo
  15. from future import Future
  16. from object_store_creator import ObjectStoreCreator
  17. from path_util import AssertIsDirectory, IsDirectory
  18. import url_constants
  19. _GITHUB_REPOS_NAMESPACE = 'GithubRepos'  # Passed to blobstore Get/Set for repo zip blobs.
  20. def _LoadCredentials(object_store_creator):
  21. '''Returns (username, password) from |password_store|.
  22. '''
  23. password_store = object_store_creator.Create(
  24. GithubFileSystem,
  25. app_version=None,
  26. category='password',
  27. start_empty=False)
  28. password_data = password_store.GetMulti(('username', 'password')).Get()
  29. return password_data.get('username'), password_data.get('password')
  30. class _GithubZipFile(object):
  31. '''A view of a ZipFile with a more convenient interface which ignores the
  32. 'zipball' prefix that all paths have. The zip files that come straight from
  33. GitHub have paths like ['zipball/foo.txt', 'zipball/bar.txt'] but we only
  34. care about ['foo.txt', 'bar.txt'].
  35. '''
  36. @classmethod
  37. def Create(cls, repo_name, blob):
  38. try:
  39. zipball = ZipFile(StringIO(blob))
  40. except:
  41. logging.warning('zipball "%s" is not a valid zip' % repo_name)
  42. return None
  43. if not zipball.namelist():
  44. logging.warning('zipball "%s" is empty' % repo_name)
  45. return None
  46. name_prefix = None # probably 'zipball'
  47. paths = []
  48. for name in zipball.namelist():
  49. prefix, path = name.split('/', 1)
  50. if name_prefix and prefix != name_prefix:
  51. logging.warning('zipball "%s" has names with inconsistent prefix: %s' %
  52. (repo_name, zipball.namelist()))
  53. return None
  54. name_prefix = prefix
  55. paths.append(path)
  56. return cls(zipball, name_prefix, paths)
  57. def __init__(self, zipball, name_prefix, paths):
  58. self._zipball = zipball
  59. self._name_prefix = name_prefix
  60. self._paths = paths
  61. def Paths(self):
  62. '''Return all file paths in this zip file.
  63. '''
  64. return self._paths
  65. def List(self, path):
  66. '''Returns all files within a directory at |path|. Not recursive. Paths
  67. are returned relative to |path|.
  68. '''
  69. AssertIsDirectory(path)
  70. return [p[len(path):] for p in self._paths
  71. if p != path and
  72. p.startswith(path) and
  73. '/' not in p[len(path):].rstrip('/')]
  74. def Read(self, path):
  75. '''Returns the contents of |path|. Raises a KeyError if it doesn't exist.
  76. '''
  77. return self._zipball.read(posixpath.join(self._name_prefix, path))
  78. class GithubFileSystem(FileSystem):
  79. '''Allows reading from a github.com repository.
  80. '''
  81. @staticmethod
  82. def Create(owner, repo, object_store_creator):
  83. '''Creates a GithubFileSystem that corresponds to a single github repository
  84. specified by |owner| and |repo|.
  85. '''
  86. return GithubFileSystem(
  87. url_constants.GITHUB_REPOS,
  88. owner,
  89. repo,
  90. object_store_creator,
  91. AppEngineUrlFetcher)
  92. @staticmethod
  93. def ForTest(repo, fake_fetcher, path=None, object_store_creator=None):
  94. '''Creates a GithubFileSystem that can be used for testing. It reads zip
  95. files and commit data from server2/test_data/github_file_system/test_owner
  96. instead of github.com. It reads from files specified by |repo|.
  97. '''
  98. return GithubFileSystem(
  99. path if path is not None else 'test_data/github_file_system',
  100. 'test_owner',
  101. repo,
  102. object_store_creator or ObjectStoreCreator.ForTest(),
  103. fake_fetcher)
  104. def __init__(self, base_url, owner, repo, object_store_creator, Fetcher):
  105. self._repo_key = posixpath.join(owner, repo)
  106. self._repo_url = posixpath.join(base_url, owner, repo)
  107. self._username, self._password = _LoadCredentials(object_store_creator)
  108. self._blobstore = blobstore.AppEngineBlobstore()
  109. self._fetcher = Fetcher(self._repo_url)
  110. # Stores whether the github is up-to-date. This will either be True or
  111. # empty, the emptiness most likely due to this being a cron run.
  112. self._up_to_date_cache = object_store_creator.Create(
  113. GithubFileSystem, category='up-to-date')
  114. # Caches the zip file's stat. Overrides start_empty=False and use
  115. # |self._up_to_date_cache| to determine whether we need to refresh.
  116. self._stat_cache = object_store_creator.Create(
  117. GithubFileSystem, category='stat-cache', start_empty=False)
  118. # Created lazily in |_EnsureRepoZip|.
  119. self._repo_zip = None
  120. def _EnsureRepoZip(self):
  121. '''Initializes |self._repo_zip| if it hasn't already been (i.e. if
  122. _EnsureRepoZip has never been called before). In that case |self._repo_zip|
  123. will be set to a Future of _GithubZipFile and the fetch process started,
  124. whether that be from a blobstore or if necessary all the way from GitHub.
  125. '''
  126. if self._repo_zip is not None:
  127. return
  128. repo_key, repo_url, username, password = (
  129. self._repo_key, self._repo_url, self._username, self._password)
  130. def fetch_from_blobstore(version):
  131. '''Returns a Future which resolves to the _GithubZipFile for this repo
  132. fetched from blobstore.
  133. '''
  134. blob = None
  135. try:
  136. blob = self._blobstore.Get(repo_url, _GITHUB_REPOS_NAMESPACE)
  137. except blobstore.BlobNotFoundError:
  138. pass
  139. if blob is None:
  140. logging.warning('No blob for %s found in datastore' % repo_key)
  141. return fetch_from_github(version)
  142. repo_zip = _GithubZipFile.Create(repo_key, blob)
  143. if repo_zip is None:
  144. logging.warning('Blob for %s was corrupted in blobstore!?' % repo_key)
  145. return fetch_from_github(version)
  146. return Future(value=repo_zip)
  147. def fetch_from_github(version):
  148. '''Returns a Future which resolves to the _GithubZipFile for this repo
  149. fetched new from GitHub, then writes it to blobstore and |version| to the
  150. stat caches.
  151. '''
  152. def get_zip(github_zip):
  153. try:
  154. blob = github_zip.content
  155. except urlfetch.DownloadError:
  156. raise FileSystemError('Failed to download repo %s file from %s' %
  157. (repo_key, repo_url))
  158. repo_zip = _GithubZipFile.Create(repo_key, blob)
  159. if repo_zip is None:
  160. raise FileSystemError('Blob for %s was fetched corrupted from %s' %
  161. (repo_key, repo_url))
  162. self._blobstore.Set(self._repo_url, blob, _GITHUB_REPOS_NAMESPACE)
  163. self._up_to_date_cache.Set(repo_key, True)
  164. self._stat_cache.Set(repo_key, version)
  165. return repo_zip
  166. return self._fetcher.FetchAsync(
  167. 'zipball', username=username, password=password).Then(get_zip)
  168. # To decide whether we need to re-stat, and from there whether to re-fetch,
  169. # make use of ObjectStore's start-empty configuration. If
  170. # |object_store_creator| is configured to start empty then our creator
  171. # wants to refresh (e.g. running a cron), so fetch the live stat from
  172. # GitHub. If the stat hasn't changed since last time then no reason to
  173. # re-fetch from GitHub, just take from blobstore.
  174. cached_version = self._stat_cache.Get(repo_key).Get()
  175. if self._up_to_date_cache.Get(repo_key).Get() is None:
  176. # This is either a cron or an instance where a cron has never been run.
  177. live_version = self._FetchLiveVersion(username, password)
  178. if cached_version != live_version:
  179. # Note: branch intentionally triggered if |cached_version| is None.
  180. logging.info('%s has changed, fetching from GitHub.' % repo_url)
  181. self._repo_zip = fetch_from_github(live_version)
  182. else:
  183. # Already up to date. Fetch from blobstore. No need to set up-to-date
  184. # to True here since it'll already be set for instances, and it'll
  185. # never be set for crons.
  186. logging.info('%s is up to date.' % repo_url)
  187. self._repo_zip = fetch_from_blobstore(cached_version)
  188. else:
  189. # Instance where cron has been run. It should be in blobstore.
  190. self._repo_zip = fetch_from_blobstore(cached_version)
  191. assert self._repo_zip is not None
  192. def _FetchLiveVersion(self, username, password):
  193. '''Fetches the current repository version from github.com and returns it.
  194. The version is a 'sha' hash value.
  195. '''
  196. # TODO(kalman): Do this asynchronously (use FetchAsync).
  197. result = self._fetcher.Fetch(
  198. 'commits/HEAD', username=username, password=password)
  199. try:
  200. return json.loads(result.content)['sha']
  201. except (KeyError, ValueError):
  202. raise FileSystemError('Error parsing JSON from repo %s: %s' %
  203. (self._repo_url, traceback.format_exc()))
  204. def Refresh(self):
  205. return self.ReadSingle('')
  206. def Read(self, paths, skip_not_found=False):
  207. '''Returns a directory mapping |paths| to the contents of the file at each
  208. path. If path ends with a '/', it is treated as a directory and is mapped to
  209. a list of filenames in that directory.
  210. '''
  211. self._EnsureRepoZip()
  212. def read(repo_zip):
  213. reads = {}
  214. for path in paths:
  215. if path not in repo_zip.Paths():
  216. raise FileNotFoundError('"%s": %s not found' % (self._repo_key, path))
  217. if IsDirectory(path):
  218. reads[path] = repo_zip.List(path)
  219. else:
  220. reads[path] = repo_zip.Read(path)
  221. return reads
  222. return self._repo_zip.Then(read)
  223. def Stat(self, path):
  224. '''Stats |path| returning its version as as StatInfo object. If |path| ends
  225. with a '/', it is assumed to be a directory and the StatInfo object returned
  226. includes child_versions for all paths in the directory.
  227. File paths do not include the name of the zip file, which is arbitrary and
  228. useless to consumers.
  229. Because the repository will only be downloaded once per server version, all
  230. stat versions are always 0.
  231. '''
  232. self._EnsureRepoZip()
  233. repo_zip = self._repo_zip.Get()
  234. if path not in repo_zip.Paths():
  235. raise FileNotFoundError('"%s" does not contain file "%s"' %
  236. (self._repo_key, path))
  237. version = self._stat_cache.Get(self._repo_key).Get()
  238. assert version is not None, ('There was a zipball in datastore; there '
  239. 'should be a version cached for it')
  240. stat_info = StatInfo(version)
  241. if IsDirectory(path):
  242. stat_info.child_versions = dict((p, StatInfo(version))
  243. for p in repo_zip.List(path))
  244. return stat_info
  245. def GetIdentity(self):
  246. return '%s' % StringIdentity(self.__class__.__name__ + self._repo_key)
  247. def __repr__(self):
  248. return '%s(key=%s, url=%s)' % (type(self).__name__,
  249. self._repo_key,
  250. self._repo_url)