PageRenderTime 142ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/development/scripts/app_engine_server/memcache_zipserve.py

https://gitlab.com/brian0218/rk3188_rk3066_r-box_android4.4.2_sdk
Python | 756 lines | 691 code | 6 blank | 59 comment | 5 complexity | 267ba30da6683191ffc65217c24f1416 MD5 | raw file
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. #
  4. # Copyright 2009 Google Inc.
  5. #
  6. # Licensed under the Apache License, Version 2.0 (the "License");
  7. # you may not use this file except in compliance with the License.
  8. # You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. #
  18. """A class to serve pages from zip files and use memcache for performance.
  19. This contains a class and a function to create an anonymous instance of the
  20. class to serve HTTP GET requests. Memcache is used to increase response speed
  21. and lower processing cycles used in serving. Credit to Guido van Rossum and
  22. his implementation of zipserve which served as a reference as I wrote this.
  23. MemcachedZipHandler: Class that serves request
  24. create_handler: method to create instance of MemcachedZipHandler
  25. """
  26. __author__ = 'jmatt@google.com (Justin Mattson)'
  27. import email.Utils
  28. import logging
  29. import mimetypes
  30. import re
  31. import sys
  32. import time
  33. import yaml
  34. import zipfile
  35. from google.appengine.api import memcache
  36. from google.appengine.ext import webapp
  37. from google.appengine.ext.webapp import util
  38. from time import localtime, strftime
  39. def create_handler(zip_files, max_age=None, public=None):
  40. """Factory method to create a MemcachedZipHandler instance.
  41. Args:
  42. zip_files: A list of file names, or a list of lists of file name, first
  43. member of file mappings. See MemcachedZipHandler documentation for
  44. more information about using the list of lists format
  45. max_age: The maximum client-side cache lifetime
  46. public: Whether this should be declared public in the client-side cache
  47. Returns:
  48. A MemcachedZipHandler wrapped in a pretty, anonymous bow for use with App
  49. Engine
  50. Raises:
  51. ValueError: if the zip_files argument is not a list
  52. """
  53. # verify argument integrity. If the argument is passed in list format,
  54. # convert it to list of lists format
  55. if zip_files and type(zip_files).__name__ == 'list':
  56. num_items = len(zip_files)
  57. while num_items > 0:
  58. if type(zip_files[num_items - 1]).__name__ != 'list':
  59. zip_files[num_items - 1] = [zip_files[num_items-1]]
  60. num_items -= 1
  61. else:
  62. raise ValueError('File name arguments must be a list')
  63. class HandlerWrapper(MemcachedZipHandler):
  64. """Simple wrapper for an instance of MemcachedZipHandler.
  65. I'm still not sure why this is needed
  66. """
  67. def get(self, name):
  68. self.zipfilenames = zip_files
  69. self.TrueGet(name)
  70. if max_age is not None:
  71. MAX_AGE = max_age
  72. if public is not None:
  73. PUBLIC = public
  74. return HandlerWrapper
class MemcachedZipHandler(webapp.RequestHandler):
  """Handles get requests for a given URL.

  Serves a GET request from a series of zip files. As files are served they are
  put into memcache, which is much faster than retrieving them from the zip
  source file again. It also uses considerably fewer CPU cycles.
  """
  # Class-level cache of opened source zip files, keyed by file name. Being a
  # class attribute, the same dict is shared by every handler instance.
  zipfile_cache = {}
  MAX_AGE = 43200  # max client-side cache lifetime, in seconds
  PUBLIC = True  # public cache setting
  CACHE_PREFIX = 'cache://'  # memcache key prefix for actual URLs
  NEG_CACHE_PREFIX = 'noncache://'  # memcache key prefix for non-existent URLs
  REDIRECT_PREFIX = 'redirect://'  # memcache key prefix for redirect data
  REDIRECT_FILE = 'redirects.yaml'  # Name of file that contains redirect table
  REDIRECT_SRC = 'src'  # Name of the 'source' attribute for a
                        # redirect table entry
  REDIRECT_DST = 'dst'  # Name of the 'destination' attribute for
                        # a redirect table entry
  REDIRECT_TYPE = 'type'  # Name of the 'type' attribute for a
                          # redirect table entry
  REDIRECT_TYPE_PERM = 'permanent'  # Redirect 'type' string indicating a 301
                                    # redirect should be served
  REDIRECT_TYPE_TEMP = 'temporary'  # Redirect 'type' string indicating a 302
                                    # redirect should be served
  # Path prefix used when building localized (intl/<lang>/...) URLs.
  intlString = 'intl/'
  # Language codes accepted in the <lang> position of an intl/<lang>/... URL.
  validLangs = ['en', 'de', 'es', 'fr','it','ja','ko','ru','zh-CN','zh-cn','zh-TW','zh-tw']
  def TrueGet(self, reqUri):
    """The top-level entry point to serving requests.

    Called 'True' get because it does the work when called from the wrapper
    class' get method. Some logic is applied to the request to serve files
    from an intl/<lang>/... directory or fall through to the default language.

    Args:
      reqUri: URL requested (relative, without a leading slash)

    Returns:
      None
    """
    langName = 'en'
    resetLangCookie = False
    urlLangName = None
    retry = False  # never read anywhere in this method
    isValidIntl = False
    isStripped = False

    # Try to retrieve the user's lang pref from the cookie. If there is no
    # lang pref cookie in the request, add set-cookie to the response with the
    # default value of 'en'.
    try:
      langName = self.request.cookies['android_developer_pref_lang']
    except KeyError:
      resetLangCookie = True
      #logging.info('==========================EXCEPTION: NO LANG COOKIE FOUND, USING [%s]', langName)
    logging.info('==========================REQ INIT name [%s] langName [%s] resetLangCookie [%s]', reqUri, langName, resetLangCookie)

    # Do some prep for handling intl requests. Parse the url and validate
    # the intl/lang substring, extract the url lang code (urlLangName) and
    # the uri that follows the intl/lang substring (contentUri).
    sections = reqUri.split("/", 2)
    isIntl = len(sections) > 2 and (sections[0] == "intl")
    if isIntl:
      isValidIntl = sections[1] in self.validLangs
      urlLangName = sections[1]
      contentUri = sections[2]
      logging.info(' Content URI is [%s]...', contentUri)
      if isValidIntl:
        if (langName != urlLangName) or (langName == 'en'):
          # if the lang code in the request is different from that in
          # the cookie, or if the target lang is en, strip the
          # intl/nn substring. It will later be redirected to
          # the user's preferred language url.
          # logging.info(' Handling a MISMATCHED intl request')
          reqUri = contentUri
          isStripped = True
          isValidIntl = False
          isIntl = False
          #logging.info('INTL PREP resetting langName to urlLangName [%s]', langName)
        #else:
        #  logging.info('INTL PREP no need to reset langName')
    else:
      # Not an intl request: the content uri is the request uri itself.
      contentUri = reqUri

    # Apply manual redirects from redirects.yaml. This occurs before any
    # other mutations are performed, to avoid odd redirect behavior
    # (For example, a user may want to redirect a directory without having
    # /index.html appended.)
    did_redirect = self.ProcessManualRedirects(contentUri, langName, isIntl)
    if did_redirect:
      return

    # Preprocess the req url. If it references a directory or the domain itself,
    # append '/index.html' to the url and 302 redirect. Otherwise, continue
    # processing the request below.
    did_redirect = self.PreprocessUrl(reqUri, langName)
    if did_redirect:
      return

    # Send for processing
    if self.isCleanUrl(reqUri, langName, isValidIntl, isStripped):
      # handle a 'clean' request.
      # Try to form a response using the actual request url.
      # logging.info(' Request being handled as clean: [%s]', name)
      if not self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie):
        # If CreateResponse returns False, there was no such document
        # in the intl/lang tree. Before going to 404, see if there is an
        # English-language version of the doc in the default
        # tree and return it, else go to 404.
        self.CreateResponse(contentUri, langName, False, resetLangCookie)

    elif isIntl:
      # handle the case where we need to pass through an invalid intl req
      # for processing (so as to get 404 as appropriate). This is needed
      # because intl urls are passed through clean and retried in English,
      # if necessary.
      # logging.info(' Handling an invalid intl request...')
      self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie)

    else:
      # handle the case where we have a non-clean url (usually a non-intl
      # url) that we need to interpret in the context of any lang pref
      # that is set. Prepend an intl/lang string to the request url and
      # send it as a 302 redirect. After the redirect, the subsequent
      # request will be handled as a clean url.
      self.RedirToIntl(reqUri, self.intlString, langName)
  def ProcessManualRedirects(self, contentUri, langName, isIntl):
    """Compute any manual redirects for a request and execute them.

    This allows content authors to manually define a set of regex rules which,
    when matched, will cause an HTTP redirect to be performed.

    Redirect rules are typically stored in a file named redirects.yaml. See the
    comments in that file for more information about formatting.

    Redirect computations are stored in memcache for performance.

    Note that international URIs are handled automatically, and are assumed to
    mirror redirects for non-intl requests.

    Args:
      contentUri: The relative URI (without leading slash) that was requested.
          This should NOT contain an intl-prefix, if otherwise present.
      langName: The requested language.
      isIntl: True if contentUri originally contained an intl prefix.

    Returns:
      boolean: True if a redirect has been set, False otherwise.

    Raises:
      ValueError: if the redirect data carries a type that is neither
          REDIRECT_TYPE_PERM nor REDIRECT_TYPE_TEMP.
    """
    # Redirect data is stored in memcache for performance
    memcache_key = self.REDIRECT_PREFIX + contentUri
    redirect_data = memcache.get(memcache_key)
    if redirect_data is None:
      logging.info('Redirect cache miss. Computing new redirect data.\n'
                   'Memcache Key: %s', memcache_key)
      redirect_data = self.ComputeManualRedirectUrl(contentUri)
      memcache.set(memcache_key, redirect_data)
    redirectUri = redirect_data[0]
    redirectType = redirect_data[1]

    if redirectType is None:
      # No redirect necessary
      return False

    if redirectType == self.REDIRECT_TYPE_PERM:
      permanent = True
    elif redirectType == self.REDIRECT_TYPE_TEMP:
      permanent = False
    else:
      # Raise a real exception: a tuple or a string is not a valid exception
      # object, so raising one only produces an unrelated TypeError.
      logging.error('Invalid redirect type: %s', redirectType)
      raise ValueError('Invalid redirect type: %s' % redirectType)

    # If this is an international URL, prepend intl path to minimize
    # number of redirects
    if isIntl:
      redirectUri = '/%s%s%s' % (self.intlString, langName, redirectUri)

    if permanent:
      logging.info('Sending permanent redirect: %s', redirectUri)
    else:
      logging.info('Sending temporary redirect: %s', redirectUri)
    self.redirect(redirectUri, permanent=permanent)
    return True
  def ComputeManualRedirectUrl(self, uri):
    """Read redirects file and evaluate redirect rules for a given URI.

    Args:
      uri: The relative URI (without leading slash) for which redirect data
          should be computed. No special handling of intl URIs is performed
          at this level.

    Returns:
      tuple: The computed redirect data. This tuple has two parts:
        redirect_uri: The new URI that should be used. If the redirect file
            cannot be read or parsed, the input is returned unchanged. If it
            is read but no rule matches, the input is returned with a leading
            slash prepended.
        redirect_type: Either 'permanent' for an HTTP 301 redirect, 'temporary'
            for an HTTP 302 redirect, or None if no redirect should be
            performed. A rule's own 'type' value is passed through as-is.

    Raises:
      ValueError: if a redirect rule is malformed (for example a missing
          'src'/'dst' attribute or an invalid regular expression).
    """
    # Redirects are defined in a file named redirects.yaml.
    try:
      f = open(self.REDIRECT_FILE)
      try:
        # NOTE(review): yaml.load can construct arbitrary Python objects. The
        # redirect file ships with the application, but yaml.safe_load would
        # be the safer choice if that ever changes.
        data = yaml.load(f)
      finally:
        # Close the file even when parsing fails.
        f.close()
    except IOError:
      e = sys.exc_info()[1]
      logging.warning('Error opening redirect file (%s): %s',
                      self.REDIRECT_FILE, e.strerror)
      return (uri, None)
    except yaml.YAMLError:
      e = sys.exc_info()[1]
      logging.warning('Error parsing redirect file (%s): %s',
                      self.REDIRECT_FILE, e)
      return (uri, None)

    # The incoming path is missing a leading slash. However, many parts of the
    # redirect system require leading slashes to distinguish between relative
    # and absolute redirects. So, to compensate for this, we'll add a leading
    # slash here as well.
    uri = '/' + uri

    # Check to make sure we actually got an iterable list out of the YAML file
    if data is None:
      logging.warning('Redirect file (' + self.REDIRECT_FILE + ') not valid '
                      'YAML.')
      return (uri, None)
    if not isinstance(data, dict) or 'redirects' not in data:
      logging.warning('Redirect file (' + self.REDIRECT_FILE + ') not '
                      'properly formatted -- no \'redirects:\' header.')
      return (uri, None)
    if not hasattr(data['redirects'], '__iter__'):
      return (uri, None)

    # Iterate through redirect data, try to find a redirect that matches.
    for redirect in data['redirects']:
      # Note: re.match only matches at the beginning of the string, so the
      # rule's regex behaves as if it started with '^'.
      try:
        pattern = redirect[self.REDIRECT_SRC]
        if not re.match(pattern, uri):
          continue
        # Match found. Apply redirect rule.
        redirect_uri = re.sub('^' + pattern, redirect[self.REDIRECT_DST], uri)
        if self.REDIRECT_TYPE in redirect:
          redirect_type = redirect[self.REDIRECT_TYPE]
        else:
          # Default redirect type, if unspecified
          redirect_type = self.REDIRECT_TYPE_PERM
      except (KeyError, IndexError, TypeError, re.error):
        e = sys.exc_info()[1]
        # Report the whole rule: its 'src' attribute may be the missing part.
        raise ValueError('Error while processing redirect rule.\n'
                         'Rule: %r\n'
                         'Error: %s' % (redirect, e))
      logging.info('Redirect rule matched.\n'
                   'Rule: %s\n'
                   'Src: %s\n'
                   'Dst: %s',
                   pattern, uri, redirect_uri)
      return (redirect_uri, redirect_type)

    # No redirect found, return URL (with its leading slash) and no type
    return (uri, None)
  def isCleanUrl(self, name, langName, isValidIntl, isStripped):
    """Determine whether to pass an incoming url straight to processing.

    A url is 'clean' when any of the following holds: it is a valid intl
    request, it does not contain '.html', no language is set, or the language
    is 'en' and no intl prefix was stripped from the url.

    Args:
      name: The incoming URL
      langName: The language id in effect for the request
      isValidIntl: Whether the request is for a valid language-specific url
      isStripped: Whether an intl/<lang> prefix was stripped from the url

    Returns:
      boolean: True if the URL should be sent straight to processing,
          False otherwise
    """
    if isValidIntl:
      return True
    if '.html' not in name:
      return True
    if not langName:
      return True
    # Always return a real boolean rather than falling off the end with None.
    return langName == 'en' and not isStripped
  def PreprocessUrl(self, name, langName):
    """Redirect directory-style requests to their index.html.

    A request is treated as a directory request when the URL is empty, ends
    with a slash, or has a final path segment that contains no '.'. Such
    requests are redirected to '/<url>/index.html'. Subclasses may override
    this method to perform different preprocessing.

    Args:
      name: The incoming URL
      langName: The language id of the request (not used here)

    Returns:
      True if the request was redirected to '/index.html'.
      Otherwise False.
    """
    # A non-empty URL without a trailing slash still names a directory when
    # its last segment has no extension; normalise it to end with a slash.
    if name and not name.endswith('/'):
      last_segment = name.rsplit('/', 1)[-1]
      if '.' not in last_segment:
        name = name + '/'

    # Anything that still names a file is left for normal processing.
    if name and not name.endswith('/'):
      return False

    # The domain itself or a directory: redirect to its index page.
    target = '/' + name + 'index.html'
    self.redirect(target, False)
    return True
  def RedirToIntl(self, name, intlString, langName):
    """Redirect an incoming request to the appropriate intl uri.

    For non-en langName, builds the intl/lang string from a
    base (en) string and redirects (302) the request to look for
    a version of the file in langName, carrying over the request's query
    string when there is one. For en langName, simply redirects to the
    stripped uri string (intl/nn removed, no query string).

    Args:
      name: The incoming, preprocessed URL
      intlString: The intl path prefix to prepend (for example 'intl/')
      langName: The language id to redirect to

    Returns:
      The lang-specific URL
    """
    if langName == 'en':
      uri = '/' + name
    else:
      uri = '/%s%s/%s' % (intlString, langName, name)
      query_string = self.request.query_string
      # Only add the separator when there is a query to forward, so that the
      # redirect target never ends in a dangling '?'.
      if query_string:
        uri = '%s?%s' % (uri, query_string)
    logging.info('-->REDIRECTING %s to %s', name, uri)
    self.redirect(uri, False)
    return uri
  def CreateResponse(self, name, langName, isValidIntl, resetLangCookie):
    """Process the url and form a response, if appropriate.

    Attempts to retrieve the requested file (name) from cache,
    negative cache, or store (zip) and form the response.
    For intl requests that are not found (in the localized tree),
    returns False rather than forming a response, so that
    the request can be retried with the base url (this is the
    fallthrough to default language).

    For requests that are found, forms the headers and
    adds the content to the response entity. If requested, also sets the
    language cookie to langName, to ensure that the client language and
    response data remain harmonious.

    Args:
      name: The incoming, preprocessed URL
      langName: The language id. Used as necessary to reset the
          language cookie in the response.
      isValidIntl: Indicates whether the request is for a language-specific
          url
      resetLangCookie: Whether the response should reset the
          language cookie to 'langName'

    Returns:
      True: A response (content or 404) was created for the request
      False: No response was created.
    """
    # see if we have the page in the memcache
    logging.info('PROCESSING %s langName [%s] isValidIntl [%s] resetLang [%s]',
                 name, langName, isValidIntl, resetLangCookie)
    resp_data = self.GetFromCache(name)
    if resp_data is None:
      logging.info(' Cache miss for %s', name)
      if self.GetFromNegativeCache(name) is not None:
        # found it in negative cache
        self.Write404Error()
        return True

      resp_data = self.GetFromStore(name)
      if resp_data is None:
        if isValidIntl:
          # couldn't find the intl doc. Let the caller fall through to
          # English.
          return False
        logging.info(' Adding %s to negative cache, serving 404', name)
        self.StoreInNegativeCache(name)
        self.Write404Error()
        return True

      # We have the file, put it in the memcache
      self.StoreOrUpdateInCache(name, resp_data)

    # found content from cache or store
    logging.info('FOUND CLEAN')
    if resetLangCookie:
      logging.info(' Resetting android_developer_pref_lang cookie to [%s]',
                   langName)
      # Cookie expiry dates are expressed in GMT; expire ten years from now.
      expireDate = time.gmtime(time.time() + 60 * 60 * 24 * 365 * 10)
      self.response.headers.add_header(
          'Set-Cookie',
          'android_developer_pref_lang=%s; path=/; expires=%s' %
          (langName, time.strftime('%a, %d %b %Y %H:%M:%S GMT', expireDate)))

    mustRevalidate = False
    if '.html' in name:
      # revalidate html files -- workaround for cache inconsistencies for
      # negotiated responses
      mustRevalidate = True
      self.response.headers.add_header('Vary', 'Cookie')

    content_type, _ = mimetypes.guess_type(name)
    if not content_type:
      # Types that mimetypes may not know about. Anything else (including
      # .psd) is served as a generic binary stream, so that a file that was
      # found is never answered with an empty body.
      fallback_types = (
          ('.svg', 'image/svg+xml'),
          ('.mp4', 'video/mp4'),
          ('.webm', 'video/webm'),
          ('.ogv', 'video/ogg'),
      )
      content_type = 'application/octet-stream'
      if name == 'favicon.ico':
        content_type = 'image/x-icon'
      else:
        for suffix, fallback_type in fallback_types:
          if name.endswith(suffix):
            content_type = fallback_type
            break

    self.response.headers['Content-Type'] = content_type
    self.SetCachingHeaders(mustRevalidate)
    self.response.out.write(resp_data)
    return True
  def GetFromStore(self, file_path):
    """Retrieve file from zip files.

    Get the file from the source, it must not have been in the memcache. If
    possible, we'll use the zip file index to quickly locate where the file
    should be found. (See MapFileToArchive documentation for assumptions about
    file ordering.) If we don't have an index or don't find the file where the
    index says we should, look through all the zip files to find it.

    Args:
      file_path: the file that we're looking for

    Returns:
      The contents of the requested file, or None if no archive contains it
    """
    # decode any escape characters in the URI
    # Note: We are currently just looking for '@' (%40)
    file_path = file_path.replace('%40', '@')

    # Search order: the archive the index points at (if any) first, then every
    # configured archive in order. Each archive is tried at most once.
    candidates = []
    indexed_archive = self.MapFileToArchive(file_path)
    if indexed_archive:
      candidates.append(indexed_archive)
    for entry in self.zipfilenames:
      archive_name = entry[0]
      if archive_name and archive_name not in candidates:
        candidates.append(archive_name)

    for archive_name in candidates:
      zip_archive = self.LoadZipFile(archive_name)
      if zip_archive is None:
        continue
      # we expect some lookups will fail, and that's okay, 404s will deal
      # with that
      try:
        resp_data = zip_archive.read(file_path)
      except (KeyError, RuntimeError):
        continue
      if resp_data is not None:
        logging.info('%s read from %s', file_path, archive_name)
        return resp_data

    return None
  def LoadZipFile(self, zipfilename):
    """Convenience method to load zip file.

    Just a convenience method to load the zip file from the data store. This is
    useful if we ever want to change data stores and also as a means of
    dependency injection for testing. This method will look at our file cache
    first, and then load and cache the file if there's a cache miss

    Args:
      zipfilename: the name of the zip file to load

    Returns:
      The zip file requested, or None if it cannot be opened (I/O error or
      not a valid zip file)
    """
    zip_archive = self.zipfile_cache.get(zipfilename)
    if zip_archive is not None:
      return zip_archive

    try:
      zip_archive = zipfile.ZipFile(zipfilename)
    except (IOError, RuntimeError, zipfile.BadZipfile):
      # A corrupt archive raises BadZipfile, which is not an IOError; treat
      # it like any other unreadable archive instead of failing the request.
      logging.error('Can\'t open zipfile %s, cause: %s',
                    zipfilename, sys.exc_info()[1])
      return None

    self.zipfile_cache[zipfilename] = zip_archive
    return zip_archive
  def MapFileToArchive(self, file_path):
    """Pick the archive that is expected to hold a given file.

    Relies on two assumptions about how the archives were produced:
      (1) Concatenating the configured zip files in their configured order
          yields a total ordering of all files (ordering described in (2)).
      (2) Upper case letters sort before lower case letters, directory trees
          are traversed depth first, and a directory's own files come before
          the files of its child directories.

    Entries are examined from the last archive to the first. Entries that
    carry no second member (the file name compared against) are skipped.

    Args:
      file_path: the file to be mapped to an archive

    Returns:
      The name of the archive where we expect the file to be, or None when
      no entry qualifies
    """
    for entry in reversed(self.zipfilenames):
      if len(entry) <= 1:
        # no mapping information for this archive
        continue
      if self.CompareFilenames(entry[1], file_path) >= 0:
        return entry[0]
    return None
  def CompareFilenames(self, file1, file2):
    """Determines whether file1 is lexicographically 'before' file2.

    WARNING: This method assumes that paths are output in a depth-first,
    with parent directories' files stored before childs'

    We say that file1 is lexicographically before file2 if the last
    non-matching path segment of file1 is alphabetically before file2.

    Args:
      file1: the first file path
      file2: the second file path

    Returns:
      A positive number if file1 is before file2
      A negative number if file2 is before file1
      0 if filenames are the same
    """
    f1_segments = file1.split('/')
    f2_segments = file2.split('/')
    f1_len = len(f1_segments)
    f2_len = len(f2_segments)
    shortest = min(f1_len, f2_len)

    # advance to the first segment where the two paths differ
    segment_ptr = 0
    while (segment_ptr < shortest and
           f1_segments[segment_ptr] == f2_segments[segment_ptr]):
      segment_ptr += 1

    if f1_len != f2_len:
      # The number of segments differs. The path with fewer segments is first
      # when one path is a prefix of the other (no differing segment exists),
      # or when the differing segment is the last segment of one of the
      # paths, because files come before directories.
      if (segment_ptr == shortest or
          segment_ptr + 1 == f1_len or
          segment_ptr + 1 == f2_len):
        return f2_len - f1_len
    elif segment_ptr == f1_len:
      # we fell off the end, the paths must be the same
      return 0

    # compare the segments (file or directory names) where the paths differ
    if f1_segments[segment_ptr] < f2_segments[segment_ptr]:
      return 1
    if f1_segments[segment_ptr] > f2_segments[segment_ptr]:
      return -1
    return 0
  def SetCachingHeaders(self, revalidate):
    """Write the Cache-Control header of the response.

    The header carries 'public' when the PUBLIC setting is on, a max-age
    taken from the MAX_AGE setting, and 'must-revalidate' when requested.

    Args:
      revalidate: Whether 'must-revalidate' should be added to the header
    """
    directives = ['max-age=%d' % self.MAX_AGE]
    if self.PUBLIC:
      # 'public' goes in front of max-age
      directives.insert(0, 'public')
    if revalidate:
      directives.append('must-revalidate')
    self.response.headers['Cache-Control'] = ', '.join(directives)
  def GetFromCache(self, filename):
    """Look a file up in memcache.

    Args:
      filename: The URL of the file to return

    Returns:
      Whatever memcache holds under the CACHE_PREFIX key for this URL
    """
    cache_key = '%s%s' % (self.CACHE_PREFIX, filename)
    return memcache.get(cache_key)
  def StoreOrUpdateInCache(self, filename, data):
    """Store data in the cache.

    Store a piece of data in the memcache, replacing any value already stored
    for the file. Memcache has a maximum item size of 1*10^6 bytes. If the
    data is too large, fail, but log the failure. Future work will consider
    compressing the data before storing or chunking it

    Args:
      filename: the name of the file to store
      data: the data of the file

    Returns:
      None
    """
    cache_key = '%s%s' % (self.CACHE_PREFIX, filename)
    try:
      # set() stores the value whether or not the key already exists, which
      # replaces the separate add-then-replace round trips with one call.
      memcache.set(cache_key, data)
    except ValueError:
      logging.warning('Data size too large to cache\n%s', sys.exc_info()[1])
  def Write404Error(self):
    """Set the 404 status and write a minimal 'not found' HTML page."""
    not_found_page = ('<html><head><title>404: Not Found</title></head>'
                      '<body><b><h2>Error 404</h2><br/>'
                      'File not found</b></body></html>')
    self.error(404)
    self.response.out.write(not_found_page)
  def StoreInNegativeCache(self, filename):
    """Remember that a URL does not exist.

    Adds a marker value (-1) to memcache under the NEG_CACHE_PREFIX key of
    the URL. Future work should consider setting a maximum negative cache
    size to prevent it from negatively impacting the real cache.

    Args:
      filename: URL to add to negative cache

    Returns:
      None
    """
    negative_key = '%s%s' % (self.NEG_CACHE_PREFIX, filename)
    memcache.add(negative_key, -1)
  def GetFromNegativeCache(self, filename):
    """Look a URL up in the negative cache.

    Args:
      filename: URL to retrieve

    Returns:
      Whatever memcache holds under the NEG_CACHE_PREFIX key for this URL
      (StoreInNegativeCache stores the marker value -1 there).
    """
    negative_key = '%s%s' % (self.NEG_CACHE_PREFIX, filename)
    return memcache.get(negative_key)
  642. def main():
  643. application = webapp.WSGIApplication([('/([^/]+)/(.*)',
  644. MemcachedZipHandler)])
  645. util.run_wsgi_app(application)
  646. if __name__ == '__main__':
  647. main()