/circuits/web/tools.py

https://bitbucket.org/prologic/circuits/ · Python · 455 lines · 324 code · 44 blank · 87 comment · 63 complexity · 0cfb7e6c0b01781249d36c2caa705937 MD5 · raw file

  1. # Module: tools
  2. # Date: 16th February 2009
  3. # Author: James Mills, prologic at shortcircuit dot net dot au
  4. """Tools
  5. This module implements tools used throughout circuits.web.
  6. These tools can also be used within Controllers and request handlers.
  7. """
  8. import os
  9. import stat
  10. import hashlib
  11. import mimetypes
  12. import collections
  13. from time import mktime
  14. from email.utils import formatdate
  15. from datetime import datetime, timedelta
  16. from email.generator import _make_boundary
  # Initialise the mimetypes tables, then register a few extra extension
  # mappings; serve_file() reads mimetypes.types_map to guess a Content-Type.
  17. mimetypes.init()
  18. mimetypes.add_type("image/x-dwg", ".dwg")
  19. mimetypes.add_type("image/x-icon", ".ico")
  20. mimetypes.add_type("text/javascript", ".js")
  21. mimetypes.add_type("application/xhtml+xml", ".xhtml")
  22. from . import _httpauth
  23. from .utils import get_ranges, compress
  24. from .errors import httperror, notfound, redirect, unauthorized
  def expires(request, response, secs=0, force=False):
      """Tool for influencing cache mechanisms using the 'Expires' header.

      'secs' must be either an int or a datetime.timedelta, and indicates the
      number of seconds between response.time and when the response should
      expire. The 'Expires' header will be set to (response.time + secs).

      If 'secs' is zero, the 'Expires' header is set one year in the past, and
      the following "cache prevention" headers are also set:

      - 'Pragma': 'no-cache'
      - 'Cache-Control': 'no-cache, must-revalidate' (only when
        request.protocol is at least (1, 1))

      If 'force' is False (the default), the following headers are checked:
      'Etag', 'Last-Modified', 'Age', 'Expires'. If any are already present,
      none of the above response headers are set.

      :param request: The current request; only ``request.protocol`` is read.
      :param response: The current response; ``response.headers`` is updated
                       and ``response.time`` is read when 'secs' is non-zero.
      :param secs: Lifetime of the response in seconds (int or timedelta).
      :param force: If True, overwrite headers even if already present.
      :returns: None; the response headers are modified in place.
      """
      headers = response.headers

      if not force:
          # Some header names that indicate that the response can be cached;
          # if other code has already set one, leave the response untouched.
          for indicator in ('Etag', 'Last-Modified', 'Age', 'Expires'):
              if indicator in headers:
                  return

      if isinstance(secs, timedelta):
          secs = (86400 * secs.days) + secs.seconds

      if secs == 0:
          if force or "Pragma" not in headers:
              headers["Pragma"] = "no-cache"
          if request.protocol >= (1, 1):
              if force or "Cache-Control" not in headers:
                  headers["Cache-Control"] = "no-cache, must-revalidate"
          # Set an explicit Expires date in the past. Subtracting a timedelta
          # (instead of replacing the year) cannot fail on 29 February.
          lastyear = datetime.now() - timedelta(days=365)
          expiry = formatdate(mktime(lastyear.timetuple()), usegmt=True)
      else:
          expiry = formatdate(response.time + secs, usegmt=True)

      if force or "Expires" not in headers:
          headers["Expires"] = expiry
  def serve_file(request, response, path, type=None, disposition=None,
                 name=None):
      """Set status, headers, and body in order to serve the given file.

      The Content-Type header will be set to the type arg, if provided.
      If not provided, the Content-Type will be guessed by the file extension
      of the 'path' argument.

      If disposition is not None, the Content-Disposition header will be set
      to "<disposition>; filename=<name>". If name is None, it will be set
      to the basename of path. If disposition is None, no Content-Disposition
      header will be written.

      :param path: Absolute filesystem path of the file to serve.
      :returns: ``response`` on success; otherwise whatever ``notfound``,
                ``validate_since`` or ``httperror`` return (missing file or
                directory, conditional request hit, or 416 for an
                unsatisfiable Range).
      :raises ValueError: If 'path' is not an absolute path.
      """
      if not os.path.isabs(path):
          raise ValueError("'%s' is not an absolute path." % path)

      try:
          st = os.stat(path)
      except OSError:
          return notfound(request, response)

      # Check if path is a directory.
      if stat.S_ISDIR(st.st_mode):
          # Let the caller deal with it as they like.
          return notfound(request, response)

      # Set the Last-Modified response header, so that
      # modified-since validation code can work.
      response.headers['Last-Modified'] = formatdate(
          st.st_mtime, usegmt=True
      )

      result = validate_since(request, response)
      if result is not None:
          return result

      if type is None:
          # Set content-type based on filename extension
          ext = os.path.splitext(path)[1].lower()
          type = mimetypes.types_map.get(ext, "text/plain")
      response.headers['Content-Type'] = type

      if disposition is not None:
          if name is None:
              name = os.path.basename(path)
          cd = '%s; filename="%s"' % (disposition, name)
          response.headers["Content-Disposition"] = cd

      c_len = st.st_size

      # Work out the requested ranges *before* opening the file so that the
      # 416 path never holds (and leaks) an open file handle.
      ranges = None
      # HTTP/1.0 didn't have Range/Accept-Ranges headers, or the 206 code
      if request.protocol >= (1, 1):
          response.headers["Accept-Ranges"] = "bytes"
          ranges = get_ranges(request.headers.get('Range'), c_len)
          if ranges == []:
              response.headers['Content-Range'] = "bytes */%s" % c_len
              return httperror(request, response, 416)

      if not ranges:
          # Whole file: set Content-Length and hand over an iterable (file
          # object) so the whole file is not loaded in memory. Ownership of
          # the handle passes to whatever consumes response.body.
          response.headers['Content-Length'] = c_len
          response.body = open(path, 'rb')
          return response

      response.status = 206

      if len(ranges) == 1:
          # Return a single-part response.
          start, stop = ranges[0]
          r_len = stop - start
          response.headers['Content-Range'] = (
              "bytes %s-%s/%s" % (start, stop - 1, c_len)
          )
          response.headers['Content-Length'] = r_len
          # The slice is read eagerly, so the file can be closed right away.
          with open(path, 'rb') as bodyfile:
              bodyfile.seek(start)
              response.body = bodyfile.read(r_len)
          return response

      # Return a multipart/byteranges response.
      boundary = _make_boundary()
      ct = "multipart/byteranges; boundary=%s" % boundary
      response.headers['Content-Type'] = ct
      if "Content-Length" in response.headers:
          # Delete Content-Length header so finalize() recalcs it.
          del response.headers["Content-Length"]

      bodyfile = open(path, 'rb')

      def file_ranges():
          # Close the file once every part has been produced (or the
          # generator is discarded part-way through).
          try:
              # Apache compatibility:
              yield "\r\n"

              for start, stop in ranges:
                  yield "--" + boundary
                  yield "\r\nContent-type: %s" % type
                  yield ("\r\nContent-range: bytes %s-%s/%s\r\n\r\n"
                         % (start, stop - 1, c_len))
                  bodyfile.seek(start)
                  yield bodyfile.read(stop - start)
                  yield "\r\n"
              # Final boundary
              yield "--" + boundary + "--"

              # Apache compatibility:
              yield "\r\n"
          finally:
              bodyfile.close()

      response.body = file_ranges()
      return response
  def serve_download(request, response, path, name=None):
      """Serve 'path' as an application/x-download attachment.

      Delegates to serve_file() with a fixed content type and an
      "attachment" disposition; 'name' is passed through unchanged.
      """
      return serve_file(
          request, response, path,
          type="application/x-download",
          disposition="attachment",
          name=name
      )
  def validate_etags(request, response, autotags=False):
      """Validate the current ETag against If-Match, If-None-Match headers.

      If autotags is True, an ETag response-header value will be provided
      from an MD5 hash of the response body (unless some other code has
      already provided an ETag header). If False (the default), the ETag
      will not be automatic.

      WARNING: the autotags feature is not designed for URL's which allow
      methods other than GET. For example, if a POST to the same URL returns
      no content, the automatic ETag will be incorrect, breaking a fundamental
      use for entity tags in a possibly destructive fashion. Likewise, if you
      raise 304 Not Modified, the response body will be empty, the ETag hash
      will be incorrect, and your application will break.
      See http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.24

      :returns: None when no precondition applies or fails; otherwise the
                result of ``httperror`` (412) or ``redirect`` (304).
      """
      # Guard against being run twice.
      if hasattr(response, "ETag"):
          return

      status = response.status
      etag = response.headers.get('ETag')

      # Automatic ETag generation. See warning in docstring.
      if (not etag) and autotags:
          if status == 200:
              body = response.collapse_body()
              if not isinstance(body, bytes):
                  # hashlib only digests bytes; encode text bodies first.
                  body = body.encode("utf-8")
              etag = '"%s"' % hashlib.md5(body).hexdigest()
              response.headers['ETag'] = etag

      # Remember the value on the response; this is what the guard above
      # checks on a second call.
      response.ETag = etag

      # "If the request would, without the If-Match header field, result in
      # anything other than a 2xx or 412 status, then the If-Match header
      # MUST be ignored."
      if status >= 200 and status <= 299:
          conditions = request.headers.elements('If-Match') or []
          conditions = [str(x) for x in conditions]
          if conditions and not (conditions == ["*"] or etag in conditions):
              return httperror(
                  request, response, 412,
                  description="If-Match failed: ETag %r did not match %r" % (
                      etag, conditions
                  )
              )

          conditions = request.headers.elements('If-None-Match') or []
          conditions = [str(x) for x in conditions]
          if conditions == ["*"] or etag in conditions:
              if request.method in ("GET", "HEAD"):
                  return redirect(request, response, [], code=304)
              else:
                  return httperror(
                      request, response, 412,
                      description=(
                          "If-None-Match failed: ETag %r matched %r" % (
                              etag, conditions
                          )
                      )
                  )
  def validate_since(request, response):
      """Validate the current Last-Modified against If-Modified-Since headers.

      If no code has set the Last-Modified response header, then no validation
      will be performed.

      The header values are parsed as HTTP dates and compared as points in
      time. If either value cannot be parsed, the comparison falls back to
      plain string (in)equality.

      :returns: None when the request may proceed; otherwise the result of
                ``httperror`` (412) or ``redirect`` (304).
      """
      from email.utils import mktime_tz, parsedate_tz

      def _epoch(value):
          # Seconds since the epoch for an HTTP date, or None if unparsable.
          try:
              parsed = parsedate_tz(value)
              if parsed is None:
                  return None
              return mktime_tz(parsed)
          except (TypeError, ValueError, OverflowError):
              return None

      lastmod = response.headers.get('Last-Modified')
      if not lastmod:
          return

      status = response.status
      lastmod_ts = _epoch(lastmod)

      since = request.headers.get('If-Unmodified-Since')
      if since:
          since_ts = _epoch(since)
          if since == lastmod:
              changed = False
          elif lastmod_ts is None or since_ts is None:
              changed = True
          else:
              # Only a modification *after* the given date is a failure.
              changed = lastmod_ts > since_ts
          if changed:
              if (status >= 200 and status <= 299) or status == 412:
                  return httperror(request, response, 412)

      since = request.headers.get('If-Modified-Since')
      if since:
          since_ts = _epoch(since)
          if since == lastmod:
              unchanged = True
          elif lastmod_ts is None or since_ts is None:
              unchanged = False
          else:
              # Not modified if the last change is at or before the date.
              unchanged = lastmod_ts <= since_ts
          if unchanged:
              if (status >= 200 and status <= 299) or status == 304:
                  if request.method in ("GET", "HEAD"):
                      return redirect(request, response, [], code=304)
                  else:
                      return httperror(request, response, 412)
  def check_auth(request, response, realm, users, encrypt=None):
      """Check Authentication

      If an Authorization header contains credentials, return True, else False.
      If the Authorization header is present but cannot be parsed, the result
      of ``httperror(request, response, 400)`` is returned instead.

      On a successful check ``request.login`` is set to the username; when
      credentials were supplied but rejected it is set to False.

      :param realm: The authentication realm.
      :type  realm: str

      :param users: A dict of the form: {username: password} or a callable
                    returning a dict.
      :type  users: dict or callable

      :param encrypt: Callable used to encrypt the password returned from
                      the user-agent. if None it defaults to a md5 encryption.
      :type  encrypt: callable

      :raises ValueError: If 'users' (or what it returns) is not a dict.
      """
      if "Authorization" in request.headers:
          # make sure the provided credentials are correctly set
          ah = _httpauth.parseAuthorization(
              request.headers.get("Authorization")
          )
          if ah is None:
              return httperror(request, response, 400)

          if not encrypt:
              encrypt = _httpauth.DIGEST_AUTH_ENCODERS[_httpauth.MD5]

          # Builtin callable() instead of collections.Callable, which no
          # longer exists in the collections module on current Pythons.
          if callable(users):
              try:
                  # backward compatibility
                  users = users()  # expect it to return a dictionary

                  if not isinstance(users, dict):
                      raise ValueError("Authentication users must be a dict")

                  # fetch the user password
                  password = users.get(ah["username"], None)
              except TypeError:
                  # returns a password (encrypted or clear text)
                  password = users(ah["username"])
          else:
              if not isinstance(users, dict):
                  raise ValueError("Authentication users must be a dict")

              # fetch the user password
              password = users.get(ah["username"], None)

          # validate the Authorization by re-computing it here
          # and compare it with what the user-agent provided
          if _httpauth.checkResponse(ah, password, method=request.method,
                                     encrypt=encrypt, realm=realm):
              request.login = ah["username"]
              return True

          request.login = False

      return False
  def basic_auth(request, response, realm, users, encrypt=None):
      """Perform Basic Authentication

      If auth fails, returns an Unauthorized error with a
      basic authentication header.

      :param realm: The authentication realm.
      :type  realm: str

      :param users: A dict of the form: {username: password} or a callable
                    returning a dict.
      :type  users: dict or callable

      :param encrypt: Callable used to encrypt the password returned from
                      the user-agent. if None it defaults to a md5 encryption.
      :type  encrypt: callable

      :returns: None when the credentials are accepted; otherwise an error
                result (from check_auth or ``unauthorized``).
      """
      result = check_auth(request, response, realm, users, encrypt)
      if result is True:
          return

      if result:
          # check_auth can hand back a non-boolean error result (its 400 for
          # an unparsable Authorization header). That must not be mistaken
          # for a successful login, so pass it on to the caller.
          return result

      # inform the user-agent this path is protected
      response.headers["WWW-Authenticate"] = _httpauth.basicAuth(realm)

      return unauthorized(request, response)
  def digest_auth(request, response, realm, users):
      """Perform Digest Authentication

      If auth fails, returns an Unauthorized error with a
      digest authentication header.

      :param realm: The authentication realm.
      :type  realm: str

      :param users: A dict of the form: {username: password} or a callable
                    returning a dict.
      :type  users: dict or callable

      :returns: None when the credentials are accepted; otherwise an error
                result (from check_auth or ``unauthorized``).
      """
      result = check_auth(request, response, realm, users)
      if result is True:
          return

      if result:
          # check_auth can hand back a non-boolean error result (its 400 for
          # an unparsable Authorization header). That must not be mistaken
          # for a successful login, so pass it on to the caller.
          return result

      # inform the user-agent this path is protected
      response.headers["WWW-Authenticate"] = _httpauth.digestAuth(realm)

      return unauthorized(request, response)
  def gzip(response, level=4, mime_types=('text/html', 'text/plain')):
      """Try to gzip the response body if Content-Type in mime_types.

      response.headers['Content-Type'] must be set to one of the
      values in the mime_types arg before calling this function.

      No compression is performed if any of the following hold:
          * The client sends no Accept-Encoding request header
          * No 'gzip' or 'x-gzip' is present in the Accept-Encoding header
          * No 'gzip' or 'x-gzip' with a qvalue > 0 is present
          * The 'identity' value is given with a qvalue > 0.

      :param response: The response to compress; its request is read from
                       ``response.request``.
      :param level: Compression level passed to ``compress``.
      :param mime_types: Content types eligible for compression.
      :returns: ``response`` (compressed or untouched), or the result of
                ``httperror`` (406) when the Accept-Encoding header lists
                neither an acceptable 'identity' nor any gzip coding.
      """
      if not response.body:
          # Response body is empty (might be a 304 for instance)
          return response

      # If returning cached content (which should already have been gzipped),
      # don't re-zip.
      if getattr(response.request, "cached", False):
          return response

      acceptable = response.request.headers.elements('Accept-Encoding')
      if not acceptable:
          # If no Accept-Encoding field is present in a request,
          # the server MAY assume that the client will accept any
          # content coding. In this case, if "identity" is one of
          # the available content-codings, then the server SHOULD use
          # the "identity" content-coding, unless it has additional
          # information that a different content-coding is meaningful
          # to the client.
          return response

      ct = response.headers.get('Content-Type', 'text/html').split(';')[0]
      for coding in acceptable:
          if coding.value == 'identity' and coding.qvalue != 0:
              return response
          if coding.value in ('gzip', 'x-gzip'):
              if coding.qvalue == 0:
                  return response
              if ct in mime_types:
                  # Advertise that the representation depends on the
                  # Accept-Encoding request header.
                  varies = response.headers.get("Vary", "")
                  varies = [x.strip() for x in varies.split(",") if x.strip()]
                  if "Accept-Encoding" not in varies:
                      varies.append("Accept-Encoding")
                  response.headers['Vary'] = ", ".join(varies)

                  response.headers['Content-Encoding'] = 'gzip'
                  response.body = compress(response.body, level)
                  if "Content-Length" in response.headers:
                      # Delete Content-Length header so finalize() recalcs it.
                      del response.headers["Content-Length"]
              return response

      return httperror(
          response.request, response, 406, description="identity, gzip"
      )