PageRenderTime 46ms CodeModel.GetById 22ms app.highlight 19ms RepoModel.GetById 1ms app.codeStats 0ms

/circuits/web/tools.py

https://bitbucket.org/prologic/circuits/
Python | 455 lines | 326 code | 43 blank | 86 comment | 70 complexity | 0cfb7e6c0b01781249d36c2caa705937 MD5 | raw file
  1# Module:   tools
  2# Date:     16th February 2009
  3# Author:   James Mills, prologic at shortcircuit dot net dot au
  4
  5"""Tools
  6
  7This module implements tools used throughout circuits.web.
  8These tools can also be used within Controllers and request handlers.
  9"""
 10
 11import os
 12import stat
 13import hashlib
 14import mimetypes
 15import collections
 16from time import mktime
 17from email.utils import formatdate
 18from datetime import datetime, timedelta
 19from email.generator import _make_boundary
 20
 21mimetypes.init()
 22mimetypes.add_type("image/x-dwg", ".dwg")
 23mimetypes.add_type("image/x-icon", ".ico")
 24mimetypes.add_type("text/javascript", ".js")
 25mimetypes.add_type("application/xhtml+xml", ".xhtml")
 26
 27from . import _httpauth
 28from .utils import get_ranges, compress
 29from .errors import httperror, notfound, redirect, unauthorized
 30
 31
 32def expires(request, response, secs=0, force=False):
 33    """Tool for influencing cache mechanisms using the 'Expires' header.
 34
 35    'secs' must be either an int or a datetime.timedelta, and indicates the
 36    number of seconds between response.time and when the response should
 37    expire. The 'Expires' header will be set to (response.time + secs).
 38
 39    If 'secs' is zero, the 'Expires' header is set one year in the past, and
 40    the following "cache prevention" headers are also set:
 41    - 'Pragma': 'no-cache'
 42    - 'Cache-Control': 'no-cache, must-revalidate'
 43
 44    If 'force' is False (the default), the following headers are checked:
 45    'Etag', 'Last-Modified', 'Age', 'Expires'. If any are already present,
 46    none of the above response headers are set.
 47    """
 48
 49    headers = response.headers
 50
 51    cacheable = False
 52    if not force:
 53        # some header names that indicate that the response can be cached
 54        for indicator in ('Etag', 'Last-Modified', 'Age', 'Expires'):
 55            if indicator in headers:
 56                cacheable = True
 57                break
 58
 59    if not cacheable:
 60        if isinstance(secs, timedelta):
 61            secs = (86400 * secs.days) + secs.seconds
 62
 63        if secs == 0:
 64            if force or "Pragma" not in headers:
 65                headers["Pragma"] = "no-cache"
 66            if request.protocol >= (1, 1):
 67                if force or "Cache-Control" not in headers:
 68                    headers["Cache-Control"] = "no-cache, must-revalidate"
 69            # Set an explicit Expires date in the past.
 70            now = datetime.now()
 71            lastyear = now.replace(year=now.year - 1)
 72            expiry = formatdate(
 73                mktime(lastyear.timetuple()), usegmt=True
 74            )
 75        else:
 76            expiry = formatdate(response.time + secs, usegmt=True)
 77        if force or "Expires" not in headers:
 78            headers["Expires"] = expiry
 79
 80
 81def serve_file(request, response, path, type=None, disposition=None,
 82               name=None):
 83    """Set status, headers, and body in order to serve the given file.
 84
 85    The Content-Type header will be set to the type arg, if provided.
 86    If not provided, the Content-Type will be guessed by the file extension
 87    of the 'path' argument.
 88
 89    If disposition is not None, the Content-Disposition header will be set
 90    to "<disposition>; filename=<name>". If name is None, it will be set
 91    to the basename of path. If disposition is None, no Content-Disposition
 92    header will be written.
 93    """
 94
 95    if not os.path.isabs(path):
 96        raise ValueError("'%s' is not an absolute path." % path)
 97
 98    try:
 99        st = os.stat(path)
100    except OSError:
101        return notfound(request, response)
102
103    # Check if path is a directory.
104    if stat.S_ISDIR(st.st_mode):
105        # Let the caller deal with it as they like.
106        return notfound(request, response)
107
108    # Set the Last-Modified response header, so that
109    # modified-since validation code can work.
110    response.headers['Last-Modified'] = formatdate(
111        st.st_mtime, usegmt=True
112    )
113
114    result = validate_since(request, response)
115    if result is not None:
116        return result
117
118    if type is None:
119        # Set content-type based on filename extension
120        ext = ""
121        i = path.rfind('.')
122        if i != -1:
123            ext = path[i:].lower()
124        type = mimetypes.types_map.get(ext, "text/plain")
125    response.headers['Content-Type'] = type
126
127    if disposition is not None:
128        if name is None:
129            name = os.path.basename(path)
130        cd = '%s; filename="%s"' % (disposition, name)
131        response.headers["Content-Disposition"] = cd
132
133    # Set Content-Length and use an iterable (file object)
134    #   this way CP won't load the whole file in memory
135    c_len = st.st_size
136    bodyfile = open(path, 'rb')
137
138    # HTTP/1.0 didn't have Range/Accept-Ranges headers, or the 206 code
139    if request.protocol >= (1, 1):
140        response.headers["Accept-Ranges"] = "bytes"
141        r = get_ranges(request.headers.get('Range'), c_len)
142        if r == []:
143            response.headers['Content-Range'] = "bytes */%s" % c_len
144            return httperror(request, response, 416)
145        if r:
146            if len(r) == 1:
147                # Return a single-part response.
148                start, stop = r[0]
149                r_len = stop - start
150                response.status = 206
151                response.headers['Content-Range'] = (
152                    "bytes %s-%s/%s" % (start, stop - 1, c_len)
153                )
154                response.headers['Content-Length'] = r_len
155                bodyfile.seek(start)
156                response.body = bodyfile.read(r_len)
157            else:
158                # Return a multipart/byteranges response.
159                response.status = 206
160                boundary = _make_boundary()
161                ct = "multipart/byteranges; boundary=%s" % boundary
162                response.headers['Content-Type'] = ct
163                if "Content-Length" in response.headers:
164                    # Delete Content-Length header so finalize() recalcs it.
165                    del response.headers["Content-Length"]
166
167                def file_ranges():
168                    # Apache compatibility:
169                    yield "\r\n"
170
171                    for start, stop in r:
172                        yield "--" + boundary
173                        yield "\r\nContent-type: %s" % type
174                        yield ("\r\nContent-range: bytes %s-%s/%s\r\n\r\n"
175                               % (start, stop - 1, c_len))
176                        bodyfile.seek(start)
177                        yield bodyfile.read(stop - start)
178                        yield "\r\n"
179                    # Final boundary
180                    yield "--" + boundary + "--"
181
182                    # Apache compatibility:
183                    yield "\r\n"
184                response.body = file_ranges()
185        else:
186            response.headers['Content-Length'] = c_len
187            response.body = bodyfile
188    else:
189        response.headers['Content-Length'] = c_len
190        response.body = bodyfile
191
192    return response
193
194
def serve_download(request, response, path, name=None):
    """Serve the file at 'path' as a downloadable attachment.

    Delegates to serve_file() with the Content-Type fixed to
    "application/x-download" and the disposition fixed to "attachment";
    'name' is passed through unchanged.
    """

    return serve_file(
        request,
        response,
        path,
        type="application/x-download",
        disposition="attachment",
        name=name,
    )
202
203
def validate_etags(request, response, autotags=False):
    """Validate the current ETag against If-Match, If-None-Match headers.

    If autotags is True, an ETag response-header value will be provided
    from an MD5 hash of the response body (unless some other code has
    already provided an ETag header). If False (the default), the ETag
    will not be automatic.

    WARNING: the autotags feature is not designed for URL's which allow
    methods other than GET. For example, if a POST to the same URL returns
    no content, the automatic ETag will be incorrect, breaking a fundamental
    use for entity tags in a possibly destructive fashion. Likewise, if you
    raise 304 Not Modified, the response body will be empty, the ETag hash
    will be incorrect, and your application will break.
    See http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.24

    :param request: The current request (``headers`` and ``method`` are
                    read).
    :param response: The current response; ``response.ETag`` is set and,
                     with autotags, so is the 'ETag' response header.
    :param autotags: Whether to derive an ETag from the response body.

    :returns: None when validation passes (or was already performed),
              otherwise the result of ``httperror`` (412) or ``redirect``
              (304).
    """

    # Guard against being run twice.
    if hasattr(response, "ETag"):
        return

    status = response.status

    etag = response.headers.get('ETag')

    # Automatic ETag generation. See warning in docstring.
    if (not etag) and autotags:
        if status == 200:
            body = response.collapse_body()
            # The hash functions only accept bytes; encode text bodies.
            if not isinstance(body, bytes):
                body = body.encode("utf-8")
            # hashlib.md5 is itself the constructor (there is no
            # hashlib.md5.new attribute).
            etag = '"%s"' % hashlib.md5(body).hexdigest()
            response.headers['ETag'] = etag

    response.ETag = etag

    # "If the request would, without the If-Match header field, result in
    # anything other than a 2xx or 412 status, then the If-Match header
    # MUST be ignored."
    if 200 <= status <= 299:
        conditions = request.headers.elements('If-Match') or []
        conditions = [str(x) for x in conditions]
        if conditions and not (conditions == ["*"] or etag in conditions):
            return httperror(
                request, response, 412,
                description="If-Match failed: ETag %r did not match %r" % (
                    etag, conditions
                )
            )

        conditions = request.headers.elements('If-None-Match') or []
        conditions = [str(x) for x in conditions]
        if conditions == ["*"] or etag in conditions:
            if request.method in ("GET", "HEAD"):
                return redirect(request, response, [], code=304)
            else:
                return httperror(
                    request, response, 412,
                    description=(
                        "If-None-Match failed: ETag %r matched %r" % (
                            etag, conditions
                        )
                    )
                )
266
267
def validate_since(request, response):
    """Check Last-Modified against the request's conditional date headers.

    Nothing is validated unless the 'Last-Modified' response header has
    already been set. The 'If-Unmodified-Since' and 'If-Modified-Since'
    request headers are compared with it as plain strings.

    Returns None when the request may proceed, otherwise the result of
    ``httperror`` (412) or ``redirect`` (304).
    """

    last_modified = response.headers.get('Last-Modified')
    if not last_modified:
        return None

    code = response.status
    req_headers = request.headers

    # If-Unmodified-Since: fails as soon as the two values differ.
    unmodified_since = req_headers.get('If-Unmodified-Since')
    if unmodified_since and unmodified_since != last_modified:
        if 200 <= code <= 299 or code == 412:
            return httperror(request, response, 412)

    # If-Modified-Since: an identical value means "not modified".
    modified_since = req_headers.get('If-Modified-Since')
    if modified_since and modified_since == last_modified:
        if 200 <= code <= 299 or code == 304:
            if request.method not in ("GET", "HEAD"):
                return httperror(request, response, 412)
            return redirect(request, response, [], code=304)

    return None
291
292
def check_auth(request, response, realm, users, encrypt=None):
    """Check Authentication

    If an Authorization header contains valid credentials, return True,
    else False. If the Authorization header is present but cannot be
    parsed, the result of ``httperror(request, response, 400)`` is returned
    instead. ``request.login`` is set to the username on success and to
    False when the supplied credentials are rejected.

    :param realm: The authentication realm.
    :type  realm: str

    :param users: A dict of the form: {username: password}, a callable
                  returning such a dict, or a callable taking a username
                  and returning its password.
    :type  users: dict or callable

    :param encrypt: Callable used to encrypt the password returned from
                    the user-agent. if None it defaults to a md5 encryption.
    :type  encrypt: callable

    :raises ValueError: If 'users' (or the value returned by calling it
                        without arguments) is not a dict.
    """

    if "Authorization" in request.headers:
        # make sure the provided credentials are correctly set
        ah = _httpauth.parseAuthorization(request.headers.get("Authorization"))
        if ah is None:
            return httperror(request, response, 400)

        if not encrypt:
            encrypt = _httpauth.DIGEST_AUTH_ENCODERS[_httpauth.MD5]

        # The builtin callable() is used because the collections.Callable
        # alias no longer exists on Python 3.10 and later.
        if callable(users):
            try:
                # backward compatibility
                users = users()  # expect it to return a dictionary

                if not isinstance(users, dict):
                    raise ValueError("Authentication users must be a dict")

                # fetch the user password
                password = users.get(ah["username"], None)
            except TypeError:
                # returns a password (encrypted or clear text)
                password = users(ah["username"])
        else:
            if not isinstance(users, dict):
                raise ValueError("Authentication users must be a dict")

            # fetch the user password
            password = users.get(ah["username"], None)

        # validate the Authorization by re-computing it here
        # and compare it with what the user-agent provided
        if _httpauth.checkResponse(ah, password, method=request.method,
                                   encrypt=encrypt, realm=realm):
            request.login = ah["username"]
            return True

        request.login = False
    return False
348
349
def basic_auth(request, response, realm, users, encrypt=None):
    """Perform Basic Authentication

    If auth fails, returns an Unauthorized error with a
    basic authentication header.

    :param realm: The authentication realm.
    :type  realm: str

    :param users: A dict of the form: {username: password} or a callable
                  returning a dict.
    :type  users: dict or callable

    :param encrypt: Callable used to encrypt the password returned from
                    the user-agent. if None it defaults to a md5 encryption.
    :type  encrypt: callable

    :returns: None when the credentials are valid; the error produced by
              check_auth() when it rejected the Authorization header as
              malformed; otherwise the result of ``unauthorized``.
    """

    result = check_auth(request, response, realm, users, encrypt)

    # Only an explicit True means "authenticated". check_auth() may also
    # hand back an error object for an unparseable Authorization header;
    # a plain truthiness test could mistake that object for success.
    if result is True:
        return

    if result:
        # Propagate the error built by check_auth() instead of ignoring it.
        return result

    # inform the user-agent this path is protected
    response.headers["WWW-Authenticate"] = _httpauth.basicAuth(realm)

    return unauthorized(request, response)
375
376
def digest_auth(request, response, realm, users):
    """Perform Digest Authentication

    If auth fails, returns an Unauthorized error with a digest
    authentication header.

    :param realm: The authentication realm.
    :type  realm: str

    :param users: A dict of the form: {username: password} or a callable
                  returning a dict.
    :type  users: dict or callable

    :returns: None when the credentials are valid; the error produced by
              check_auth() when it rejected the Authorization header as
              malformed; otherwise the result of ``unauthorized``.
    """

    result = check_auth(request, response, realm, users)

    # Only an explicit True means "authenticated". check_auth() may also
    # hand back an error object for an unparseable Authorization header;
    # a plain truthiness test could mistake that object for success.
    if result is True:
        return

    if result:
        # Propagate the error built by check_auth() instead of ignoring it.
        return result

    # inform the user-agent this path is protected
    response.headers["WWW-Authenticate"] = _httpauth.digestAuth(realm)

    return unauthorized(request, response)
397
398
def gzip(response, level=4, mime_types=('text/html', 'text/plain')):
    """Try to gzip the response body if Content-Type in mime_types.

    response.headers['Content-Type'] must be set to one of the
    values in the mime_types arg before calling this function.

    No compression is performed if any of the following hold:
        * The client sends no Accept-Encoding request header
        * No 'gzip' or 'x-gzip' is present in the Accept-Encoding header
        * No 'gzip' or 'x-gzip' with a qvalue > 0 is present
        * The 'identity' value is given with a qvalue > 0.

    :param response: The response to compress; ``response.request`` is
                     used to reach the request headers.
    :param level: Compression level handed to ``compress``.
    :param mime_types: Media types eligible for compression.

    :returns: ``response`` (compressed or untouched), or the result of
              ``httperror`` (406) when the Accept-Encoding header lists
              neither 'identity' nor a gzip coding.
    """

    if not response.body:
        # Response body is empty (might be a 304 for instance)
        return response

    # If returning cached content (which should already have been gzipped),
    # don't re-zip.
    if getattr(response.request, "cached", False):
        return response

    acceptable = response.request.headers.elements('Accept-Encoding')
    if not acceptable:
        # If no Accept-Encoding field is present in a request,
        # the server MAY assume that the client will accept any
        # content coding. In this case, if "identity" is one of
        # the available content-codings, then the server SHOULD use
        # the "identity" content-coding, unless it has additional
        # information that a different content-coding is meaningful
        # to the client.
        return response

    # Keep only the media type: drop any parameters (e.g. charset) and the
    # whitespace that may surround the ';' separator.
    ct = response.headers.get('Content-Type', 'text/html')
    ct = ct.split(';')[0].strip()

    for coding in acceptable:
        if coding.value == 'identity' and coding.qvalue != 0:
            return response
        if coding.value in ('gzip', 'x-gzip'):
            if coding.qvalue == 0:
                return response
            if ct in mime_types:
                # Return a generator that compresses the page
                varies = response.headers.get("Vary", "")
                varies = [x.strip() for x in varies.split(",") if x.strip()]
                if "Accept-Encoding" not in varies:
                    varies.append("Accept-Encoding")
                response.headers['Vary'] = ", ".join(varies)

                response.headers['Content-Encoding'] = 'gzip'
                response.body = compress(response.body, level)
                if "Content-Length" in response.headers:
                    # Delete Content-Length header so finalize() recalcs it.
                    del response.headers["Content-Length"]
            return response
    return httperror(
        response.request, response, 406, description="identity, gzip"
    )