/httplib2plus/__init__.py
Python | 1685 lines | 1655 code | 7 blank | 23 comment | 17 complexity | 121995b36a4f425e7c3566e66997c677 MD5 | raw file
Large files files are truncated, but you can click here to view the full file
- from __future__ import generators
- """
- httplib2plus
- A caching http interface that supports ETags and gzip
- to conserve bandwidth.
- Requires Python 2.3 or later
- Changelog:
- 2007-08-18, Rick: Modified so it's able to use a socks proxy if needed.
- """
- __author__ = "Joe Gregorio (joe@bitworking.org)"
- __copyright__ = "Copyright 2006, Joe Gregorio"
- __contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
- "James Antill",
- "Xavier Verges Farrero",
- "Jonathan Feinberg",
- "Blair Zajac",
- "Sam Ruby",
- "Louis Nyffenegger"]
- __license__ = "MIT"
- __version__ = "0.8"
- import re
- import sys
- import email
- import email.Utils
- import email.Message
- import email.FeedParser
- import StringIO
- import gzip
- import zlib
- import httplib
- import urlparse
- import urllib
- import base64
- import os
- import copy
- import calendar
- import time
- import random
- import errno
- try:
- from hashlib import sha1 as _sha, md5 as _md5
- except ImportError:
- # prior to Python 2.5, these were separate modules
- import sha
- import md5
- _sha = sha.new
- _md5 = md5.new
- import hmac
- from gettext import gettext as _
- import socket
- try:
- from httplib2 import socks
- except ImportError:
- try:
- import socks
- except (ImportError, AttributeError):
- socks = None
- # Build the appropriate socket wrapper for ssl
- try:
- import ssl # python 2.6
- ssl_SSLError = ssl.SSLError
- def _ssl_wrap_socket(sock, key_file, cert_file,
- disable_validation, ca_certs):
- if disable_validation:
- cert_reqs = ssl.CERT_NONE
- else:
- cert_reqs = ssl.CERT_REQUIRED
- # We should be specifying SSL version 3 or TLS v1, but the ssl module
- # doesn't expose the necessary knobs. So we need to go with the default
- # of SSLv23.
- return ssl.wrap_socket(sock, keyfile=key_file, certfile=cert_file,
- cert_reqs=cert_reqs, ca_certs=ca_certs)
- except (AttributeError, ImportError):
- ssl_SSLError = None
- def _ssl_wrap_socket(sock, key_file, cert_file,
- disable_validation, ca_certs):
- if not disable_validation:
- raise CertificateValidationUnsupported(
- "SSL certificate validation is not supported without "
- "the ssl module installed. To avoid this error, install "
- "the ssl module, or explicity disable validation.")
- ssl_sock = socket.ssl(sock, key_file, cert_file)
- return httplib.FakeSocket(sock, ssl_sock)
- if sys.version_info >= (2,3):
- from iri2uri import iri2uri
- else:
- def iri2uri(uri):
- return uri
- def has_timeout(timeout): # python 2.6
- if hasattr(socket, '_GLOBAL_DEFAULT_TIMEOUT'):
- return (timeout is not None and timeout is not socket._GLOBAL_DEFAULT_TIMEOUT)
- return (timeout is not None)
- __all__ = [
- 'Http', 'Response', 'ProxyInfo', 'HttpLib2Error', 'RedirectMissingLocation',
- 'RedirectLimit', 'FailedToDecompressContent',
- 'UnimplementedDigestAuthOptionError',
- 'UnimplementedHmacDigestAuthOptionError',
- 'debuglevel', 'ProxiesUnavailableError']
- # The httplib debug level, set to a non-zero value to get debug output
- debuglevel = 0
- # A request will be tried 'RETRIES' times if it fails at the socket/connection level.
- RETRIES = 2
- # Python 2.3 support
- if sys.version_info < (2,4):
- def sorted(seq):
- seq.sort()
- return seq
- # Python 2.3 support
- def HTTPResponse__getheaders(self):
- """Return list of (header, value) tuples."""
- if self.msg is None:
- raise httplib.ResponseNotReady()
- return self.msg.items()
- if not hasattr(httplib.HTTPResponse, 'getheaders'):
- httplib.HTTPResponse.getheaders = HTTPResponse__getheaders
- # All exceptions raised here derive from HttpLib2Error
- class HttpLib2Error(Exception): pass
- # Some exceptions can be caught and optionally
- # be turned back into responses.
- class HttpLib2ErrorWithResponse(HttpLib2Error):
- def __init__(self, desc, response, content):
- self.response = response
- self.content = content
- HttpLib2Error.__init__(self, desc)
- class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass
- class RedirectLimit(HttpLib2ErrorWithResponse): pass
- class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass
- class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass
- class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass
- class MalformedHeader(HttpLib2Error): pass
- class RelativeURIError(HttpLib2Error): pass
- class ServerNotFoundError(HttpLib2Error): pass
- class ProxiesUnavailableError(HttpLib2Error): pass
- class CertificateValidationUnsupported(HttpLib2Error): pass
- class SSLHandshakeError(HttpLib2Error): pass
- class NotSupportedOnThisPlatform(HttpLib2Error): pass
- class CertificateHostnameMismatch(SSLHandshakeError):
- def __init__(self, desc, host, cert):
- HttpLib2Error.__init__(self, desc)
- self.host = host
- self.cert = cert
- # Open Items:
- # -----------
- # Proxy support
- # Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)
- # Pluggable cache storage (supports storing the cache in
- # flat files by default. We need a plug-in architecture
- # that can support Berkeley DB and Squid)
- # == Known Issues ==
- # Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
- # Does not handle Cache-Control: max-stale
- # Does not use Age: headers when calculating cache freshness.
- # The number of redirections to follow before giving up.
- # Note that only GET redirects are automatically followed.
- # Will also honor 301 requests by saving that info and never
- # requesting that URI again.
- DEFAULT_MAX_REDIRECTS = 5
- try:
- # Users can optionally provide a module that tells us where the CA_CERTS
- # are located.
- import ca_certs_locater
- CA_CERTS = ca_certs_locater.get()
- except ImportError:
- # Default CA certificates file bundled with httplib2.
- CA_CERTS = os.path.join(
- os.path.dirname(os.path.abspath(__file__ )), "cacerts.txt")
- # Which headers are hop-by-hop headers by default
- HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade']
- def _get_end2end_headers(response):
- hopbyhop = list(HOP_BY_HOP)
- hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',')])
- return [header for header in response.keys() if header not in hopbyhop]
- URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
- def parse_uri(uri):
- """Parses a URI using the regex given in Appendix B of RFC 3986.
- (scheme, authority, path, query, fragment) = parse_uri(uri)
- """
- groups = URI.match(uri).groups()
- return (groups[1], groups[3], groups[4], groups[6], groups[8])
- def urlnorm(uri):
- (scheme, authority, path, query, fragment) = parse_uri(uri)
- if not scheme or not authority:
- raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
- authority = authority.lower()
- scheme = scheme.lower()
- if not path:
- path = "/"
- # Could do syntax based normalization of the URI before
- # computing the digest. See Section 6.2.2 of Std 66.
- request_uri = query and "?".join([path, query]) or path
- scheme = scheme.lower()
- defrag_uri = scheme + "://" + authority + request_uri
- return scheme, authority, request_uri, defrag_uri
- # Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
- re_url_scheme = re.compile(r'^\w+://')
- re_slash = re.compile(r'[?/:|]+')
- def safename(filename):
- """Return a filename suitable for the cache.
- Strips dangerous and common characters to create a filename we
- can use to store the cache in.
- """
- try:
- if re_url_scheme.match(filename):
- if isinstance(filename,str):
- filename = filename.decode('utf-8')
- filename = filename.encode('idna')
- else:
- filename = filename.encode('idna')
- except UnicodeError:
- pass
- if isinstance(filename,unicode):
- filename=filename.encode('utf-8')
- filemd5 = _md5(filename).hexdigest()
- filename = re_url_scheme.sub("", filename)
- filename = re_slash.sub(",", filename)
- # limit length of filename
- if len(filename)>200:
- filename=filename[:200]
- return ",".join((filename, filemd5))
- NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+')
- def _normalize_headers(headers):
- return dict([ (key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip()) for (key, value) in headers.iteritems()])
- def _parse_cache_control(headers):
- retval = {}
- if headers.has_key('cache-control'):
- parts = headers['cache-control'].split(',')
- parts_with_args = [tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")]
- parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")]
- retval = dict(parts_with_args + parts_wo_args)
- return retval
- # Whether to use a strict mode to parse WWW-Authenticate headers
- # Might lead to bad results in case of ill-formed header value,
- # so disabled by default, falling back to relaxed parsing.
- # Set to true to turn on, usefull for testing servers.
- USE_WWW_AUTH_STRICT_PARSING = 0
- # In regex below:
- # [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP
- # "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space
- # Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both:
- # \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?
- WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$")
- WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$")
- UNQUOTE_PAIRS = re.compile(r'\\(.)')
- def _parse_www_authenticate(headers, headername='www-authenticate'):
- """Returns a dictionary of dictionaries, one dict
- per auth_scheme."""
- retval = {}
- if headers.has_key(headername):
- try:
- authenticate = headers[headername].strip()
- www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED
- while authenticate:
- # Break off the scheme at the beginning of the line
- if headername == 'authentication-info':
- (auth_scheme, the_rest) = ('digest', authenticate)
- else:
- (auth_scheme, the_rest) = authenticate.split(" ", 1)
- # Now loop over all the key value pairs that come after the scheme,
- # being careful not to roll into the next scheme
- match = www_auth.search(the_rest)
- auth_params = {}
- while match:
- if match and len(match.groups()) == 3:
- (key, value, the_rest) = match.groups()
- auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')])
- match = www_auth.search(the_rest)
- retval[auth_scheme.lower()] = auth_params
- authenticate = the_rest.strip()
- except ValueError:
- raise MalformedHeader("WWW-Authenticate")
- return retval
- def _entry_disposition(response_headers, request_headers):
- """Determine freshness from the Date, Expires and Cache-Control headers.
- We don't handle the following:
- 1. Cache-Control: max-stale
- 2. Age: headers are not used in the calculations.
- Not that this algorithm is simpler than you might think
- because we are operating as a private (non-shared) cache.
- This lets us ignore 's-maxage'. We can also ignore
- 'proxy-invalidate' since we aren't a proxy.
- We will never return a stale document as
- fresh as a design decision, and thus the non-implementation
- of 'max-stale'. This also lets us safely ignore 'must-revalidate'
- since we operate as if every server has sent 'must-revalidate'.
- Since we are private we get to ignore both 'public' and
- 'private' parameters. We also ignore 'no-transform' since
- we don't do any transformations.
- The 'no-store' parameter is handled at a higher level.
- So the only Cache-Control parameters we look at are:
- no-cache
- only-if-cached
- max-age
- min-fresh
- """
- retval = "STALE"
- cc = _parse_cache_control(request_headers)
- cc_response = _parse_cache_control(response_headers)
- if request_headers.has_key('pragma') and request_headers['pragma'].lower().find('no-cache') != -1:
- retval = "TRANSPARENT"
- if 'cache-control' not in request_headers:
- request_headers['cache-control'] = 'no-cache'
- elif cc.has_key('no-cache'):
- retval = "TRANSPARENT"
- elif cc_response.has_key('no-cache'):
- retval = "STALE"
- elif cc.has_key('only-if-cached'):
- retval = "FRESH"
- elif response_headers.has_key('date'):
- date = calendar.timegm(email.Utils.parsedate_tz(response_headers['date']))
- now = time.time()
- current_age = max(0, now - date)
- if cc_response.has_key('max-age'):
- try:
- freshness_lifetime = int(cc_response['max-age'])
- except ValueError:
- freshness_lifetime = 0
- elif response_headers.has_key('expires'):
- expires = email.Utils.parsedate_tz(response_headers['expires'])
- if None == expires:
- freshness_lifetime = 0
- else:
- freshness_lifetime = max(0, calendar.timegm(expires) - date)
- else:
- freshness_lifetime = 0
- if cc.has_key('max-age'):
- try:
- freshness_lifetime = int(cc['max-age'])
- except ValueError:
- freshness_lifetime = 0
- if cc.has_key('min-fresh'):
- try:
- min_fresh = int(cc['min-fresh'])
- except ValueError:
- min_fresh = 0
- current_age += min_fresh
- if freshness_lifetime > current_age:
- retval = "FRESH"
- return retval
- def _decompressContent(response, new_content):
- content = new_content
- try:
- encoding = response.get('content-encoding', None)
- if encoding in ['gzip', 'deflate']:
- if encoding == 'gzip':
- content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read()
- if encoding == 'deflate':
- content = zlib.decompress(content)
- response['content-length'] = str(len(content))
- # Record the historical presence of the encoding in a way the won't interfere.
- response['-content-encoding'] = response['content-encoding']
- del response['content-encoding']
- except IOError:
- content = ""
- raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content)
- return content
- def _updateCache(request_headers, response_headers, content, cache, cachekey):
- if cachekey:
- cc = _parse_cache_control(request_headers)
- cc_response = _parse_cache_control(response_headers)
- if cc.has_key('no-store') or cc_response.has_key('no-store'):
- cache.delete(cachekey)
- else:
- info = email.Message.Message()
- for key, value in response_headers.iteritems():
- if key not in ['status','content-encoding','transfer-encoding']:
- info[key] = value
- # Add annotations to the cache to indicate what headers
- # are variant for this request.
- vary = response_headers.get('vary', None)
- if vary:
- vary_headers = vary.lower().replace(' ', '').split(',')
- for header in vary_headers:
- key = '-varied-%s' % header
- try:
- info[key] = request_headers[header]
- except KeyError:
- pass
- status = response_headers.status
- if status == 304:
- status = 200
- status_header = 'status: %d\r\n' % status
- header_str = info.as_string()
- header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
- text = "".join([status_header, header_str, content])
- cache.set(cachekey, text)
- def _cnonce():
- dig = _md5("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
- return dig[:16]
- def _wsse_username_token(cnonce, iso_now, password):
- return base64.b64encode(_sha("%s%s%s" % (cnonce, iso_now, password)).digest()).strip()
- # For credentials we need two things, first
- # a pool of credential to try (not necesarily tied to BAsic, Digest, etc.)
- # Then we also need a list of URIs that have already demanded authentication
- # That list is tricky since sub-URIs can take the same auth, or the
- # auth scheme may change as you descend the tree.
- # So we also need each Auth instance to be able to tell us
- # how close to the 'top' it is.
- class Authentication(object):
- def __init__(self, credentials, host, request_uri, headers, response, content, http):
- (scheme, authority, path, query, fragment) = parse_uri(request_uri)
- self.path = path
- self.host = host
- self.credentials = credentials
- self.http = http
- def depth(self, request_uri):
- (scheme, authority, path, query, fragment) = parse_uri(request_uri)
- return request_uri[len(self.path):].count("/")
- def inscope(self, host, request_uri):
- # XXX Should we normalize the request_uri?
- (scheme, authority, path, query, fragment) = parse_uri(request_uri)
- return (host == self.host) and path.startswith(self.path)
- def request(self, method, request_uri, headers, content):
- """Modify the request headers to add the appropriate
- Authorization header. Over-ride this in sub-classes."""
- pass
- def response(self, response, content):
- """Gives us a chance to update with new nonces
- or such returned from the last authorized response.
- Over-rise this in sub-classes if necessary.
- Return TRUE is the request is to be retried, for
- example Digest may return stale=true.
- """
- return False
- class BasicAuthentication(Authentication):
- def __init__(self, credentials, host, request_uri, headers, response, content, http):
- Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
- def request(self, method, request_uri, headers, content):
- """Modify the request headers to add the appropriate
- Authorization header."""
- headers['authorization'] = 'Basic ' + base64.b64encode("%s:%s" % self.credentials).strip()
- class DigestAuthentication(Authentication):
- """Only do qop='auth' and MD5, since that
- is all Apache currently implements"""
- def __init__(self, credentials, host, request_uri, headers, response, content, http):
- Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
- challenge = _parse_www_authenticate(response, 'www-authenticate')
- self.challenge = challenge['digest']
- qop = self.challenge.get('qop', 'auth')
- self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None
- if self.challenge['qop'] is None:
- raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop))
- self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5').upper()
- if self.challenge['algorithm'] != 'MD5':
- raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
- self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
- self.challenge['nc'] = 1
- def request(self, method, request_uri, headers, content, cnonce = None):
- """Modify the request headers"""
- H = lambda x: _md5(x).hexdigest()
- KD = lambda s, d: H("%s:%s" % (s, d))
- A2 = "".join([method, ":", request_uri])
- self.challenge['cnonce'] = cnonce or _cnonce()
- request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (
- self.challenge['nonce'],
- '%08x' % self.challenge['nc'],
- self.challenge['cnonce'],
- self.challenge['qop'], H(A2)))
- headers['authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % (
- self.credentials[0],
- self.challenge['realm'],
- self.challenge['nonce'],
- request_uri,
- self.challenge['algorithm'],
- request_digest,
- self.challenge['qop'],
- self.challenge['nc'],
- self.challenge['cnonce'])
- if self.challenge.get('opaque'):
- headers['authorization'] += ', opaque="%s"' % self.challenge['opaque']
- self.challenge['nc'] += 1
- def response(self, response, content):
- if not response.has_key('authentication-info'):
- challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {})
- if 'true' == challenge.get('stale'):
- self.challenge['nonce'] = challenge['nonce']
- self.challenge['nc'] = 1
- return True
- else:
- updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})
- if updated_challenge.has_key('nextnonce'):
- self.challenge['nonce'] = updated_challenge['nextnonce']
- self.challenge['nc'] = 1
- return False
- class HmacDigestAuthentication(Authentication):
- """Adapted from Robert Sayre's code and DigestAuthentication above."""
- __author__ = "Thomas Broyer (t.broyer@ltgt.net)"
- def __init__(self, credentials, host, request_uri, headers, response, content, http):
- Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
- challenge = _parse_www_authenticate(response, 'www-authenticate')
- self.challenge = challenge['hmacdigest']
- # TODO: self.challenge['domain']
- self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
- if self.challenge['reason'] not in ['unauthorized', 'integrity']:
- self.challenge['reason'] = 'unauthorized'
- self.challenge['salt'] = self.challenge.get('salt', '')
- if not self.challenge.get('snonce'):
- raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn't contain a server nonce, or this one is empty."))
- self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1')
- if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']:
- raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
- self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1')
- if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
- raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm']))
- if self.challenge['algorithm'] == 'HMAC-MD5':
- self.hashmod = _md5
- else:
- self.hashmod = _sha
- if self.challenge['pw-algorithm'] == 'MD5':
- self.pwhashmod = _md5
- else:
- self.pwhashmod = _sha
- self.key = "".join([self.credentials[0], ":",
- self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
- ":", self.challenge['realm']])
- self.key = self.pwhashmod.new(self.key).hexdigest().lower()
- def request(self, method, request_uri, headers, content):
- """Modify the request headers"""
- keys = _get_end2end_headers(headers)
- keylist = "".join(["%s " % k for k in keys])
- headers_val = "".join([headers[k] for k in keys])
- created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime())
- cnonce = _cnonce()
- request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
- request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
- headers['authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
- self.credentials[0],
- self.challenge['realm'],
- self.challenge['snonce'],
- cnonce,
- request_uri,
- created,
- request_digest,
- keylist)
- def response(self, response, content):
- challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {})
- if challenge.get('reason') in ['integrity', 'stale']:
- return True
- return False
- class WsseAuthentication(Authentication):
- """This is thinly tested and should not be relied upon.
- At this time there isn't any third party server to test against.
- Blogger and TypePad implemented this algorithm at one point
- but Blogger has since switched to Basic over HTTPS and
- TypePad has implemented it wrong, by never issuing a 401
- challenge but instead requiring your client to telepathically know that
- their endpoint is expecting WSSE profile="UsernameToken"."""
- def __init__(self, credentials, host, request_uri, headers, response, content, http):
- Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
- def request(self, method, request_uri, headers, content):
- """Modify the request headers to add the appropriate
- Authorization header."""
- headers['authorization'] = 'WSSE profile="UsernameToken"'
- iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
- cnonce = _cnonce()
- password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1])
- headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % (
- self.credentials[0],
- password_digest,
- cnonce,
- iso_now)
- class GoogleLoginAuthentication(Authentication):
- def __init__(self, credentials, host, request_uri, headers, response, content, http):
- from urllib import urlencode
- Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
- challenge = _parse_www_authenticate(response, 'www-authenticate')
- service = challenge['googlelogin'].get('service', 'xapi')
- # Bloggger actually returns the service in the challenge
- # For the rest we guess based on the URI
- if service == 'xapi' and request_uri.find("calendar") > 0:
- service = "cl"
- # No point in guessing Base or Spreadsheet
- #elif request_uri.find("spreadsheets") > 0:
- # service = "wise"
- auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
- resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'})
- lines = content.split('\n')
- d = dict([tuple(line.split("=", 1)) for line in lines if line])
- if resp.status == 403:
- self.Auth = ""
- else:
- self.Auth = d['Auth']
- def request(self, method, request_uri, headers, content):
- """Modify the request headers to add the appropriate
- Authorization header."""
- headers['authorization'] = 'GoogleLogin Auth=' + self.Auth
- AUTH_SCHEME_CLASSES = {
- "basic": BasicAuthentication,
- "wsse": WsseAuthentication,
- "digest": DigestAuthentication,
- "hmacdigest": HmacDigestAuthentication,
- "googlelogin": GoogleLoginAuthentication
- }
- AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
- class FileCache(object):
- """Uses a local directory as a store for cached files.
- Not really safe to use if multiple threads or processes are going to
- be running on the same cache.
- """
- def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
- self.cache = cache
- self.safe = safe
- if not os.path.exists(cache):
- os.makedirs(self.cache)
- def get(self, key):
- retval = None
- cacheFullPath = os.path.join(self.cache, self.safe(key))
- try:
- f = file(cacheFullPath, "rb")
- retval = f.read()
- f.close()
- except IOError:
- pass
- return retval
- def set(self, key, value):
- cacheFullPath = os.path.join(self.cache, self.safe(key))
- f = file(cacheFullPath, "wb")
- f.write(value)
- f.close()
- def delete(self, key):
- cacheFullPath = os.path.join(self.cache, self.safe(key))
- if os.path.exists(cacheFullPath):
- os.remove(cacheFullPath)
- class Credentials(object):
- def __init__(self):
- self.credentials = []
- def add(self, name, password, domain=""):
- self.credentials.append((domain.lower(), name, password))
- def clear(self):
- self.credentials = []
- def iter(self, domain):
- for (cdomain, name, password) in self.credentials:
- if cdomain == "" or domain == cdomain:
- yield (name, password)
- class KeyCerts(Credentials):
- """Identical to Credentials except that
- name/password are mapped to key/cert."""
- pass
- class AllHosts(object):
- pass
- class ProxyInfo(object):
- """Collect information required to use a proxy."""
- bypass_hosts = ()
- def __init__(self, proxy_type, proxy_host, proxy_port,
- proxy_rdns=None, proxy_user=None, proxy_pass=None):
- """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX
- constants. For example:
- p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP,
- proxy_host='localhost', proxy_port=8000)
- """
- self.proxy_type = proxy_type
- self.proxy_host = proxy_host
- self.proxy_port = proxy_port
- self.proxy_rdns = proxy_rdns
- self.proxy_user = proxy_user
- self.proxy_pass = proxy_pass
- def astuple(self):
- return (self.proxy_type, self.proxy_host, self.proxy_port,
- self.proxy_rdns, self.proxy_user, self.proxy_pass)
- def isgood(self):
- return (self.proxy_host != None) and (self.proxy_port != None)
- def applies_to(self, hostname):
- return not self.bypass_host(hostname)
- def bypass_host(self, hostname):
- """Has this host been excluded from the proxy config"""
- if self.bypass_hosts is AllHosts:
- return True
- bypass = False
- for domain in self.bypass_hosts:
- if hostname.endswith(domain):
- bypass = True
- return bypass
- def proxy_info_from_environment(method='http'):
- """
- Read proxy info from the environment variables.
- """
- if method not in ['http', 'https']:
- return
-
- #env_var = method + '_proxy'
- url = urllib.getproxies()
- if not url:
- return
- else:url=url[method]
- pi = proxy_info_from_url(url, method)
-
- #pi=urllib.getproxies()
- no_proxy = os.environ.get('no_proxy', os.environ.get('NO_PROXY', ''))
- bypass_hosts = []
- if no_proxy:
- bypass_hosts = no_proxy.split(',')
- # special case, no_proxy=* means all hosts bypassed
- if no_proxy == '*':
- bypass_hosts = AllHosts
- pi.bypass_hosts = bypass_hosts
- return pi
- def proxy_info_from_url(url, method='http'):
- """
- Construct a ProxyInfo from a URL (such as http_proxy env var)
- """
- url = urlparse.urlparse(url)
- username = None
- password = None
- port = None
- if '@' in url[1]:
- ident, host_port = url[1].split('@', 1)
- if ':' in ident:
- username, password = ident.split(':', 1)
- else:
- password = ident
- else:
- host_port = url[1]
- if ':' in host_port:
- host, port = host_port.split(':', 1)
- else:
- host = host_port
- if port:
- port = int(port)
- else:
- port = dict(https=443, http=80)[method]
- if method=='http':proxy_type = 4 # socks.PROXY_TYPE_HTTP_NO_TUNNEL
- elif method=='https':proxy_type = 3 # socks.PROXY_TYPE_HTTP, will use CONNECT tunnel
- return ProxyInfo(
- proxy_type = proxy_type,
- proxy_host = host,
- proxy_port = port,
- proxy_user = username or None,
- proxy_pass = password or None,
- )
- class HTTPConnectionWithTimeout(httplib.HTTPConnection):
- """
- HTTPConnection subclass that supports timeouts
- All timeouts are in seconds. If None is passed for timeout then
- Python's default timeout for sockets will be used. See for example
- the docs of socket.setdefaulttimeout():
- http://docs.python.org/library/socket.html#socket.setdefaulttimeout
- """
- def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None):
- httplib.HTTPConnection.__init__(self, host, port, strict)
- self.timeout = timeout
- self.proxy_info = proxy_info
- def connect(self):
- """Connect to the host and port specified in __init__."""
- # Mostly verbatim from httplib.py.
- if self.proxy_info and socks is None:
- raise ProxiesUnavailableError(
- 'Proxy support missing but proxy use was requested!')
- msg = "getaddrinfo returns an empty list"
- if self.proxy_info and self.proxy_info.isgood():
- use_proxy = True
- proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass = self.proxy_info.astuple()
- else:
- use_proxy = False
- if use_proxy and proxy_rdns:
- host = proxy_host
- port = proxy_port
- else:
- host = self.host
- port = self.port
- for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
- af, socktype, proto, canonname, sa = res
- try:
- if use_proxy:
- self.sock = socks.socksocket(af, socktype, proto)
- self.sock.setproxy(proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass)
- else:
- self.sock = socket.socket(af, socktype, proto)
- self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
- # Different from httplib: support timeouts.
- if has_timeout(self.timeout):
- self.sock.settimeout(self.timeout)
- # End of difference from httplib.
- if self.debuglevel > 0:
- print "connect: (%s, %s) ************" % (self.host, self.port)
- if use_proxy:
- print "proxy: %s ************" % str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass))
- self.sock.connect((self.host, self.port) + sa[2:])
- except socket.error, msg:
- if self.debuglevel > 0:
- print "connect fail: (%s, %s)" % (self.host, self.port)
- if use_proxy:
- print "proxy: %s" % str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass))
- if self.sock:
- self.sock.close()
- self.sock = None
- continue
- break
- if not self.sock:
- raise socket.error, msg
- class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):
- """
- This class allows communication via SSL.
- All timeouts are in seconds. If None is passed for timeout then
- Python's default timeout for sockets will be used. See for example
- the docs of socket.setdefaulttimeout():
- http://docs.python.org/library/socket.html#socket.setdefaulttimeout
- """
- def __init__(self, host, port=None, key_file=None, cert_file=None,
- strict=None, timeout=None, proxy_info=None,
- ca_certs=None, disable_ssl_certificate_validation=False):
- httplib.HTTPSConnection.__init__(self, host, port=port,
- key_file=key_file,
- cert_file=cert_file, strict=strict)
- self.timeout = timeout
- self.proxy_info = proxy_info
- if ca_certs is None:
- ca_certs = CA_CERTS
- self.ca_certs = ca_certs
- self.disable_ssl_certificate_validation = \
- disable_ssl_certificate_validation
- # The following two methods were adapted from https_wrapper.py, released
- # with the Google Appengine SDK at
- # http://googleappengine.googlecode.com/svn-history/r136/trunk/python/google/appengine/tools/https_wrapper.py
- # under the following license:
- #
- # Copyright 2007 Google Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- def _GetValidHostsForCert(self, cert):
- """Returns a list of valid host globs for an SSL certificate.
- Args:
- cert: A dictionary representing an SSL certificate.
- Returns:
- list: A list of valid host globs.
- """
- if 'subjectAltName' in cert:
- return [x[1] for x in cert['subjectAltName']
- if x[0].lower() == 'dns']
- else:
- return [x[0][1] for x in cert['subject']
- if x[0][0].lower() == 'commonname']
- def _ValidateCertificateHostname(self, cert, hostname):
- """Validates that a given hostname is valid for an SSL certificate.
- Args:
- cert: A dictionary representing an SSL certificate.
- hostname: The hostname to test.
- Returns:
- bool: Whether or not the hostname is valid for this certificate.
- """
- hosts = self._GetValidHostsForCert(cert)
- for host in hosts:
- host_re = host.replace('.', '\.').replace('*', '[^.]*')
- if re.search('^%s$' % (host_re,), hostname, re.I):
- return True
- return False
- def connect(self):
- "Connect to a host on a given (SSL) port."
- msg = "getaddrinfo returns an empty list"
- if self.proxy_info and self.proxy_info.isgood():
- use_proxy = True
- proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass = self.proxy_info.astuple()
- else:
- use_proxy = False
- if use_proxy and proxy_rdns:
- host = proxy_host
- port = proxy_port
- else:
- host = self.host
- port = self.port
- address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
- for family, socktype, proto, canonname, sockaddr in address_info:
- try:
- if use_proxy:
- sock = socks.socksocket(family, socktype, proto)
- sock.setproxy(proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass)
- else:
- sock = socket.socket(family, socktype, proto)
- sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
- if has_timeout(self.timeout):
- sock.settimeout(self.timeout)
- sock.connect((self.host, self.port))
- self.sock =_ssl_wrap_socket(
- sock, self.key_file, self.cert_file,
- self.disable_ssl_certificate_validation, self.ca_certs)
- if self.debuglevel > 0:
- print "connect: (%s, %s)" % (self.host, self.port)
- if use_proxy:
- print "proxy: %s" % str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass))
- if not self.disable_ssl_certificate_validation:
- cert = self.sock.getpeercert()
- hostname = self.host.split(':', 0)[0]
- if not self._ValidateCertificateHostname(cert, hostname):
- raise CertificateHostnameMismatch(
- 'Server presented certificate that does not match '
- 'host %s: %s' % (hostname, cert), hostname, cert)
- except ssl_SSLError, e:
- if sock:
- sock.close()
- if self.sock:
- self.sock.close()
- self.sock = None
- # Unfortunately the ssl module doesn't seem to provide any way
- # to get at more detailed error information, in particular
- # whether the error is due to certificate validation or
- # something else (such as SSL protocol mismatch).
- if e.errno == ssl.SSL_ERROR_SSL:
- raise SSLHandshakeError(e)
- else:
- raise
- except (socket.timeout, socket.gaierror):
- raise
- except socket.error, msg:
- if self.debuglevel > 0:
- print "connect fail: (%s, %s)" % (self.host, self.port)
- if use_proxy:
- print "proxy: %s" % str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass))
- if self.sock:
- self.sock.close()
- self.sock = None
- continue
- break
- if not self.sock:
- raise socket.error, msg
- SCHEME_TO_CONNECTION = {
- 'http': HTTPConnectionWithTimeout,
- 'https': HTTPSConnectionWithTimeout
- }
- # Use a different connection object for Google App Engine
- try:
- try:
- from google.appengine.api import apiproxy_stub_map
- if apiproxy_stub_map.apiproxy.GetStub('urlfetch') is None:
- raise ImportError # Bail out; we're not actually running on App Engine.
- from google.appengine.api.urlfetch import fetch
- from google.appengine.api.urlfetch import InvalidURLError
- except (ImportError, AttributeError):
- from google3.apphosting.api import apiproxy_stub_map
- if apiproxy_stub_map.apiproxy.GetStub('urlfetch') is None:
- raise ImportError # Bail out; we're not actually running on App Engine.
- from google3.apphosting.api.urlfetch import fetch
- from google3.apphosting.api.urlfetch import InvalidURLError
- def _new_fixed_fetch(validate_certificate):
- def fixed_fetch(url, payload=None, method="GET", headers={},
- allow_truncated=False, follow_redirects=True,
- deadline=5):
- return fetch(url, payload=payload, method=method, headers=headers,
- allow_truncated=allow_truncated,
- follow_redirects=follow_redirects, deadline=deadline,
- validate_certificate=validate_certificate)
- return fixed_fetch
- class AppEngineHttpConnection(httplib.HTTPConnection):
- """Use httplib on App Engine, but compensate for its weirdness.
- The parameters key_file, cert_file, proxy_info, ca_certs, and
- disable_ssl_certificate_validation are all dropped on the ground.
- """
- def __init__(self, host, port=None, key_file=None, cert_file=None,
- strict=None, timeout=None, proxy_info=None, ca_certs=None,
- disable_ssl_certificate_validation=False):
- httplib.HTTPConnection.__init__(self, host, port=port,
- strict=strict, timeout=timeout)
- class AppEngineHttpsConnection(httplib.HTTPSConnection):
- """Same as AppEngineHttpConnection, but for HTTPS URIs."""
- def __init__(self, host, port=None, key_file=None, cert_file=None,
- strict=None, timeout=None, proxy_info=None, ca_certs=None,
- disable_ssl_certificate_validation=False):
- httplib.HTTPSConnection.__init__(self, host, port=port,
- key_file=key_file,
- cert_file=cert_file, strict=strict,
- timeout=timeout)
- self._fetch = _new_fixed_fetch(
- not disable_ssl_certificate_validation)
- # Update the connection classes to use the Googel App Engine specific ones.
- SCHEME_TO_CONNECTION = {
- 'http': AppEngineHttpConnection,
- 'https': AppEngineHttpsConnection
- }
- except (ImportError, AttributeError):
- pass
- class Http(object):
- """An HTTP client that handles:
- - all methods
- - caching
- - ETags
- - compression,
- - HTTPS
- - Basic
- - Digest
- - WSSE
- and more.
- """
- def __init__(self, cache=None, timeout=None,
- proxy_info=proxy_info_from_environment,
- ca_certs=None, disable_ssl_certificate_validation=False,callback_hook=None,chunk_size=0):
- """If 'cache' is a string then it is used as a directory name for
- a disk cache. Otherwise it must be an object that supports the
- same interface as FileCache.
- All timeouts are in seconds. If None is passed for timeout
- then Python's default timeout for sockets will be used. See
- for example the docs of socket.setdefaulttimeout():
- http://docs.python.org/library/socket.html#socket.setdefaulttimeout
- `proxy_info` may be:
- - a callable that takes the http scheme ('http' or 'https') and
- returns a ProxyInfo instance per request. By default, uses
- proxy_nfo_from_environment.
- - a ProxyInfo instance (static proxy config).
- - None (proxy disabled).
- ca_certs is the path of a file containing root CA certificates for SSL
- server certificate validation. By default, a CA cert file bundled with
- httplib2 is used.
- If disable_ssl_certificate_validation is true, SSL cert validation will
- not be performed.
- """
- self.proxy_info = proxy_info
- self.ca_certs = ca_certs
- self.disable_ssl_certificate_validation = \
- disable_ssl_certificate_validation
- self.chunk_size=chunk_size
- self.callback_hook=callback_hook
- # Map domain name to an httplib connection
- self.connections = {}
- # The location of the cache, for now a directory
- # where cached responses are held.
- if cache and isinstance(cache, basestring):
- self.cache = FileCache(cache)
- else:
- self.cache = cache
- # Name/password
- self.credentials = Credentials()
- # Key/cert
- self.certificates = KeyCerts()
- # authorization objects
- self.authorizations = []
- # If set to False then no redirects are followed, even safe ones.
- self.follow_redirects = True
- # Which HTTP methods do we apply optimistic concurrency to, i.e.
- # which methods get an "if-match:" etag header added to them.
- self.optimistic_concurrency_methods = ["PUT", "PATCH"]
- # If 'follow_redirects' is True, and this is set to True then
- # all redirecs are followed, including unsafe ones.
- self.follow_all_redirects = False
- self.ignore_etag = False
- self.force_exception_to_status_code = False
- self.timeout = timeout
- # Keep Authorization: headers on a redirect.
- self.forward_authorization_headers = False
- def __getstate__(self):
- state_dict = copy.copy(self.__dict__)
- # In case request is augmented by some foreign object such as
- # credentials which handle auth
- if 'request' in state_dict:
- del state_dict['request']
- if 'connections' in state_dict:
- del state_dict['connections']
- return state_dict
- def __setstate__(self, state):
- self.__dict__.update(state)
- self.connections = {}
- def _auth_from_challenge(self, host, request_uri, headers, response, content):
- """A generator that creates Authorization objects
- that can be applied to requests.
- """
- challenges = _parse_www_authenticate(response, 'www-authenticate')
- for cred in self.credentials.iter(host):
- for scheme in AUTH_SCHEME_ORDER:
- if challenges.has_key(…
Large files files are truncated, but you can click here to view the full file