PageRenderTime 427ms CodeModel.GetById 32ms RepoModel.GetById 1ms app.codeStats 1ms

/bigboard/trunk/bigboard/httplib2/__init__.py

https://gitlab.com/manoj-makkuboy/magnetism
Python | 1148 lines | 1118 code | 7 blank | 23 comment | 17 complexity | da1786ad20116688f323065e3ae4d7b1 MD5 | raw file
  1. from __future__ import generators
  2. """
  3. httplib2
  4. A caching http interface that supports ETags and gzip
  5. to conserve bandwidth.
  6. Requires Python 2.3 or later
  7. Changelog:
  8. 2007-08-18, Rick: Modified so it's able to use a socks proxy if needed.
  9. """
  10. __author__ = "Joe Gregorio (joe@bitworking.org)"
  11. __copyright__ = "Copyright 2006, Joe Gregorio"
  12. __contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
  13. "James Antill",
  14. "Xavier Verges Farrero",
  15. "Jonathan Feinberg",
  16. "Blair Zajac",
  17. "Sam Ruby",
  18. "Louis Nyffenegger"]
  19. __license__ = "MIT"
  20. __version__ = "$Rev: 259 $"
  21. import re
  22. import sys
  23. import md5
  24. import email
  25. import email.Utils
  26. import email.Message
  27. import StringIO
  28. import gzip
  29. import zlib
  30. import httplib
  31. import urlparse
  32. import base64
  33. import os
  34. import copy
  35. import calendar
  36. import time
  37. import random
  38. import sha
  39. import hmac
  40. from gettext import gettext as _
  41. import socket
  42. try:
  43. import socks
  44. except ImportError:
  45. socks = None
  46. if sys.version_info >= (2,3):
  47. from iri2uri import iri2uri
  48. else:
  49. def iri2uri(uri):
  50. return uri
  51. __all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error',
  52. 'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
  53. 'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError',
  54. 'debuglevel']
  55. # The httplib debug level, set to a non-zero value to get debug output
  56. debuglevel = 0
  57. # Python 2.3 support
  58. if sys.version_info < (2,4):
  59. def sorted(seq):
  60. seq.sort()
  61. return seq
  62. # Python 2.3 support
  63. def HTTPResponse__getheaders(self):
  64. """Return list of (header, value) tuples."""
  65. if self.msg is None:
  66. raise httplib.ResponseNotReady()
  67. return self.msg.items()
  68. if not hasattr(httplib.HTTPResponse, 'getheaders'):
  69. httplib.HTTPResponse.getheaders = HTTPResponse__getheaders
  70. # All exceptions raised here derive from HttpLib2Error
  71. class HttpLib2Error(Exception): pass
  72. # Some exceptions can be caught and optionally
  73. # be turned back into responses.
  74. class HttpLib2ErrorWithResponse(HttpLib2Error):
  75. def __init__(self, desc, response, content):
  76. self.response = response
  77. self.content = content
  78. HttpLib2Error.__init__(self, desc)
  79. class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass
  80. class RedirectLimit(HttpLib2ErrorWithResponse): pass
  81. class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass
  82. class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass
  83. class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass
  84. class RelativeURIError(HttpLib2Error): pass
  85. class ServerNotFoundError(HttpLib2Error): pass
  86. # Open Items:
  87. # -----------
  88. # Proxy support
  89. # Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)
  90. # Pluggable cache storage (supports storing the cache in
  91. # flat files by default. We need a plug-in architecture
  92. # that can support Berkeley DB and Squid)
  93. # == Known Issues ==
  94. # Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
  95. # Does not handle Cache-Control: max-stale
  96. # Does not use Age: headers when calculating cache freshness.
  97. # The number of redirections to follow before giving up.
  98. # Note that only GET redirects are automatically followed.
  99. # Will also honor 301 requests by saving that info and never
  100. # requesting that URI again.
  101. DEFAULT_MAX_REDIRECTS = 5
  102. # Which headers are hop-by-hop headers by default
  103. HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade']
  104. def _get_end2end_headers(response):
  105. hopbyhop = list(HOP_BY_HOP)
  106. hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',')])
  107. return [header for header in response.keys() if header not in hopbyhop]
  108. URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
  109. def parse_uri(uri):
  110. """Parses a URI using the regex given in Appendix B of RFC 3986.
  111. (scheme, authority, path, query, fragment) = parse_uri(uri)
  112. """
  113. groups = URI.match(uri).groups()
  114. return (groups[1], groups[3], groups[4], groups[6], groups[8])
  115. def urlnorm(uri):
  116. (scheme, authority, path, query, fragment) = parse_uri(uri)
  117. if not scheme or not authority:
  118. raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
  119. authority = authority.lower()
  120. scheme = scheme.lower()
  121. if not path:
  122. path = "/"
  123. # Could do syntax based normalization of the URI before
  124. # computing the digest. See Section 6.2.2 of Std 66.
  125. request_uri = query and "?".join([path, query]) or path
  126. scheme = scheme.lower()
  127. defrag_uri = scheme + "://" + authority + request_uri
  128. return scheme, authority, request_uri, defrag_uri
  129. # Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
  130. re_url_scheme = re.compile(r'^\w+://')
  131. re_slash = re.compile(r'[?/:|]+')
  132. def safename(filename):
  133. """Return a filename suitable for the cache.
  134. Strips dangerous and common characters to create a filename we
  135. can use to store the cache in.
  136. """
  137. try:
  138. if re_url_scheme.match(filename):
  139. if isinstance(filename,str):
  140. filename = filename.decode('utf-8')
  141. filename = filename.encode('idna')
  142. else:
  143. filename = filename.encode('idna')
  144. except UnicodeError:
  145. pass
  146. if isinstance(filename,unicode):
  147. filename=filename.encode('utf-8')
  148. filemd5 = md5.new(filename).hexdigest()
  149. filename = re_url_scheme.sub("", filename)
  150. filename = re_slash.sub(",", filename)
  151. # limit length of filename
  152. if len(filename)>200:
  153. filename=filename[:200]
  154. return ",".join((filename, filemd5))
  155. NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+')
  156. def _normalize_headers(headers):
  157. return dict([ (key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip()) for (key, value) in headers.iteritems()])
  158. def _parse_cache_control(headers):
  159. retval = {}
  160. if headers.has_key('cache-control'):
  161. parts = headers['cache-control'].split(',')
  162. parts_with_args = [tuple([x.strip() for x in part.split("=")]) for part in parts if -1 != part.find("=")]
  163. parts_wo_args = [(name.strip(), 1) for name in parts if -1 == name.find("=")]
  164. retval = dict(parts_with_args + parts_wo_args)
  165. return retval
  166. # Whether to use a strict mode to parse WWW-Authenticate headers
  167. # Might lead to bad results in case of ill-formed header value,
  168. # so disabled by default, falling back to relaxed parsing.
  169. # Set to true to turn on, usefull for testing servers.
  170. USE_WWW_AUTH_STRICT_PARSING = 0
  171. # In regex below:
  172. # [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP
  173. # "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space
  174. # Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both:
  175. # \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?
  176. WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$")
  177. WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$")
  178. UNQUOTE_PAIRS = re.compile(r'\\(.)')
  179. def _parse_www_authenticate(headers, headername='www-authenticate'):
  180. """Returns a dictionary of dictionaries, one dict
  181. per auth_scheme."""
  182. retval = {}
  183. if headers.has_key(headername):
  184. authenticate = headers[headername].strip()
  185. www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED
  186. while authenticate:
  187. # Break off the scheme at the beginning of the line
  188. if headername == 'authentication-info':
  189. (auth_scheme, the_rest) = ('digest', authenticate)
  190. else:
  191. (auth_scheme, the_rest) = authenticate.split(" ", 1)
  192. # Now loop over all the key value pairs that come after the scheme,
  193. # being careful not to roll into the next scheme
  194. match = www_auth.search(the_rest)
  195. auth_params = {}
  196. while match:
  197. if match and len(match.groups()) == 3:
  198. (key, value, the_rest) = match.groups()
  199. auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')])
  200. match = www_auth.search(the_rest)
  201. retval[auth_scheme.lower()] = auth_params
  202. authenticate = the_rest.strip()
  203. return retval
  204. def _entry_disposition(response_headers, request_headers):
  205. """Determine freshness from the Date, Expires and Cache-Control headers.
  206. We don't handle the following:
  207. 1. Cache-Control: max-stale
  208. 2. Age: headers are not used in the calculations.
  209. Not that this algorithm is simpler than you might think
  210. because we are operating as a private (non-shared) cache.
  211. This lets us ignore 's-maxage'. We can also ignore
  212. 'proxy-invalidate' since we aren't a proxy.
  213. We will never return a stale document as
  214. fresh as a design decision, and thus the non-implementation
  215. of 'max-stale'. This also lets us safely ignore 'must-revalidate'
  216. since we operate as if every server has sent 'must-revalidate'.
  217. Since we are private we get to ignore both 'public' and
  218. 'private' parameters. We also ignore 'no-transform' since
  219. we don't do any transformations.
  220. The 'no-store' parameter is handled at a higher level.
  221. So the only Cache-Control parameters we look at are:
  222. no-cache
  223. only-if-cached
  224. max-age
  225. min-fresh
  226. """
  227. retval = "STALE"
  228. cc = _parse_cache_control(request_headers)
  229. cc_response = _parse_cache_control(response_headers)
  230. if request_headers.has_key('pragma') and request_headers['pragma'].lower().find('no-cache') != -1:
  231. retval = "TRANSPARENT"
  232. if 'cache-control' not in request_headers:
  233. request_headers['cache-control'] = 'no-cache'
  234. elif cc.has_key('no-cache'):
  235. retval = "TRANSPARENT"
  236. elif cc_response.has_key('no-cache'):
  237. retval = "STALE"
  238. elif cc.has_key('only-if-cached'):
  239. retval = "FRESH"
  240. elif response_headers.has_key('date'):
  241. date = calendar.timegm(email.Utils.parsedate_tz(response_headers['date']))
  242. now = time.time()
  243. current_age = max(0, now - date)
  244. if cc_response.has_key('max-age'):
  245. try:
  246. freshness_lifetime = int(cc_response['max-age'])
  247. except ValueError:
  248. freshness_lifetime = 0
  249. elif response_headers.has_key('expires'):
  250. expires = email.Utils.parsedate_tz(response_headers['expires'])
  251. if None == expires:
  252. freshness_lifetime = 0
  253. else:
  254. freshness_lifetime = max(0, calendar.timegm(expires) - date)
  255. else:
  256. freshness_lifetime = 0
  257. if cc.has_key('max-age'):
  258. try:
  259. freshness_lifetime = int(cc['max-age'])
  260. except ValueError:
  261. freshness_lifetime = 0
  262. if cc.has_key('min-fresh'):
  263. try:
  264. min_fresh = int(cc['min-fresh'])
  265. except ValueError:
  266. min_fresh = 0
  267. current_age += min_fresh
  268. if freshness_lifetime > current_age:
  269. retval = "FRESH"
  270. return retval
  271. def _decompressContent(response, new_content):
  272. content = new_content
  273. try:
  274. encoding = response.get('content-encoding', None)
  275. if encoding in ['gzip', 'deflate']:
  276. if encoding == 'gzip':
  277. content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read()
  278. if encoding == 'deflate':
  279. content = zlib.decompress(content)
  280. response['content-length'] = str(len(content))
  281. del response['content-encoding']
  282. except IOError:
  283. content = ""
  284. raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content)
  285. return content
  286. def _updateCache(request_headers, response_headers, content, cache, cachekey):
  287. if cachekey:
  288. cc = _parse_cache_control(request_headers)
  289. cc_response = _parse_cache_control(response_headers)
  290. if cc.has_key('no-store') or cc_response.has_key('no-store'):
  291. cache.delete(cachekey)
  292. else:
  293. info = email.Message.Message()
  294. for key, value in response_headers.iteritems():
  295. if key not in ['status','content-encoding','transfer-encoding']:
  296. info[key] = value
  297. status = response_headers.status
  298. if status == 304:
  299. status = 200
  300. status_header = 'status: %d\r\n' % response_headers.status
  301. header_str = info.as_string()
  302. header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
  303. text = "".join([status_header, header_str, content])
  304. cache.set(cachekey, text)
  305. def _cnonce():
  306. dig = md5.new("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
  307. return dig[:16]
  308. def _wsse_username_token(cnonce, iso_now, password):
  309. return base64.encodestring(sha.new("%s%s%s" % (cnonce, iso_now, password)).digest()).strip()
  310. # For credentials we need two things, first
  311. # a pool of credential to try (not necesarily tied to BAsic, Digest, etc.)
  312. # Then we also need a list of URIs that have already demanded authentication
  313. # That list is tricky since sub-URIs can take the same auth, or the
  314. # auth scheme may change as you descend the tree.
  315. # So we also need each Auth instance to be able to tell us
  316. # how close to the 'top' it is.
  317. class Authentication(object):
  318. def __init__(self, credentials, host, request_uri, headers, response, content, http):
  319. (scheme, authority, path, query, fragment) = parse_uri(request_uri)
  320. self.path = path
  321. self.host = host
  322. self.credentials = credentials
  323. self.http = http
  324. def depth(self, request_uri):
  325. (scheme, authority, path, query, fragment) = parse_uri(request_uri)
  326. return request_uri[len(self.path):].count("/")
  327. def inscope(self, host, request_uri):
  328. # XXX Should we normalize the request_uri?
  329. (scheme, authority, path, query, fragment) = parse_uri(request_uri)
  330. return (host == self.host) and path.startswith(self.path)
  331. def request(self, method, request_uri, headers, content):
  332. """Modify the request headers to add the appropriate
  333. Authorization header. Over-rise this in sub-classes."""
  334. pass
  335. def response(self, response, content):
  336. """Gives us a chance to update with new nonces
  337. or such returned from the last authorized response.
  338. Over-rise this in sub-classes if necessary.
  339. Return TRUE is the request is to be retried, for
  340. example Digest may return stale=true.
  341. """
  342. return False
  343. class BasicAuthentication(Authentication):
  344. def __init__(self, credentials, host, request_uri, headers, response, content, http):
  345. Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
  346. def request(self, method, request_uri, headers, content):
  347. """Modify the request headers to add the appropriate
  348. Authorization header."""
  349. headers['authorization'] = 'Basic ' + base64.encodestring("%s:%s" % self.credentials).strip()
  350. class DigestAuthentication(Authentication):
  351. """Only do qop='auth' and MD5, since that
  352. is all Apache currently implements"""
  353. def __init__(self, credentials, host, request_uri, headers, response, content, http):
  354. Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
  355. challenge = _parse_www_authenticate(response, 'www-authenticate')
  356. self.challenge = challenge['digest']
  357. qop = self.challenge.get('qop')
  358. self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None
  359. if self.challenge['qop'] is None:
  360. raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop))
  361. self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5')
  362. if self.challenge['algorithm'] != 'MD5':
  363. raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
  364. self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
  365. self.challenge['nc'] = 1
  366. def request(self, method, request_uri, headers, content, cnonce = None):
  367. """Modify the request headers"""
  368. H = lambda x: md5.new(x).hexdigest()
  369. KD = lambda s, d: H("%s:%s" % (s, d))
  370. A2 = "".join([method, ":", request_uri])
  371. self.challenge['cnonce'] = cnonce or _cnonce()
  372. request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.challenge['nonce'],
  373. '%08x' % self.challenge['nc'],
  374. self.challenge['cnonce'],
  375. self.challenge['qop'], H(A2)
  376. ))
  377. headers['Authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % (
  378. self.credentials[0],
  379. self.challenge['realm'],
  380. self.challenge['nonce'],
  381. request_uri,
  382. self.challenge['algorithm'],
  383. request_digest,
  384. self.challenge['qop'],
  385. self.challenge['nc'],
  386. self.challenge['cnonce'],
  387. )
  388. self.challenge['nc'] += 1
  389. def response(self, response, content):
  390. if not response.has_key('authentication-info'):
  391. challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {})
  392. if 'true' == challenge.get('stale'):
  393. self.challenge['nonce'] = challenge['nonce']
  394. self.challenge['nc'] = 1
  395. return True
  396. else:
  397. updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})
  398. if updated_challenge.has_key('nextnonce'):
  399. self.challenge['nonce'] = updated_challenge['nextnonce']
  400. self.challenge['nc'] = 1
  401. return False
  402. class HmacDigestAuthentication(Authentication):
  403. """Adapted from Robert Sayre's code and DigestAuthentication above."""
  404. __author__ = "Thomas Broyer (t.broyer@ltgt.net)"
  405. def __init__(self, credentials, host, request_uri, headers, response, content, http):
  406. Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
  407. challenge = _parse_www_authenticate(response, 'www-authenticate')
  408. self.challenge = challenge['hmacdigest']
  409. # TODO: self.challenge['domain']
  410. self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
  411. if self.challenge['reason'] not in ['unauthorized', 'integrity']:
  412. self.challenge['reason'] = 'unauthorized'
  413. self.challenge['salt'] = self.challenge.get('salt', '')
  414. if not self.challenge.get('snonce'):
  415. raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn't contain a server nonce, or this one is empty."))
  416. self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1')
  417. if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']:
  418. raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
  419. self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1')
  420. if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
  421. raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm']))
  422. if self.challenge['algorithm'] == 'HMAC-MD5':
  423. self.hashmod = md5
  424. else:
  425. self.hashmod = sha
  426. if self.challenge['pw-algorithm'] == 'MD5':
  427. self.pwhashmod = md5
  428. else:
  429. self.pwhashmod = sha
  430. self.key = "".join([self.credentials[0], ":",
  431. self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
  432. ":", self.challenge['realm']
  433. ])
  434. self.key = self.pwhashmod.new(self.key).hexdigest().lower()
  435. def request(self, method, request_uri, headers, content):
  436. """Modify the request headers"""
  437. keys = _get_end2end_headers(headers)
  438. keylist = "".join(["%s " % k for k in keys])
  439. headers_val = "".join([headers[k] for k in keys])
  440. created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime())
  441. cnonce = _cnonce()
  442. request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
  443. request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
  444. headers['Authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
  445. self.credentials[0],
  446. self.challenge['realm'],
  447. self.challenge['snonce'],
  448. cnonce,
  449. request_uri,
  450. created,
  451. request_digest,
  452. keylist,
  453. )
  454. def response(self, response, content):
  455. challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {})
  456. if challenge.get('reason') in ['integrity', 'stale']:
  457. return True
  458. return False
  459. class WsseAuthentication(Authentication):
  460. """This is thinly tested and should not be relied upon.
  461. At this time there isn't any third party server to test against.
  462. Blogger and TypePad implemented this algorithm at one point
  463. but Blogger has since switched to Basic over HTTPS and
  464. TypePad has implemented it wrong, by never issuing a 401
  465. challenge but instead requiring your client to telepathically know that
  466. their endpoint is expecting WSSE profile="UsernameToken"."""
  467. def __init__(self, credentials, host, request_uri, headers, response, content, http):
  468. Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
  469. def request(self, method, request_uri, headers, content):
  470. """Modify the request headers to add the appropriate
  471. Authorization header."""
  472. headers['Authorization'] = 'WSSE profile="UsernameToken"'
  473. iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
  474. cnonce = _cnonce()
  475. password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1])
  476. headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % (
  477. self.credentials[0],
  478. password_digest,
  479. cnonce,
  480. iso_now)
  481. class GoogleLoginAuthentication(Authentication):
  482. def __init__(self, credentials, host, request_uri, headers, response, content, http):
  483. from urllib import urlencode
  484. Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
  485. challenge = _parse_www_authenticate(response, 'www-authenticate')
  486. service = challenge['googlelogin'].get('service', 'xapi')
  487. # Bloggger actually returns the service in the challenge
  488. # For the rest we guess based on the URI
  489. if service == 'xapi' and request_uri.find("calendar") > 0:
  490. service = "cl"
  491. # No point in guessing Base or Spreadsheet
  492. #elif request_uri.find("spreadsheets") > 0:
  493. # service = "wise"
  494. auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
  495. resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'})
  496. lines = content.split('\n')
  497. d = dict([tuple(line.split("=", 1)) for line in lines if line])
  498. if resp.status == 403:
  499. self.Auth = ""
  500. else:
  501. self.Auth = d['Auth']
  502. def request(self, method, request_uri, headers, content):
  503. """Modify the request headers to add the appropriate
  504. Authorization header."""
  505. headers['authorization'] = 'GoogleLogin Auth=' + self.Auth
  506. AUTH_SCHEME_CLASSES = {
  507. "basic": BasicAuthentication,
  508. "wsse": WsseAuthentication,
  509. "digest": DigestAuthentication,
  510. "hmacdigest": HmacDigestAuthentication,
  511. "googlelogin": GoogleLoginAuthentication
  512. }
  513. AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
  514. def _md5(s):
  515. return
  516. class FileCache(object):
  517. """Uses a local directory as a store for cached files.
  518. Not really safe to use if multiple threads or processes are going to
  519. be running on the same cache.
  520. """
  521. def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
  522. self.cache = cache
  523. self.safe = safe
  524. if not os.path.exists(cache):
  525. os.makedirs(self.cache)
  526. def get(self, key):
  527. retval = None
  528. cacheFullPath = os.path.join(self.cache, self.safe(key))
  529. try:
  530. f = file(cacheFullPath, "r")
  531. retval = f.read()
  532. f.close()
  533. except IOError:
  534. pass
  535. return retval
  536. def set(self, key, value):
  537. cacheFullPath = os.path.join(self.cache, self.safe(key))
  538. f = file(cacheFullPath, "w")
  539. f.write(value)
  540. f.close()
  541. def delete(self, key):
  542. cacheFullPath = os.path.join(self.cache, self.safe(key))
  543. if os.path.exists(cacheFullPath):
  544. os.remove(cacheFullPath)
  545. class Credentials(object):
  546. def __init__(self):
  547. self.credentials = []
  548. def add(self, name, password, domain=""):
  549. self.credentials.append((domain.lower(), name, password))
  550. def clear(self):
  551. self.credentials = []
  552. def iter(self, domain):
  553. for (cdomain, name, password) in self.credentials:
  554. if cdomain == "" or domain == cdomain:
  555. yield (name, password)
  556. class KeyCerts(Credentials):
  557. """Identical to Credentials except that
  558. name/password are mapped to key/cert."""
  559. pass
  560. class ProxyInfo(object):
  561. """Collect information required to use a proxy."""
  562. def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None):
  563. """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX
  564. constants. For example:
  565. p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000)
  566. """
  567. self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns, self.proxy_user, self.proxy_pass = proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass
  568. def astuple(self):
  569. return (self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns,
  570. self.proxy_user, self.proxy_pass)
  571. def isgood(self):
  572. return socks and (self.proxy_host != None) and (self.proxy_port != None)
  573. class HTTPConnectionWithTimeout(httplib.HTTPConnection):
  574. """HTTPConnection subclass that supports timeouts"""
  575. def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None):
  576. httplib.HTTPConnection.__init__(self, host, port, strict)
  577. self.timeout = timeout
  578. self.proxy_info = proxy_info
  579. def connect(self):
  580. """Connect to the host and port specified in __init__."""
  581. # Mostly verbatim from httplib.py.
  582. msg = "getaddrinfo returns an empty list"
  583. for res in socket.getaddrinfo(self.host, self.port, 0,
  584. socket.SOCK_STREAM):
  585. af, socktype, proto, canonname, sa = res
  586. try:
  587. if self.proxy_info and self.proxy_info.isgood():
  588. self.sock = socks.socksocket(af, socktype, proto)
  589. self.sock.setproxy(*self.proxy_info.astuple())
  590. else:
  591. self.sock = socket.socket(af, socktype, proto)
  592. # Different from httplib: support timeouts.
  593. if self.timeout is not None:
  594. self.sock.settimeout(self.timeout)
  595. # End of difference from httplib.
  596. if self.debuglevel > 0:
  597. print "connect: (%s, %s)" % (self.host, self.port)
  598. self.sock.connect(sa)
  599. except socket.error, msg:
  600. if self.debuglevel > 0:
  601. print 'connect fail:', (self.host, self.port)
  602. if self.sock:
  603. self.sock.close()
  604. self.sock = None
  605. continue
  606. break
  607. if not self.sock:
  608. raise socket.error, msg
  609. class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):
  610. "This class allows communication via SSL."
  611. def __init__(self, host, port=None, key_file=None, cert_file=None,
  612. strict=None, timeout=None, proxy_info=None):
  613. self.timeout = timeout
  614. self.proxy_info = proxy_info
  615. httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file,
  616. cert_file=cert_file, strict=strict)
  617. def connect(self):
  618. "Connect to a host on a given (SSL) port."
  619. if self.proxy_info and self.proxy_info.isgood():
  620. self.sock.setproxy(*self.proxy_info.astuple())
  621. sock.setproxy(*self.proxy_info.astuple())
  622. else:
  623. sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  624. if self.timeout is not None:
  625. sock.settimeout(self.timeout)
  626. sock.connect((self.host, self.port))
  627. ssl = socket.ssl(sock, self.key_file, self.cert_file)
  628. self.sock = httplib.FakeSocket(sock, ssl)
  629. def data_from_data_url(url):
  630. ## data:image/png;base64,BASE64DATA
  631. i = url.find('base64,')
  632. if i < 0:
  633. return None
  634. decoded = base64.b64decode(url[i+len('base64,'):])
  635. return decoded
class Http(object):
    """An HTTP client that handles:
    - all methods
    - caching
    - ETags
    - compression,
    - HTTPS
    - Basic
    - Digest
    - WSSE

    and more.
    """
    def __init__(self, cache=None, timeout=None, proxy_info=None):
        """The value of proxy_info is a ProxyInfo instance.

        If 'cache' is a string then it is used as a directory name
        for a disk cache. Otherwise it must be an object that supports
        the same interface as FileCache.

        'timeout' (seconds) is passed to every connection this client
        creates; None means the platform default.
        """
        self.proxy_info = proxy_info
        # Map "scheme:authority" to an httplib connection (see request()).
        self.connections = {}
        # The location of the cache, for now a directory
        # where cached responses are held.
        if cache and isinstance(cache, str):
            self.cache = FileCache(cache)
        else:
            self.cache = cache

        # Name/password
        self.credentials = Credentials()

        # Key/cert
        self.certificates = KeyCerts()

        # Authorization objects that have succeeded before; reused for
        # later requests in the same scope.
        self.authorizations = []

        # If set to False then no redirects are followed, even safe ones.
        self.follow_redirects = True

        # If 'follow_redirects' is True, and this is set to True then
        # all redirects are followed, including unsafe ones.
        self.follow_all_redirects = False

        # If True, never send cached ETags as If-Match/If-None-Match.
        self.ignore_etag = False

        # If True, exceptions raised during request() are converted into
        # synthetic 4xx/5xx Response objects instead of propagating.
        self.force_exception_to_status_code = False

        self.timeout = timeout

    def _auth_from_challenge(self, host, request_uri, headers, response, content):
        """A generator that creates Authorization objects
        that can be applied to requests.
        """
        challenges = _parse_www_authenticate(response, 'www-authenticate')
        for cred in self.credentials.iter(host):
            for scheme in AUTH_SCHEME_ORDER:
                # Yield an authorizer for each credential/scheme pair the
                # server actually challenged us with.
                if challenges.has_key(scheme):
                    yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)

    def add_credentials(self, name, password, domain=""):
        """Add a name and password that will be used
        any time a request requires authentication."""
        self.credentials.add(name, password, domain)

    def add_certificate(self, key, cert, domain):
        """Add a key and cert that will be used
        any time a request requires authentication."""
        self.certificates.add(key, cert, domain)

    def clear_credentials(self):
        """Remove all the names and passwords
        that are used for authentication"""
        self.credentials.clear()
        self.authorizations = []

    def _conn_request(self, conn, request_uri, method, body, headers):
        """Issue a single request on 'conn', retrying once if the
        connection appears stale. Returns (Response, content string);
        content is decompressed for every method except HEAD."""
        for i in range(2):
            try:
                conn.request(method, request_uri, body, headers)
                response = conn.getresponse()
            except socket.gaierror:
                # DNS lookup failed; retrying will not help.
                conn.close()
                raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
            except httplib.HTTPException, e:
                # The kept-alive connection may have been closed by the
                # server; reconnect and retry exactly once.
                if i == 0:
                    conn.close()
                    conn.connect()
                    continue
                else:
                    raise
            else:
                content = response.read()
                response = Response(response)
                if method != "HEAD":
                    content = _decompressContent(response, content)

            break;
        return (response, content)

    def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey):
        """Do the actual request using the connection object
        and also follow one level of redirects if necessary"""

        # Apply the most specific (deepest-path) previously-successful
        # authorization that is in scope for this host/uri, if any.
        auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
        auth = auths and sorted(auths)[0][1] or None
        if auth:
            auth.request(method, request_uri, headers, body)

        (response, content) = self._conn_request(conn, request_uri, method, body, headers)

        if auth:
            # e.g. a Digest nonce went stale; re-sign and retry once.
            if auth.response(response, body):
                auth.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers )
                response._stale_digest = 1

        if response.status == 401:
            # Try each credential/scheme pair offered in the challenge
            # until one is accepted; remember it for future requests.
            for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
                authorization.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers, )
                if response.status != 401:
                    self.authorizations.append(authorization)
                    authorization.response(response, body)
                    break

        if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303):
            if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                if redirections:
                    if not response.has_key('location') and response.status != 300:
                        raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content)
                    # Fix-up relative redirects (which violate an RFC 2616 MUST)
                    if response.has_key('location'):
                        location = response['location']
                        (scheme, authority, path, query, fragment) = parse_uri(location)
                        if authority == None:
                            response['location'] = urlparse.urljoin(absolute_uri, location)
                    if response.status == 301 and method in ["GET", "HEAD"]:
                        # Cache permanent redirects of safe methods so a
                        # later request can jump straight to the target.
                        response['-x-permanent-redirect-url'] = response['location']
                        if not response.has_key('content-location'):
                            response['content-location'] = absolute_uri
                        _updateCache(headers, response, content, self.cache, cachekey)
                    # Don't carry cache validators over to the redirect target.
                    if headers.has_key('if-none-match'):
                        del headers['if-none-match']
                    if headers.has_key('if-modified-since'):
                        del headers['if-modified-since']
                    if response.has_key('location'):
                        location = response['location']
                        old_response = copy.deepcopy(response)
                        if not old_response.has_key('content-location'):
                            old_response['content-location'] = absolute_uri
                        # Per RFC 2616, a 303 answer to a non-safe method is
                        # followed with a GET.
                        redirect_method = ((response.status == 303) and (method not in ["GET", "HEAD"])) and "GET" or method
                        (response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1)
                        response.previous = old_response
                else:
                    raise RedirectLimit( _("Redirected more times than rediection_limit allows."), response, content)
            elif response.status in [200, 203] and method == "GET":
                # Don't cache 206's since we aren't going to handle byte range requests
                if not response.has_key('content-location'):
                    response['content-location'] = absolute_uri
                _updateCache(headers, response, content, self.cache, cachekey)

        return (response, content)

    # Need to catch and rebrand some exceptions
    # Then need to optionally turn all exceptions into status codes
    # including all socket.* and httplib.* exceptions.

    def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None):
        """ Performs a single HTTP request.

        The 'uri' is the URI of the HTTP resource and can begin
        with either 'http' or 'https'. The value of 'uri' must be an absolute URI.

        The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc.
        There is no restriction on the methods allowed.

        The 'body' is the entity body to be sent with the request. It is a string
        object.

        Any extra headers that are to be sent with the request should be provided in the
        'headers' dictionary.

        The maximum number of redirects to follow before raising an
        exception is 'redirections'. The default is 5.

        The return value is a tuple of (response, content), the first
        being an instance of the 'Response' class, the second being
        a string that contains the response entity body.
        """
        try:
            if headers is None:
                headers = {}
            else:
                # Work on a lower-cased copy so the caller's dict is untouched.
                headers = _normalize_headers(headers)

            if not headers.has_key('user-agent'):
                headers['user-agent'] = "Python-httplib2/%s" % __version__

            ## special-case data: urls
            if uri.startswith("data:"):
                info = { 'status' : '200' }
                content = data_from_data_url(uri)
                if not content:
                    raise Exception("Failed to parse data url")
                response = Response(info)
                return (response, content)

            uri = iri2uri(uri)

            (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)

            # Reuse one connection per scheme/authority pair.
            conn_key = scheme+":"+authority
            if conn_key in self.connections:
                conn = self.connections[conn_key]
            else:
                if not connection_type:
                    connection_type = (scheme == 'https') and HTTPSConnectionWithTimeout or HTTPConnectionWithTimeout
                certs = list(self.certificates.iter(authority))
                if scheme == 'https' and certs:
                    # Present the first client certificate registered for
                    # this authority.
                    conn = self.connections[conn_key] = connection_type(authority, key_file=certs[0][0],
                        cert_file=certs[0][1], timeout=self.timeout, proxy_info=self.proxy_info)
                else:
                    conn = self.connections[conn_key] = connection_type(authority, timeout=self.timeout, proxy_info=self.proxy_info)
                conn.set_debuglevel(debuglevel)

            if method in ["GET", "HEAD"] and 'range' not in headers:
                headers['accept-encoding'] = 'compress, gzip'

            cache_control = _parse_cache_control(headers)

            info = email.Message.Message()
            cached_value = None
            if self.cache:
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                if cached_value:
                    # Cache entries are stored as headers + '\r\n\r\n' + body.
                    info = email.message_from_string(cached_value)
                    try:
                        content = cached_value.split('\r\n\r\n', 1)[1]
                    except IndexError:
                        # Corrupt cache entry; discard it.
                        self.cache.delete(cachekey)
                        cachekey = None
                        cached_value = None
            else:
                cachekey = None

            if method in ["PUT"] and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
                # http://www.w3.org/1999/04/Editing/
                headers['if-match'] = info['etag']

            if method not in ["GET", "HEAD"] and self.cache and cachekey:
                # RFC 2616 Section 13.10
                self.cache.delete(cachekey)

            if not cached_value and cache_control.has_key('only-if-cached'):
                # Immediately handle the case where we are instructed to only
                # check the cache; RFC 2616 Section 14.9.4
                info['status'] = '504'
                content = ""
                response = Response(info)
                return (response, content)
            elif cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
                if info.has_key('-x-permanent-redirect-url'):
                    # Should cached permanent redirects be counted in our redirection count? For now, yes.
                    (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    # Determine our course of action:
                    #   Is the cached entry fresh or stale?
                    #   Has the client requested a non-cached response?
                    #
                    # There seems to be three possible answers:
                    # 1. [FRESH] Return the cache entry w/o doing a GET
                    # 2. [STALE] Do the GET (but add in cache validators if available)
                    # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                    entry_disposition = _entry_disposition(info, headers)

                    if entry_disposition == "FRESH":
                        response = Response(info)
                        response.fromcache = True
                        return (response, content)

                    if entry_disposition == "STALE":
                        if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
                            headers['if-none-match'] = info['etag']
                        if info.has_key('last-modified') and not 'last-modified' in headers:
                            headers['if-modified-since'] = info['last-modified']
                    elif entry_disposition == "TRANSPARENT":
                        pass

                    (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)

                if response.status == 304 and method == "GET":
                    # Rewrite the cache entry with the new end-to-end headers
                    # Take all headers that are in response
                    # and overwrite their values in info.
                    # unless they are hop-by-hop, or are listed in the connection header.
                    for key in _get_end2end_headers(response):
                        info[key] = response[key]
                    merged_response = Response(info)
                    if hasattr(response, "_stale_digest"):
                        merged_response._stale_digest = response._stale_digest
                    _updateCache(headers, merged_response, content, self.cache, cachekey)
                    response = merged_response
                    response.status = 200
                    response.fromcache = True
                elif response.status == 200:
                    content = new_content
                else:
                    self.cache.delete(cachekey)
                    content = new_content
            else:
                (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
        except Exception, e:
            if self.force_exception_to_status_code:
                # Convert failures into synthetic Response objects rather
                # than letting the exception propagate.
                if isinstance(e, HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e)
                elif isinstance(e, socket.timeout):
                    content = "Request Timeout"
                    response = Response( {
                        "content-type": "text/plain",
                        "status": "408",
                        "content-length": len(content)
                        })
                    response.reason = "Request Timeout"
                else:
                    content = str(e)
                    response = Response( {
                        "content-type": "text/plain",
                        "status": "400",
                        "content-length": len(content)
                        })
                    response.reason = "Bad Request"
            else:
                raise

        return (response, content)
  934. class Response(dict):
  935. """An object more like email.Message than httplib.HTTPResponse."""
  936. """Is this response from our local cache"""
  937. fromcache = False
  938. """HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. """
  939. version = 11
  940. "Status code returned by server. "
  941. status = 200
  942. """Reason phrase returned by server."""
  943. reason = "Ok"
  944. previous = None
  945. def __init__(self, info):
  946. # info is either an email.Message or
  947. # an httplib.HTTPResponse object or just a dict
  948. if isinstance(info, httplib.HTTPResponse):
  949. for key, value in info.getheaders():
  950. self[key] = value
  951. self.status = info.status
  952. self['status'] = str(self.status)
  953. self.reason = info.reason
  954. self.version = info.version
  955. elif isinstance(info, email.Message.Message):
  956. for key, value in info.items():
  957. self[key] = value
  958. self.status = int(self['status'])
  959. else:
  960. for key, value in info.iteritems():
  961. self[key] = value
  962. self.status = int(self.get('status', self.status))
  963. def __getattr__(self, name):
  964. if name == 'dict':
  965. return self
  966. else:
  967. raise AttributeError, name