/mercurial/httpconnection.py
Python | 287 lines | 199 code | 27 blank | 61 comment | 59 complexity | b9004ceab99db701cd37b97c4669770f MD5 | raw file
Possible License(s): GPL-2.0
- # httpconnection.py - urllib2 handler for new http support
- #
- # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
- # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
- # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
- # Copyright 2011 Google, Inc.
- #
- # This software may be used and distributed according to the terms of the
- # GNU General Public License version 2 or any later version.
- import logging
- import socket
- import urllib
- import urllib2
- import os
- from mercurial import httpclient
- from mercurial import sslutil
- from mercurial import util
- from mercurial.i18n import _
- # moved here from url.py to avoid a cycle
class httpsendfile(object):
    """This is a wrapper around the objects returned by python's "open".

    Its purpose is to send file-like objects via HTTP, reporting upload
    progress through ``ui.progress`` every time data is read.

    It does not, however, define a __len__ attribute because the length
    might be more than Py_ssize_t can handle; the size in bytes is
    exposed as the ``length`` attribute instead.
    """

    def __init__(self, ui, *args, **kwargs):
        """Open the file and record its size.

        ui is the object whose progress() method receives the upload
        progress.  All remaining arguments are handed to open() unchanged.
        """
        self.ui = ui
        # NOTE(review): an older comment here warned that a module-level
        # "open" function shadows the builtin.  No such function is visible
        # in this module, so this presumably resolves to the builtin open().
        self._data = open(*args, **kwargs)
        try:
            self.length = os.fstat(self._data.fileno()).st_size
        except Exception:
            # don't leak the file handle if we cannot determine its size
            self._data.close()
            raise
        self.seek = self._data.seek
        self.close = self._data.close
        self.write = self._data.write
        self._pos = 0
        # We pass double the max for total because we currently have
        # to send the bundle twice in the case of a server that
        # requires authentication. Since we can't know until we try
        # once whether authentication will be required, just lie to
        # the user and maybe the push succeeds suddenly at 50%.
        self._total = self.length // 1024 * 2

    def read(self, *args, **kwargs):
        """Read from the wrapped file and update the 'sending' progress.

        Arguments are passed straight to the wrapped file's read().
        Returns whatever that read() returned.  When it returns no data
        (end of file) the progress topic is closed instead of updated.
        An EOFError raised by the wrapped object also closes the progress
        topic and is then re-raised.
        """
        try:
            ret = self._data.read(*args, **kwargs)
        except EOFError:
            # Previously execution fell through to len(ret) with ret
            # unbound, masking the real error with an UnboundLocalError.
            self.ui.progress(_('sending'), None)
            raise
        if not ret:
            # nothing left to send: close the progress topic
            self.ui.progress(_('sending'), None)
            return ret
        self._pos += len(ret)
        self.ui.progress(_('sending'), self._pos // 1024,
                         unit=_('kb'), total=self._total)
        return ret
- # moved here from url.py to avoid a cycle
def readauthforuri(ui, uri, user):
    """Pick the [auth] group that best matches uri.

    ui supplies the configuration through configitems('auth') and receives
    a warning for every key that has no "group.setting" form.  uri is the
    URI being accessed; when it has no scheme, 'http' is assumed.  user is
    the username taken from the URI, or a false value if there was none.

    A group matches when its prefix is '*' or a leading part of the
    host/path, and the URI scheme is one of the group's schemes (the scheme
    embedded in the prefix, else the 'schemes' setting, else 'https').
    Longer prefixes win; on equal length a group that names a username
    replaces one that does not.

    Returns a (group name, settings dict) tuple, or None if nothing
    matched.  If user is set and a selected group has no username, the
    user is stored in that group's settings.
    """
    # Gather the [auth] section as {group: {setting: value}}
    groups = {}
    for name, value in ui.configitems('auth'):
        if '.' not in name:
            ui.warn(_("ignoring invalid [auth] key '%s'\n") % name)
            continue
        groupname, setting = name.rsplit('.', 1)
        if setting in ('username', 'cert', 'key'):
            value = util.expandpath(value)
        groups.setdefault(groupname, {})[setting] = value

    # Separate the scheme from the host/path part of the URI
    uriparts = uri.split('://', 1)
    if len(uriparts) == 2:
        scheme, hostpath = uriparts
    else:
        # Python 2.4.1 doesn't provide the full URI
        scheme, hostpath = 'http', uri

    # Scan every group, remembering the best candidate seen so far
    winner = None
    winnerlen = 0
    winneruser = None
    for groupname, auth in groups.items():
        # If a username was set in the URI, the entry username
        # must either match it or be unset
        if user and auth.get('username', user) != user:
            continue
        prefix = auth.get('prefix')
        if not prefix:
            continue
        pieces = prefix.split('://', 1)
        if len(pieces) > 1:
            schemes, prefix = [pieces[0]], pieces[1]
        else:
            schemes = (auth.get('schemes') or 'https').split()
        if scheme not in schemes:
            continue
        if prefix != '*' and not hostpath.startswith(prefix):
            continue
        plen = len(prefix)
        longer = plen > winnerlen
        tiebreak = (plen == winnerlen and not winneruser
                    and 'username' in auth)
        if not (longer or tiebreak):
            continue
        winnerlen = plen
        winner = groupname, auth
        winneruser = auth.get('username')
        if user and not winneruser:
            auth['username'] = user
    return winner
- # Mercurial (at least until we can remove the old codepath) requires
- # that the http response object be sufficiently file-like, so we
- # provide a close() method here.
class HTTPResponse(httpclient.HTTPResponse):
    """httpclient response with the close() method file-like users expect."""

    def close(self):
        """Do nothing; exists only so callers may call close()."""
        return None
class HTTPConnection(httpclient.HTTPConnection):
    """httpclient connection that rewinds httpsendfile bodies before sending.

    Responses are created with the module's HTTPResponse class.
    """
    response_class = HTTPResponse

    def request(self, method, uri, body=None, headers=None):
        """Issue a request through httpclient.HTTPConnection.request.

        method, uri, body and headers are forwarded unchanged, except that
        an omitted headers argument becomes a fresh empty dict (a shared
        mutable default would leak state between calls if anything
        downstream modified it).  An httpsendfile body is first rewound to
        offset 0 so a retried request sends the whole file again.
        """
        if headers is None:
            headers = {}
        if isinstance(body, httpsendfile):
            body.seek(0)
        httpclient.HTTPConnection.request(self, method, uri, body=body,
                                          headers=headers)
# Record layout applied to the stream handler that http2handler attaches
# to the 'mercurial.httpclient' logger.
LOGFMT = '%(levelname)s:%(name)s:%(lineno)d:%(message)s'
# Set to True by http2handler.__init__ once that logger has been set up,
# so the handler is attached at most once per process.
_configuredlogging = False
- # Subclass BOTH of these because otherwise urllib2 "helpfully"
- # reinserts them since it notices we don't include any subclasses of
- # them.
class http2handler(urllib2.HTTPHandler, urllib2.HTTPSHandler):
    """urllib2 handler that performs requests over httpclient connections.

    Connections are remembered per (use_ssl, host, proxy) key and an idle
    one is reused when available.
    """

    def __init__(self, ui, pwmgr):
        """Set up the handler.

        ui is used for configuration lookups and debug output; pwmgr must
        provide find_stored_password(url).  If the 'ui.http2debuglevel'
        setting is present, the 'mercurial.httpclient' logger is given
        that level and a stream handler, once per process.
        """
        global _configuredlogging
        urllib2.AbstractHTTPHandler.__init__(self)
        self.ui = ui
        self.pwmgr = pwmgr
        self._connections = {}
        # Filled in by https_open(); initialised here so that
        # _makesslconnection() never hits a missing attribute.
        self.auth = None
        loglevel = ui.config('ui', 'http2debuglevel', default=None)
        if loglevel and not _configuredlogging:
            _configuredlogging = True
            logger = logging.getLogger('mercurial.httpclient')
            logger.setLevel(getattr(logging, loglevel.upper()))
            handler = logging.StreamHandler()
            handler.setFormatter(logging.Formatter(LOGFMT))
            logger.addHandler(handler)

    def close_all(self):
        """Close and remove all connection objects being kept for reuse."""
        for openconns in self._connections.values():
            for conn in openconns:
                conn.close()
        self._connections = {}

    # shamelessly borrowed from urllib2.AbstractHTTPHandler
    def do_open(self, http_class, req, use_ssl):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from httplib.
        It is called as http_class(host, timeout=..., proxy_hostport=...)
        whenever no idle connection exists for the target.  use_ssl is
        only used as part of the connection cache key.

        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:
            - info(): return a mimetools.Message object for the headers
            - geturl(): return the original request URL
            - code: HTTP status code

        Raises urllib2.URLError if no host can be determined or if a
        socket.error occurs while sending the request or reading the
        response.
        """
        # If using a proxy, the host returned by get_host() is
        # actually the proxy. On Python 2.6.1, the real destination
        # hostname is encoded in the URI in the urllib2 request
        # object. On Python 2.6.5, it's stored in the _tunnel_host
        # attribute which has no accessor.
        tunhost = getattr(req, '_tunnel_host', None)
        host = req.get_host()
        if tunhost:
            proxyhost = host
            host = tunhost
        elif req.has_proxy():
            proxyhost = req.get_host()
            host = req.get_selector().split('://', 1)[1].split('/', 1)[0]
        else:
            proxyhost = None

        if proxyhost:
            if ':' in proxyhost:
                # Note: this means we'll explode if we try and use an
                # IPv6 http proxy. This isn't a regression, so we
                # won't worry about it for now.
                proxyhost, proxyport = proxyhost.rsplit(':', 1)
            else:
                proxyport = 3128 # squid default
            proxy = (proxyhost, proxyport)
        else:
            proxy = None

        if not host:
            raise urllib2.URLError('no host given')

        # Reuse an idle connection to the same target when there is one.
        connkey = use_ssl, host, proxy
        allconns = self._connections.get(connkey, [])
        conns = [c for c in allconns if not c.busy()]
        if conns:
            h = conns[0]
        else:
            if allconns:
                self.ui.debug('all connections for %s busy, making a new '
                              'one\n' % host)
            timeout = None
            if req.timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                timeout = req.timeout
            h = http_class(host, timeout=timeout, proxy_hostport=proxy)
            self._connections.setdefault(connkey, []).append(h)

        # Merge both header sets and normalise the header name casing.
        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        headers = dict(
            (name.title(), val) for name, val in headers.items())
        try:
            path = req.get_selector()
            # Only an absolute URI (as produced for proxied requests) is
            # reduced to its path.  An ordinary path is left alone even
            # if its query string happens to contain '://'.
            if not path.startswith('/') and '://' in path:
                hostpath = path.split('://', 1)[1]
                if '/' in hostpath:
                    path = hostpath.split('/', 1)[1]
                else:
                    # e.g. "http://host": there is no path component
                    path = ''
            # startswith() also copes with an empty path, where indexing
            # path[0] used to raise IndexError.
            if not path.startswith('/'):
                path = '/' + path
            h.request(req.get_method(), path, req.data, headers)
            r = h.getresponse()
        except socket.error as err: # XXX what error?
            raise urllib2.URLError(err)

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.
        r.recv = r.read

        resp = urllib.addinfourl(r, r.headers, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp

    # httplib always uses the given host/port as the socket connect
    # target, and then allows full URIs in the request path, which it
    # then observes and treats as a signal to do proxying instead.
    def http_open(self, req):
        """Open req without SSL, unless its full URL starts with 'https'.

        Such requests are handed to https_open() instead.
        """
        if req.get_full_url().startswith('https'):
            return self.https_open(req)

        def makehttpcon(*args, **kwargs):
            k2 = dict(kwargs)
            k2['use_ssl'] = False
            return HTTPConnection(*args, **k2)
        return self.do_open(makehttpcon, req, False)

    def https_open(self, req):
        """Open req over SSL.

        The [auth] group matching the URL (if any) is stored in self.auth
        so that _makesslconnection() can pick up its key/cert settings.
        """
        # req.get_full_url() does not contain credentials and we may
        # need them to match the certificates.
        url = req.get_full_url()
        user, password = self.pwmgr.find_stored_password(url)
        res = readauthforuri(self.ui, url, user)
        if res:
            group, auth = res
            self.auth = auth
            self.ui.debug("using auth.%s.* for authentication\n" % group)
        else:
            self.auth = None
        return self.do_open(self._makesslconnection, req, True)

    def _makesslconnection(self, host, port=443, *args, **kwargs):
        """Build an SSL-enabled HTTPConnection for host.

        A port embedded in host ("name:port" or "[addr]:port") overrides
        the port argument.  The first two extra positional arguments, if
        given, are taken as key file and cert file; 'key' and 'cert' from
        self.auth take precedence when both are present.  Remaining
        keyword arguments are passed on to HTTPConnection.
        """
        keyfile = None
        certfile = None

        # *args arrives as a tuple, which has no pop(); work on a list.
        args = list(args)
        if args: # key_file
            keyfile = args.pop(0)
        if args: # cert_file
            certfile = args.pop(0)

        # if the user has specified different key/cert files in
        # hgrc, we prefer these
        if self.auth and 'key' in self.auth and 'cert' in self.auth:
            keyfile = self.auth['key']
            certfile = self.auth['cert']

        # let host port take precedence
        if ':' in host and '[' not in host or ']:' in host:
            host, port = host.rsplit(':', 1)
            port = int(port)
            if '[' in host:
                # drop the brackets around an IPv6 literal
                host = host[1:-1]

        kwargs['keyfile'] = keyfile
        kwargs['certfile'] = certfile

        kwargs.update(sslutil.sslkwargs(self.ui, host))

        con = HTTPConnection(host, port, use_ssl=True,
                             ssl_wrap_socket=sslutil.ssl_wrap_socket,
                             ssl_validator=sslutil.validator(self.ui, host),
                             **kwargs)
        return con