PageRenderTime 1004ms CodeModel.GetById 131ms app.highlight 707ms RepoModel.GetById 148ms app.codeStats 1ms

/Lib/urllib.py

http://unladen-swallow.googlecode.com/
Python | 1625 lines | 1614 code | 3 blank | 8 comment | 28 complexity | 0fefd7a61481221c9e3561a1774f3156 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1"""Open an arbitrary URL.
   2
   3See the following document for more info on URLs:
   4"Names and Addresses, URIs, URLs, URNs, URCs", at
   5http://www.w3.org/pub/WWW/Addressing/Overview.html
   6
   7See also the HTTP spec (from which the error codes are derived):
   8"HTTP - Hypertext Transfer Protocol", at
   9http://www.w3.org/pub/WWW/Protocols/
  10
  11Related standards and specs:
  12- RFC1808: the "relative URL" spec. (authoritative status)
  13- RFC1738 - the "URL standard". (authoritative status)
  14- RFC1630 - the "URI spec". (informational status)
  15
  16The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
  18readlines(), fileno(), close() and info().  The read*(), fileno()
  19and close() methods work like those of open files.
  20The info() method returns a mimetools.Message object which can be
  21used to query various info about the object, if available.
  22(mimetools.Message objects are queried with the getheader() method.)
  23"""
  24
  25import string
  26import socket
  27import os
  28import time
  29import sys
  30from urlparse import urljoin as basejoin
  31import warnings
  32
# Public API of this module; everything else is an implementation detail.
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "getproxies"]

__version__ = '1.17'    # XXX This version is not always updated :-(

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
  44
# Helper for non-unix systems: pick a platform-specific implementation of
# url2pathname/pathname2url at import time.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    # Posix fallback: URL paths and filesystem paths differ only in
    # percent-encoding.
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)
  62
  63# This really consists of two pieces:
  64# (1) a class which handles opening of all sorts of URLs
  65#     (plus assorted utilities etc.)
  66# (2) a set of functions for parsing URLs
  67# XXX Should these be separated out into different modules?
  68
  69
  70# Shortcut for basic usage
  71_urlopener = None
  72def urlopen(url, data=None, proxies=None):
  73    """Create a file-like object for the specified URL to read from."""
  74    from warnings import warnpy3k
  75    warnings.warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
  76                        "favor of urllib2.urlopen()", stacklevel=2)
  77
  78    global _urlopener
  79    if proxies is not None:
  80        opener = FancyURLopener(proxies=proxies)
  81    elif not _urlopener:
  82        opener = FancyURLopener()
  83        _urlopener = opener
  84    else:
  85        opener = _urlopener
  86    if data is None:
  87        return opener.open(url)
  88    else:
  89        return opener.open(url, data)
  90def urlretrieve(url, filename=None, reporthook=None, data=None):
  91    global _urlopener
  92    if not _urlopener:
  93        _urlopener = FancyURLopener()
  94    return _urlopener.retrieve(url, filename, reporthook, data)
  95def urlcleanup():
  96    if _urlopener:
  97        _urlopener.cleanup()
  98
  99# check for SSL
 100try:
 101    import ssl
 102except:
 103    _have_ssl = False
 104else:
 105    _have_ssl = True
 106
 107# exception raised when downloaded size does not match content-length
 108class ContentTooShortError(IOError):
 109    def __init__(self, message, content):
 110        IOError.__init__(self, message)
 111        self.content = content
 112
ftpcache = {}  # default FTP connection cache, shared by all URLopener instances
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level default so cleanup()/__del__ work even if __init__
    # never completed.
    __tempfiles = None

    # Default User-Agent string sent with every request.
    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        """proxies: mapping of url scheme -> proxy URL (defaults to
        getproxies()).  x509: optional 'key_file'/'cert_file' keyword
        arguments used for HTTPS client authentication."""
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        # Best-effort cleanup of temp files when the opener is collected.
        self.close()

    def close(self):
        self.cleanup()

    def cleanup(self):
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    # The file may already be gone; ignore.
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        Dispatches on the URL scheme to an open_<scheme>() method; data,
        if given, is POSTed.  Raises IOError on socket errors."""
        fullurl = unwrap(toBytes(fullurl))
        # percent encode url. fixing lame server errors like space within url
        # parts
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            # `open` here is the builtin, not this method (different scope).
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            # Scheme-less URLs are treated as local files.
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        # Scheme names may contain '-', which is illegal in method names.
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except socket.error, msg:
            # Re-raise as IOError, preserving the original traceback.
            raise IOError, ('socket error', msg), sys.exc_info()[2]

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError, ('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        reporthook, if given, is called as reporthook(blocknum, blocksize,
        totalsize) after each block.  Raises ContentTooShortError when
        fewer bytes than Content-Length arrive."""
        url = unwrap(toBytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = splittype(url)
        # Local files need no copying: hand back the real path directly.
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                del fp
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError, msg:
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                # No target given: download into a temp file whose suffix
                # mirrors the URL path's extension.
                import tempfile
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if reporthook:
                    # NOTE(review): mixed case works because mimetools
                    # header lookups are case-insensitive -- confirm.
                    if "content-length" in headers:
                        size = int(headers["Content-Length"])
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if block == "":
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()
        del fp
        del tfp

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError("retrieval incomplete: got only %i out "
                                       "of %i bytes" % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def open_http(self, url, data=None):
        """Use HTTP protocol.

        url is either a selector string, or a (proxyhost, fullurl) tuple
        set up by open() when a proxy is configured.  data, if given,
        is POSTed as application/x-www-form-urlencoded."""
        import httplib
        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxy case: url is (proxyhost, full-url).
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    # Target is exempt from proxying: talk to it directly.
                    host = realhost

            #print "proxy via http:", host, selector
        if not host: raise IOError, ('http error', 'no host given')

        if proxy_passwd:
            import base64
            proxy_auth = base64.b64encode(proxy_passwd).strip()
        else:
            proxy_auth = None

        if user_passwd:
            import base64
            auth = base64.b64encode(user_passwd).strip()
        else:
            auth = None
        h = httplib.HTTP(host)
        if data is not None:
            h.putrequest('POST', selector)
            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
            h.putheader('Content-Length', '%d' % len(data))
        else:
            h.putrequest('GET', selector)
        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
        if auth: h.putheader('Authorization', 'Basic %s' % auth)
        if realhost: h.putheader('Host', realhost)
        for args in self.addheaders: h.putheader(*args)
        h.endheaders()
        if data is not None:
            h.send(data)
        errcode, errmsg, headers = h.getreply()
        fp = h.getfile()
        if errcode == -1:
            if fp: fp.close()
            # something went wrong with the HTTP status line
            raise IOError, ('http protocol error', 0,
                            'got a bad status line', None)
        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if (200 <= errcode < 300):
            return addinfourl(fp, headers, "http:" + url, errcode)
        else:
            if data is None:
                return self.http_error(url, fp, errcode, errmsg, headers)
            else:
                return self.http_error(url, fp, errcode, errmsg, headers, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.
        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            # A falsy result means the specific handler declined; fall
            # through to the default.
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise IOError."""
        void = fp.read()
        fp.close()
        raise IOError, ('http error', errcode, errmsg, headers)

    if _have_ssl:
        def open_https(self, url, data=None):
            """Use HTTPS protocol.

            Mirrors open_http(), additionally passing the opener's
            key_file/cert_file to httplib.HTTPS for client auth."""

            import httplib
            user_passwd = None
            proxy_passwd = None
            if isinstance(url, str):
                host, selector = splithost(url)
                if host:
                    user_passwd, host = splituser(host)
                    host = unquote(host)
                realhost = host
            else:
                # Proxy case: url is (proxyhost, full-url).
                host, selector = url
                # here, we determine, whether the proxy contains authorization information
                proxy_passwd, host = splituser(host)
                urltype, rest = splittype(selector)
                url = rest
                user_passwd = None
                if urltype.lower() != 'https':
                    realhost = None
                else:
                    realhost, rest = splithost(rest)
                    if realhost:
                        user_passwd, realhost = splituser(realhost)
                    if user_passwd:
                        selector = "%s://%s%s" % (urltype, realhost, rest)
                #print "proxy via https:", host, selector
            if not host: raise IOError, ('https error', 'no host given')
            if proxy_passwd:
                import base64
                proxy_auth = base64.b64encode(proxy_passwd).strip()
            else:
                proxy_auth = None
            if user_passwd:
                import base64
                auth = base64.b64encode(user_passwd).strip()
            else:
                auth = None
            h = httplib.HTTPS(host, 0,
                              key_file=self.key_file,
                              cert_file=self.cert_file)
            if data is not None:
                h.putrequest('POST', selector)
                h.putheader('Content-Type',
                            'application/x-www-form-urlencoded')
                h.putheader('Content-Length', '%d' % len(data))
            else:
                h.putrequest('GET', selector)
            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
            if auth: h.putheader('Authorization', 'Basic %s' % auth)
            if realhost: h.putheader('Host', realhost)
            for args in self.addheaders: h.putheader(*args)
            h.endheaders()
            if data is not None:
                h.send(data)
            errcode, errmsg, headers = h.getreply()
            fp = h.getfile()
            if errcode == -1:
                if fp: fp.close()
                # something went wrong with the HTTP status line
                raise IOError, ('http protocol error', 0,
                                'got a bad status line', None)
            # According to RFC 2616, "2xx" code indicates that the client's
            # request was successfully received, understood, and accepted.
            if (200 <= errcode < 300):
                return addinfourl(fp, headers, "https:" + url, errcode)
            else:
                if data is None:
                    return self.http_error(url, fp, errcode, errmsg, headers)
                else:
                    return self.http_error(url, fp, errcode, errmsg, headers,
                                           data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
        # file://host/path with a non-local, non-empty host falls back to FTP.
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            return self.open_ftp(url)
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file.

        Synthesizes Content-Type/Content-Length/Last-modified headers from
        the filesystem; rejects URLs whose host is not this machine."""
        import mimetypes, mimetools, email.utils
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError, e:
            raise IOError(e.errno, e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        host, port = splitport(host)
        # A host naming this machine (and no explicit port) is still local.
        if not port \
           and socket.gethostbyname(host) in (localhost(), thishost()):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        raise IOError, ('local file error', 'not on local host')

    def open_ftp(self, url):
        """Use FTP protocol.

        Connections are cached in self.ftpcache keyed by
        (user, host, port, dirs); the ';type=' URL attribute selects
        ASCII/image/directory transfers."""
        if not isinstance(url, str):
            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes, mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, path = splithost(url)
        if not host: raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in self.ftpcache.keys():
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if not key in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # No filename means a directory listing; default files to
            # binary ('I' = image) transfers.
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = mimetools.Message(StringIO(headers))
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors(), msg:
            # Re-raise as IOError, preserving the original traceback.
            raise IOError, ('ftp error', msg), sys.exc_info()[2]

    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        import mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise IOError, ('data error', 'bad data URL')
        if not type:
            # RFC 2397 default media type.
            type = 'text/plain;charset=US-ASCII'
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            # Trailing ';base64' (no '=') is an encoding, not a parameter.
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        msg = []
        # NOTE(review): '%T' is a POSIX strftime extension and may not be
        # supported on all platforms (e.g. Windows) -- confirm.
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            import base64
            data = base64.decodestring(data)
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        f = StringIO(msg)
        headers = mimetools.Message(f, 0)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
 612
 613
 614class FancyURLopener(URLopener):
 615    """Derived class with handlers for errors we can handle (perhaps)."""
 616
 617    def __init__(self, *args, **kwargs):
 618        URLopener.__init__(self, *args, **kwargs)
 619        self.auth_cache = {}
 620        self.tries = 0
 621        self.maxtries = 10
 622
 623    def http_error_default(self, url, fp, errcode, errmsg, headers):
 624        """Default error handling -- don't raise an exception."""
 625        return addinfourl(fp, headers, "http:" + url, errcode)
 626
 627    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
 628        """Error 302 -- relocated (temporarily)."""
 629        self.tries += 1
 630        if self.maxtries and self.tries >= self.maxtries:
 631            if hasattr(self, "http_error_500"):
 632                meth = self.http_error_500
 633            else:
 634                meth = self.http_error_default
 635            self.tries = 0
 636            return meth(url, fp, 500,
 637                        "Internal Server Error: Redirect Recursion", headers)
 638        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
 639                                        data)
 640        self.tries = 0
 641        return result
 642
 643    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
 644        if 'location' in headers:
 645            newurl = headers['location']
 646        elif 'uri' in headers:
 647            newurl = headers['uri']
 648        else:
 649            return
 650        void = fp.read()
 651        fp.close()
 652        # In case the server sent a relative URL, join with original:
 653        newurl = basejoin(self.type + ":" + url, newurl)
 654        return self.open(newurl)
 655
 656    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
 657        """Error 301 -- also relocated (permanently)."""
 658        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 659
 660    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
 661        """Error 303 -- also relocated (essentially identical to 302)."""
 662        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 663
 664    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
 665        """Error 307 -- relocated, but turn POST into error."""
 666        if data is None:
 667            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 668        else:
 669            return self.http_error_default(url, fp, errcode, errmsg, headers)
 670
    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        if not 'www-authenticate' in headers:
            # No challenge at all.  The base handler raises IOError, so
            # execution never continues past this call.
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        # Only challenges of the form '<scheme> realm="<realm>"' are parsed.
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            # Unparseable challenge: base handler raises (see above).
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            # Only Basic auth is supported: base handler raises (see above).
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to retry_http_basic_auth / retry_https_basic_auth etc.,
        # depending on the scheme of the current request.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)
 692
    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        if not 'proxy-authenticate' in headers:
            # No challenge at all.  The base handler raises IOError, so
            # execution never continues past this call.
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        # Only challenges of the form '<scheme> realm="<realm>"' are parsed.
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            # Unparseable challenge: base handler raises (see above).
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            # Only Basic auth is supported: base handler raises (see above).
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to retry_proxy_http_basic_auth / ..._https_... depending
        # on the scheme of the current request.
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)
 714
 715    def retry_proxy_http_basic_auth(self, url, realm, data=None):
 716        host, selector = splithost(url)
 717        newurl = 'http://' + host + selector
 718        proxy = self.proxies['http']
 719        urltype, proxyhost = splittype(proxy)
 720        proxyhost, proxyselector = splithost(proxyhost)
 721        i = proxyhost.find('@') + 1
 722        proxyhost = proxyhost[i:]
 723        user, passwd = self.get_user_passwd(proxyhost, realm, i)
 724        if not (user or passwd): return None
 725        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
 726        self.proxies['http'] = 'http://' + proxyhost + proxyselector
 727        if data is None:
 728            return self.open(newurl)
 729        else:
 730            return self.open(newurl, data)
 731
 732    def retry_proxy_https_basic_auth(self, url, realm, data=None):
 733        host, selector = splithost(url)
 734        newurl = 'https://' + host + selector
 735        proxy = self.proxies['https']
 736        urltype, proxyhost = splittype(proxy)
 737        proxyhost, proxyselector = splithost(proxyhost)
 738        i = proxyhost.find('@') + 1
 739        proxyhost = proxyhost[i:]
 740        user, passwd = self.get_user_passwd(proxyhost, realm, i)
 741        if not (user or passwd): return None
 742        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
 743        self.proxies['https'] = 'https://' + proxyhost + proxyselector
 744        if data is None:
 745            return self.open(newurl)
 746        else:
 747            return self.open(newurl, data)
 748
 749    def retry_http_basic_auth(self, url, realm, data=None):
 750        host, selector = splithost(url)
 751        i = host.find('@') + 1
 752        host = host[i:]
 753        user, passwd = self.get_user_passwd(host, realm, i)
 754        if not (user or passwd): return None
 755        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
 756        newurl = 'http://' + host + selector
 757        if data is None:
 758            return self.open(newurl)
 759        else:
 760            return self.open(newurl, data)
 761
 762    def retry_https_basic_auth(self, url, realm, data=None):
 763        host, selector = splithost(url)
 764        i = host.find('@') + 1
 765        host = host[i:]
 766        user, passwd = self.get_user_passwd(host, realm, i)
 767        if not (user or passwd): return None
 768        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
 769        newurl = 'https://' + host + selector
 770        if data is None:
 771            return self.open(newurl)
 772        else:
 773            return self.open(newurl, data)
 774
 775    def get_user_passwd(self, host, realm, clear_cache = 0):
 776        key = realm + '@' + host.lower()
 777        if key in self.auth_cache:
 778            if clear_cache:
 779                del self.auth_cache[key]
 780            else:
 781                return self.auth_cache[key]
 782        user, passwd = self.prompt_user_passwd(host, realm)
 783        if user or passwd: self.auth_cache[key] = (user, passwd)
 784        return user, passwd
 785
    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Interactively ask on the terminal for a username and password
        for *realm* at *host*; returns (None, None) if the user aborts
        with Ctrl-C.
        """
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Emit a newline so the interrupted prompt line is terminated.
            print
            return None, None
 798
 799
 800# Utility functions
 801
 802_localhost = None
 803def localhost():
 804    """Return the IP address of the magic hostname 'localhost'."""
 805    global _localhost
 806    if _localhost is None:
 807        _localhost = socket.gethostbyname('localhost')
 808    return _localhost
 809
 810_thishost = None
 811def thishost():
 812    """Return the IP address of the current host."""
 813    global _thishost
 814    if _thishost is None:
 815        _thishost = socket.gethostbyname(socket.gethostname())
 816    return _thishost
 817
 818_ftperrors = None
 819def ftperrors():
 820    """Return the set of errors raised by the FTP class."""
 821    global _ftperrors
 822    if _ftperrors is None:
 823        import ftplib
 824        _ftperrors = ftplib.all_errors
 825    return _ftperrors
 826
 827_noheaders = None
 828def noheaders():
 829    """Return an empty mimetools.Message object."""
 830    global _noheaders
 831    if _noheaders is None:
 832        import mimetools
 833        try:
 834            from cStringIO import StringIO
 835        except ImportError:
 836            from StringIO import StringIO
 837        _noheaders = mimetools.Message(StringIO(), 0)
 838        _noheaders.fp.close()   # Recycle file descriptor
 839    return _noheaders
 840
 841
 842# Utility classes
 843
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # Connection parameters are kept so init() can reconnect later
        # (retrfile() calls init() again when the control channel fails).
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.init()

    def init(self):
        """(Re)open the FTP control connection, log in, and cwd through
        self.dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start retrieving *file* and return (file-like object, length).

        *type* 'd'/'D' requests a directory listing; any other value is
        sent through as the FTP transfer TYPE.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Control connection may have dropped; reconnect and retry once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # On a 550 reply fall through and try a directory listing
                # instead; any other permanent error is fatal.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm, reason:
                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
                finally:
                    # Always restore the original working directory.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                             self.endtransfer), conn[1])
    def endtransfer(self):
        """Finish any transfer in progress, swallowing FTP errors."""
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """End any pending transfer and shut down the control connection."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
 921
 922class addbase:
 923    """Base class for addinfo and addclosehook."""
 924
 925    def __init__(self, fp):
 926        self.fp = fp
 927        self.read = self.fp.read
 928        self.readline = self.fp.readline
 929        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
 930        if hasattr(self.fp, "fileno"):
 931            self.fileno = self.fp.fileno
 932        else:
 933            self.fileno = lambda: None
 934        if hasattr(self.fp, "__iter__"):
 935            self.__iter__ = self.fp.__iter__
 936            if hasattr(self.fp, "next"):
 937                self.next = self.fp.next
 938
 939    def __repr__(self):
 940        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
 941                                             id(self), self.fp)
 942
 943    def close(self):
 944        self.read = None
 945        self.readline = None
 946        self.readlines = None
 947        self.fileno = None
 948        if self.fp: self.fp.close()
 949        self.fp = None
 950
 951class addclosehook(addbase):
 952    """Class to add a close hook to an open file."""
 953
 954    def __init__(self, fp, closehook, *hookargs):
 955        addbase.__init__(self, fp)
 956        self.closehook = closehook
 957        self.hookargs = hookargs
 958
 959    def close(self):
 960        addbase.close(self)
 961        if self.closehook:
 962            self.closehook(*self.hookargs)
 963            self.closehook = None
 964            self.hookargs = None
 965
 966class addinfo(addbase):
 967    """class to add an info() method to an open file."""
 968
 969    def __init__(self, fp, headers):
 970        addbase.__init__(self, fp)
 971        self.headers = headers
 972
 973    def info(self):
 974        return self.headers
 975
 976class addinfourl(addbase):
 977    """class to add info() and geturl() methods to an open file."""
 978
 979    def __init__(self, fp, headers, url, code=None):
 980        addbase.__init__(self, fp)
 981        self.headers = headers
 982        self.url = url
 983        self.code = code
 984
 985    def info(self):
 986        return self.headers
 987
 988    def getcode(self):
 989        return self.code
 990
 991    def geturl(self):
 992        return self.url
 993
 994
 995# Utilities to parse URLs (most of these return None for missing parts):
 996# unwrap('<URL:type://host/path>') --> 'type://host/path'
 997# splittype('type:opaquestring') --> 'type', 'opaquestring'
 998# splithost('//host[:port]/path') --> 'host[:port]', '/path'
 999# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
1000# splitpasswd('user:passwd') -> 'user', 'passwd'
1001# splitport('host:port') --> 'host', 'port'
1002# splitquery('/path?query') --> '/path', 'query'
1003# splittag('/path#tag') --> '/path', 'tag'
1004# splitattr('/path;attr1=value1;attr2=value2;...') ->
1005#   '/path', ['attr1=value1', 'attr2=value2', ...]
1006# splitvalue('attr=value') --> 'attr', 'value'
1007# unquote('abc%20def') -> 'abc def'
1008# quote('abc def') -> 'abc%20def')
1009
try:
    unicode
except NameError:
    # No unicode builtin in this build: nothing can be a unicode string.
    def _is_unicode(x):
        return 0
else:
    def _is_unicode(x):
        return isinstance(x, unicode)

def toBytes(url):
    """toBytes(u"URL") --> 'URL'."""
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
1030
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
    url = url.strip()
    # Peel off the optional <...> wrapper, then the optional URL: prefix.
    if url.startswith('<') and url.endswith('>'):
        url = url[1:-1].strip()
    if url.startswith('URL:'):
        url = url[4:].strip()
    return url
1038
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
    global _typeprog
    if _typeprog is None:
        # Compile lazily so importing this module stays cheap.
        import re
        _typeprog = re.compile('^([^/:]+):')

    m = _typeprog.match(url)
    if m is None:
        return None, url
    scheme = m.group(1)
    # The scheme is case-insensitive, so normalize to lowercase.
    return scheme.lower(), url[len(scheme) + 1:]
1052
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
    global _hostprog
    if _hostprog is None:
        # Compile lazily so importing this module stays cheap.
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    m = _hostprog.match(url)
    if m is None:
        return None, url
    return m.group(1, 2)
1064
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
    global _userprog
    if _userprog is None:
        # Compile lazily so importing this module stays cheap.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    m = _userprog.match(host)
    if m is None:
        return None, host
    # Both parts are unquoted; the map() call keeps the historical
    # return type for the matched case.
    return map(unquote, m.group(1, 2))
1076
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
    global _passwdprog
    if _passwdprog is None:
        # Compile lazily so importing this module stays cheap.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    m = _passwdprog.match(user)
    if m is None:
        return user, None
    return m.group(1, 2)
1088
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'."""
    global _portprog
    if _portprog is None:
        # Compile lazily so importing this module stays cheap.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    m = _portprog.match(host)
    if m is None:
        # No numeric port present; return the input untouched.
        return host, None
    return m.group(1, 2)
1101
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning the port as an integer.

    Returns (host, defport) when no ':' is present (defport defaults to
    -1), and (host, None) when the text after the last ':' is empty or
    not a valid number.
    """
    global _nportprog
    if _nportprog is None:
        # Compile lazily so importing this module stays cheap.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    m = _nportprog.match(host)
    if m is None:
        return host, defport
    host, port = m.group(1, 2)
    if port:
        try:
            return host, int(port)
        except ValueError:
            pass
    return host, None
1123
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'."""
    global _queryprog
    if _queryprog is None:
        # Compile lazily so importing this module stays cheap.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    m = _queryprog.match(url)
    if m is None:
        return url, None
    return m.group(1, 2)
1135
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'."""
    global _tagprog
    if _tagprog is None:
        # Compile lazily so importing this module stays cheap.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    m = _tagprog.match(url)
    if m is None:
        return url, None
    return m.group(1, 2)
1147
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    path, sep, rest = url.partition(';')
    # No ';' at all yields an empty attribute list.
    return path, (rest.split(';') if sep else [])
1153
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'."""
    global _valueprog
    if _valueprog is None:
        # Compile lazily so importing this module stays cheap.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    m = _valueprog.match(attr)
    if m is None:
        return attr, None
    return m.group(1, 2)
1165
# Map every two-hex-digit pair (all case combinations) to its character.
# The previous table only held all-lowercase and all-uppercase pairs, so
# mixed-case escapes such as '%aB' were silently left unquoted; building
# the full cross product fixes that (as done in later CPython releases).
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)

def unquote(s):
    """unquote('abc%20def') -> 'abc def'."""
    res = s.split('%')
    # res[0] has no escape; every later part starts right after a '%'.
    for i in range(1, len(res)):
        item = res[i]
        try:
            res[i] = _hextochr[item[:2]] + item[2:]
        except KeyError:
            # Not followed by two hex digits: keep the '%' literally.
            res[i] = '%' + item
        except UnicodeDecodeError:
            # Unicode input whose decoded byte cannot join a str (Py2).
            res[i] = unichr(int(item[:2], 16)) + item[2:]
    return "".join(res)
1181
def unquote_plus(s):
    """unquote('%7e/abc+def') -> '~/abc def'"""
    # Form-encoded data uses '+' for spaces; translate before unquoting.
    return unquote(s.replace('+', ' '))
1186
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
_safemaps = {}

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    cachekey = (safe, always_safe)
    if cachekey in _safemaps:
        safe_map = _safemaps[cachekey]
    else:
        # Build a 256-entry char -> replacement table and cache it.
        allowed = safe + always_safe
        safe_map = {}
        for code in range(256):
            ch = chr(code)
            safe_map[ch] = ch if ch in allowed else ('%%%02X' % code)
        _safemaps[cachekey] = safe_map
    return ''.join(map(safe_map.__getitem__, s))
1225
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Let quote() pass spaces through, then fold them to '+'.
    return quote(s, safe + ' ').replace(' ', '+')
1232
def urlencode(query,doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.
    """

    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            # Re-raise with the original traceback (three-argument raise)
            # so the caller sees where the bad access actually happened.
            ty,va,tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb

    l = []
    if not doseq:
        # preserve old behavior: every value is stringified whole, even
        # if it is itself a sequence
        for k, v in query:
            k = quote_plus(str(k))
            v = quote_plus(str(v))
            l.append(k + '=' + v)
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                v = quote_plus(v)
                l.append(k + '=' + v)
            elif _is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = quote_plus(v.encode("ASCII","replace"))
                l.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    x = len(v)
                except TypeError:
                    # not a sequence
                    v = quote_plus(str(v))
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence, emitting one k=elt pair
                    # per element
                    for elt in v:
                        l.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(l)
1295
1296# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy
    (matched case-insensitively); this seems to be the standard
    convention.  If you need a different way, you can pass a proxies
    dictionary to the [Fancy]URLopener constructor.

    """
    proxies = {}
    for name, value in os.environ.items():
        name = name.lower()
        # Only non-empty values count; the scheme is the part before
        # the '_proxy' suffix.
        if value and name.endswith('_proxy'):
            proxies[name[:-6]] = value
    return proxies
1312
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # bypass when the host (with or without port) ends in a listed suffix
    for name in no_proxy.split(','):
        if not name:
            continue
        if hostonly.endswith(name) or host.endswith(name):
            return 1
    # otherwise, don't bypass
    return 0
1331
1332
1333if sys.platform == 'darwin':
1334    from _scproxy import _get_proxy_settings, _get_proxies
1335
1336    def proxy_bypass_macosx_sysconf(host):
1337        """
1338        Return True iff this host shouldn't be accessed using a proxy
1339
1340        This function uses the MacOSX framework SystemConfiguration
1341        to fetch the proxy information.
1342        """
1343        import re
1344        import socket
1345        from fnmatch import fnmatch
1346
1347        hostonly, port = splitport(host)
1348
1349        def ip2num(ipAddr):
1350            parts = ipAddr.split('.')
1351            parts = map(int, parts)
1352            if len(parts) != 4:
1353                parts = (parts + [0, 0, 0, 0])[:4]
1354            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
1355
1356        proxy_settings = _get_proxy_settings()
1357
1358        # Check for simple host names:
1359        if '.' not in host:
1360            if proxy_settings['exclude_simple']:
1361                return True
1362
1363        hostIP = None
1364
1365        for value in proxy_settings.get('exceptions', ()):
1366            # Items in the list are strings like these: *.local, 169.254/16
1367            if not value: continue
1368
1369            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
1370            if m is not None:
1371                if hostIP is None:
1372                    try:
1373                        hostIP = socket.gethostbyname(hostonly)
1374                        hostIP = ip2num(hostIP)
1375                    except socket.error:
1376                        continue
1377
1378                base = ip2num(m.group(1))
1379                mask = int(m.group(2)[1:])
1380                mask = 32 - mask
1381
1382                if (hostIP >> mask) == (base >> mask):
1383                    return True
1384
1385            elif fnmatch(host, value):
1386                return True
1387
1388        return False
1389
1390
1391    def getproxies_macosx_sysconf():
1392        """Return a dictionary of scheme -> proxy server URL mappings.
1393
1394        This funct…

Large files files are truncated, but you can click here to view the full file