PageRenderTime 308ms CodeModel.GetById 13ms app.highlight 240ms RepoModel.GetById 1ms app.codeStats 1ms

/Lib/cookielib.py

http://unladen-swallow.googlecode.com/
Python | 1789 lines | 1714 code | 24 blank | 51 comment | 77 complexity | b0d4e5e10a8f123e781f577b60395c66 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1"""HTTP cookie handling for web clients.
   2
   3This module has (now fairly distant) origins in Gisle Aas' Perl module
   4HTTP::Cookies, from the libwww-perl library.
   5
   6Docstrings, comments and debug strings in this code refer to the
   7attributes of the HTTP cookie system as cookie-attributes, to distinguish
   8them clearly from Python attributes.
   9
  10Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
  11distributed with the Python standard library, but are available from
  12http://wwwsearch.sf.net/):
  13
  14                        CookieJar____
  15                        /     \      \
  16            FileCookieJar      \      \
  17             /    |   \         \      \
  18 MozillaCookieJar | LWPCookieJar \      \
  19                  |               |      \
  20                  |   ---MSIEBase |       \
  21                  |  /      |     |        \
  22                  | /   MSIEDBCookieJar BSDDBCookieJar
  23                  |/
  24               MSIECookieJar
  25
  26"""
  27
  28__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
  29           'FileCookieJar', 'LWPCookieJar', 'lwp_cookie_str', 'LoadError',
  30           'MozillaCookieJar']
  31
  32import re, urlparse, copy, time, urllib
  33try:
  34    import threading as _threading
  35except ImportError:
  36    import dummy_threading as _threading
  37import httplib  # only for the default HTTP port
  38from calendar import timegm
  39
  40debug = False   # set to True to enable debugging via the logging module
  41logger = None
  42
  43def _debug(*args):
  44    if not debug:
  45        return
  46    global logger
  47    if not logger:
  48        import logging
  49        logger = logging.getLogger("cookielib")
  50    return logger.debug(*args)
  51
  52
# The default HTTP port taken from httplib, kept as a string.
DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
# Message text for the case where no filename was given either to the call
# or to the CookieJar instance (its users are outside this part of the file).
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
                         "instance initialised with one)")
  56
  57def _warn_unhandled_exception():
  58    # There are a few catch-all except: statements in this module, for
  59    # catching input that's bad in unexpected ways.  Warn if any
  60    # exceptions are caught there.
  61    import warnings, traceback, StringIO
  62    f = StringIO.StringIO()
  63    traceback.print_exc(None, f)
  64    msg = f.getvalue()
  65    warnings.warn("cookielib bug!\n%s" % msg, stacklevel=2)
  66
  67
  68# Date/time conversion
  69# -----------------------------------------------------------------------------
  70
  71EPOCH_YEAR = 1970
  72def _timegm(tt):
  73    year, month, mday, hour, min, sec = tt[:6]
  74    if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and
  75        (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
  76        return timegm(tt)
  77    else:
  78        return None
  79
  80DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
  81MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
  82          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
  83MONTHS_LOWER = []
  84for month in MONTHS: MONTHS_LOWER.append(month.lower())
  85
  86def time2isoz(t=None):
  87    """Return a string representing time in seconds since epoch, t.
  88
  89    If the function is called without an argument, it will use the current
  90    time.
  91
  92    The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
  93    representing Universal Time (UTC, aka GMT).  An example of this format is:
  94
  95    1994-11-24 08:49:37Z
  96
  97    """
  98    if t is None: t = time.time()
  99    year, mon, mday, hour, min, sec = time.gmtime(t)[:6]
 100    return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
 101        year, mon, mday, hour, min, sec)
 102
 103def time2netscape(t=None):
 104    """Return a string representing time in seconds since epoch, t.
 105
 106    If the function is called without an argument, it will use the current
 107    time.
 108
 109    The format of the returned string is like this:
 110
 111    Wed, DD-Mon-YYYY HH:MM:SS GMT
 112
 113    """
 114    if t is None: t = time.time()
 115    year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7]
 116    return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
 117        DAYS[wday], mday, MONTHS[mon-1], year, hour, min, sec)
 118
 119
 120UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
 121
 122TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
 123def offset_from_tz_string(tz):
 124    offset = None
 125    if tz in UTC_ZONES:
 126        offset = 0
 127    else:
 128        m = TIMEZONE_RE.search(tz)
 129        if m:
 130            offset = 3600 * int(m.group(2))
 131            if m.group(3):
 132                offset = offset + 60 * int(m.group(3))
 133            if m.group(1) == '-':
 134                offset = -offset
 135    return offset
 136
 137def _str2time(day, mon, yr, hr, min, sec, tz):
 138    # translate month name to number
 139    # month numbers start with 1 (January)
 140    try:
 141        mon = MONTHS_LOWER.index(mon.lower())+1
 142    except ValueError:
 143        # maybe it's already a number
 144        try:
 145            imon = int(mon)
 146        except ValueError:
 147            return None
 148        if 1 <= imon <= 12:
 149            mon = imon
 150        else:
 151            return None
 152
 153    # make sure clock elements are defined
 154    if hr is None: hr = 0
 155    if min is None: min = 0
 156    if sec is None: sec = 0
 157
 158    yr = int(yr)
 159    day = int(day)
 160    hr = int(hr)
 161    min = int(min)
 162    sec = int(sec)
 163
 164    if yr < 1000:
 165        # find "obvious" year
 166        cur_yr = time.localtime(time.time())[0]
 167        m = cur_yr % 100
 168        tmp = yr
 169        yr = yr + cur_yr - m
 170        m = m - tmp
 171        if abs(m) > 50:
 172            if m > 0: yr = yr + 100
 173            else: yr = yr - 100
 174
 175    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
 176    t = _timegm((yr, mon, day, hr, min, sec, tz))
 177
 178    if t is not None:
 179        # adjust time using timezone string, to get absolute time since epoch
 180        if tz is None:
 181            tz = "UTC"
 182        tz = tz.upper()
 183        offset = offset_from_tz_string(tz)
 184        if offset is None:
 185            return None
 186        t = t - offset
 187
 188    return t
 189
 190STRICT_DATE_RE = re.compile(
 191    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
 192    "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
 193WEEKDAY_RE = re.compile(
 194    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
 195LOOSE_HTTP_DATE_RE = re.compile(
 196    r"""^
 197    (\d\d?)            # day
 198       (?:\s+|[-\/])
 199    (\w+)              # month
 200        (?:\s+|[-\/])
 201    (\d+)              # year
 202    (?:
 203          (?:\s+|:)    # separator before clock
 204       (\d\d?):(\d\d)  # hour:min
 205       (?::(\d\d))?    # optional seconds
 206    )?                 # optional clock
 207       \s*
 208    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
 209       \s*
 210    (?:\(\w+\))?       # ASCII representation of timezone in parens.
 211       \s*$""", re.X)
 212def http2time(text):
 213    """Returns time in seconds since epoch of time represented by a string.
 214
 215    Return value is an integer.
 216
 217    None is returned if the format of str is unrecognized, the time is outside
 218    the representable range, or the timezone string is not recognized.  If the
 219    string contains no timezone, UTC is assumed.
 220
 221    The timezone in the string may be numerical (like "-0800" or "+0100") or a
 222    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
 223    timezone strings equivalent to UTC (zero offset) are known to the function.
 224
 225    The function loosely parses the following formats:
 226
 227    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
 228    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
 229    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
 230    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
 231    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
 232    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)
 233
 234    The parser ignores leading and trailing whitespace.  The time may be
 235    absent.
 236
 237    If the year is given with only 2 digits, the function will select the
 238    century that makes the year closest to the current date.
 239
 240    """
 241    # fast exit for strictly conforming string
 242    m = STRICT_DATE_RE.search(text)
 243    if m:
 244        g = m.groups()
 245        mon = MONTHS_LOWER.index(g[1].lower()) + 1
 246        tt = (int(g[2]), mon, int(g[0]),
 247              int(g[3]), int(g[4]), float(g[5]))
 248        return _timegm(tt)
 249
 250    # No, we need some messy parsing...
 251
 252    # clean up
 253    text = text.lstrip()
 254    text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday
 255
 256    # tz is time zone specifier string
 257    day, mon, yr, hr, min, sec, tz = [None]*7
 258
 259    # loose regexp parse
 260    m = LOOSE_HTTP_DATE_RE.search(text)
 261    if m is not None:
 262        day, mon, yr, hr, min, sec, tz = m.groups()
 263    else:
 264        return None  # bad format
 265
 266    return _str2time(day, mon, yr, hr, min, sec, tz)
 267
 268ISO_DATE_RE = re.compile(
 269    """^
 270    (\d{4})              # year
 271       [-\/]?
 272    (\d\d?)              # numerical month
 273       [-\/]?
 274    (\d\d?)              # day
 275   (?:
 276         (?:\s+|[-:Tt])  # separator before clock
 277      (\d\d?):?(\d\d)    # hour:min
 278      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
 279   )?                    # optional clock
 280      \s*
 281   ([-+]?\d\d?:?(:?\d\d)?
 282    |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
 283      \s*$""", re.X)
 284def iso2time(text):
 285    """
 286    As for http2time, but parses the ISO 8601 formats:
 287
 288    1994-02-03 14:15:29 -0100    -- ISO 8601 format
 289    1994-02-03 14:15:29          -- zone is optional
 290    1994-02-03                   -- only date
 291    1994-02-03T14:15:29          -- Use T as separator
 292    19940203T141529Z             -- ISO 8601 compact format
 293    19940203                     -- only date
 294
 295    """
 296    # clean up
 297    text = text.lstrip()
 298
 299    # tz is time zone specifier string
 300    day, mon, yr, hr, min, sec, tz = [None]*7
 301
 302    # loose regexp parse
 303    m = ISO_DATE_RE.search(text)
 304    if m is not None:
 305        # XXX there's an extra bit of the timezone I'm ignoring here: is
 306        #   this the right thing to do?
 307        yr, mon, day, hr, min, sec, tz, _ = m.groups()
 308    else:
 309        return None  # bad format
 310
 311    return _str2time(day, mon, yr, hr, min, sec, tz)
 312
 313
 314# Header parsing
 315# -----------------------------------------------------------------------------
 316
 317def unmatched(match):
 318    """Return unmatched part of re.Match object."""
 319    start, end = match.span(0)
 320    return match.string[:start]+match.string[end:]
 321
# All three patterns are anchored at the start of the remaining text, so each
# successful match consumes a prefix of it.
# A token: a run of characters other than "=", whitespace, ";" and ",".
HEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
# "=" followed by a double-quoted value; backslash escapes are allowed inside.
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
# "=" followed by an unquoted (possibly empty) value.
HEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
# A backslash escape; used to unescape quoted values.
HEADER_ESCAPE_RE = re.compile(r"\\(.)")
def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    The function knows how to deal with ",", ";" and "=" as well as quoted
    values after "=".  A list of space separated tokens are parsed as if they
    were separated by ";".

    If the header_values passed as argument contains multiple values, then they
    are treated as if they were a single value separated by comma ",".

    This means that this function is useful for parsing header fields that
    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
    the requirement for tokens).

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header is represented by a list of key/value pairs.  The value for a
    simple token (not part of a parameter) is None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    This is easier to describe with some examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    # A bare string would be iterated character by character below; insist
    # on a sequence of header strings.
    assert not isinstance(header_values, basestring)
    result = []
    for text in header_values:
        orig_text = text  # kept only for the diagnostic message below
        pairs = []
        # Each pass consumes a prefix of text: a token with its optional
        # value, a header-separating comma, or a run of junk characters.
        while text:
            m = HEADER_TOKEN_RE.search(text)
            if m:
                text = unmatched(m)
                name = m.group(1)
                m = HEADER_QUOTED_VALUE_RE.search(text)
                if m:  # quoted value
                    text = unmatched(m)
                    value = m.group(1)
                    # drop the backslash from each backslash escape
                    value = HEADER_ESCAPE_RE.sub(r"\1", value)
                else:
                    m = HEADER_VALUE_RE.search(text)
                    if m:  # unquoted value
                        text = unmatched(m)
                        value = m.group(1)
                        value = value.rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
            elif text.lstrip().startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = text.lstrip()[1:]
                # the comma ends the current header's list of pairs
                if pairs: result.append(pairs)
                pairs = []
            else:
                # skip junk
                non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
                # guards against looping forever without consuming input
                assert nr_junk_chars > 0, (
                    "split_header_words bug: '%s', '%s', %s" %
                    (orig_text, text, pairs))
                text = non_junk
        if pairs: result.append(pairs)
    return result
 410
 411HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
 412def join_header_words(lists):
 413    """Do the inverse (almost) of the conversion done by split_header_words.
 414
 415    Takes a list of lists of (key, value) pairs and produces a single header
 416    value.  Attribute values are quoted if needed.
 417
 418    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
 419    'text/plain; charset="iso-8859/1"'
 420    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
 421    'text/plain, charset="iso-8859/1"'
 422
 423    """
 424    headers = []
 425    for pairs in lists:
 426        attr = []
 427        for k, v in pairs:
 428            if v is not None:
 429                if not re.search(r"^\w+$", v):
 430                    v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v)  # escape " and \
 431                    v = '"%s"' % v
 432                k = "%s=%s" % (k, v)
 433            attr.append(k)
 434        if attr: headers.append("; ".join(attr))
 435    return ", ".join(headers)
 436
 437def parse_ns_headers(ns_headers):
 438    """Ad-hoc parser for Netscape protocol cookie-attributes.
 439
 440    The old Netscape cookie format for Set-Cookie can for instance contain
 441    an unquoted "," in the expires field, so we have to use this ad-hoc
 442    parser instead of split_header_words.
 443
 444    XXX This may not make the best possible effort to parse all the crap
 445    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
 446    parser is probably better, so could do worse than following that if
 447    this ever gives any trouble.
 448
 449    Currently, this is also used for parsing RFC 2109 cookies.
 450
 451    """
 452    known_attrs = ("expires", "domain", "path", "secure",
 453                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
 454                   "port", "max-age")
 455
 456    result = []
 457    for ns_header in ns_headers:
 458        pairs = []
 459        version_set = False
 460        for ii, param in enumerate(re.split(r";\s*", ns_header)):
 461            param = param.rstrip()
 462            if param == "": continue
 463            if "=" not in param:
 464                k, v = param, None
 465            else:
 466                k, v = re.split(r"\s*=\s*", param, 1)
 467                k = k.lstrip()
 468            if ii != 0:
 469                lc = k.lower()
 470                if lc in known_attrs:
 471                    k = lc
 472                if k == "version":
 473                    # This is an RFC 2109 cookie.
 474                    version_set = True
 475                if k == "expires":
 476                    # convert expires date to seconds since epoch
 477                    if v.startswith('"'): v = v[1:]
 478                    if v.endswith('"'): v = v[:-1]
 479                    v = http2time(v)  # None if invalid
 480            pairs.append((k, v))
 481
 482        if pairs:
 483            if not version_set:
 484                pairs.append(("version", "0"))
 485            result.append(pairs)
 486
 487    return result
 488
 489
 490IPV4_RE = re.compile(r"\.\d+$")
 491def is_HDN(text):
 492    """Return True if text is a host domain name."""
 493    # XXX
 494    # This may well be wrong.  Which RFC is HDN defined in, if any (for
 495    #  the purposes of RFC 2965)?
 496    # For the current implementation, what about IPv6?  Remember to look
 497    #  at other uses of IPV4_RE also, if change this.
 498    if IPV4_RE.search(text):
 499        return False
 500    if text == "":
 501        return False
 502    if text[0] == "." or text[-1] == ".":
 503        return False
 504    return True
 505
 506def domain_match(A, B):
 507    """Return True if domain A domain-matches domain B, according to RFC 2965.
 508
 509    A and B may be host domain names or IP addresses.
 510
 511    RFC 2965, section 1:
 512
 513    Host names can be specified either as an IP address or a HDN string.
 514    Sometimes we compare one host name with another.  (Such comparisons SHALL
 515    be case-insensitive.)  Host A's name domain-matches host B's if
 516
 517         *  their host name strings string-compare equal; or
 518
 519         * A is a HDN string and has the form NB, where N is a non-empty
 520            name string, B has the form .B', and B' is a HDN string.  (So,
 521            x.y.com domain-matches .Y.com but not Y.com.)
 522
 523    Note that domain-match is not a commutative operation: a.b.c.com
 524    domain-matches .c.com, but not the reverse.
 525
 526    """
 527    # Note that, if A or B are IP addresses, the only relevant part of the
 528    # definition of the domain-match algorithm is the direct string-compare.
 529    A = A.lower()
 530    B = B.lower()
 531    if A == B:
 532        return True
 533    if not is_HDN(A):
 534        return False
 535    i = A.rfind(B)
 536    if i == -1 or i == 0:
 537        # A does not have form NB, or N is the empty string
 538        return False
 539    if not B.startswith("."):
 540        return False
 541    if not is_HDN(B[1:]):
 542        return False
 543    return True
 544
 545def liberal_is_HDN(text):
 546    """Return True if text is a sort-of-like a host domain name.
 547
 548    For accepting/blocking domains.
 549
 550    """
 551    if IPV4_RE.search(text):
 552        return False
 553    return True
 554
 555def user_domain_match(A, B):
 556    """For blocking/accepting domains.
 557
 558    A and B may be host domain names or IP addresses.
 559
 560    """
 561    A = A.lower()
 562    B = B.lower()
 563    if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
 564        if A == B:
 565            # equal IP addresses
 566            return True
 567        return False
 568    initial_dot = B.startswith(".")
 569    if initial_dot and A.endswith(B):
 570        return True
 571    if not initial_dot and A == B:
 572        return True
 573    return False
 574
 575cut_port_re = re.compile(r":\d+$")
 576def request_host(request):
 577    """Return request-host, as defined by RFC 2965.
 578
 579    Variation from RFC: returned value is lowercased, for convenient
 580    comparison.
 581
 582    """
 583    url = request.get_full_url()
 584    host = urlparse.urlparse(url)[1]
 585    if host == "":
 586        host = request.get_header("Host", "")
 587
 588    # remove port, if present
 589    host = cut_port_re.sub("", host, 1)
 590    return host.lower()
 591
 592def eff_request_host(request):
 593    """Return a tuple (request-host, effective request-host name).
 594
 595    As defined by RFC 2965, except both are lowercased.
 596
 597    """
 598    erhn = req_host = request_host(request)
 599    if req_host.find(".") == -1 and not IPV4_RE.search(req_host):
 600        erhn = req_host + ".local"
 601    return req_host, erhn
 602
 603def request_path(request):
 604    """request-URI, as defined by RFC 2965."""
 605    url = request.get_full_url()
 606    #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url)
 607    #req_path = escape_path("".join(urlparse.urlparse(url)[2:]))
 608    path, parameters, query, frag = urlparse.urlparse(url)[2:]
 609    if parameters:
 610        path = "%s;%s" % (path, parameters)
 611    path = escape_path(path)
 612    req_path = urlparse.urlunparse(("", "", path, "", query, frag))
 613    if not req_path.startswith("/"):
 614        # fix bad RFC 2396 absoluteURI
 615        req_path = "/"+req_path
 616    return req_path
 617
 618def request_port(request):
 619    host = request.get_host()
 620    i = host.find(':')
 621    if i >= 0:
 622        port = host[i+1:]
 623        try:
 624            int(port)
 625        except ValueError:
 626            _debug("nonnumeric port: '%s'", port)
 627            return None
 628    else:
 629        port = DEFAULT_HTTP_PORT
 630    return port
 631
 632# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
 633# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
 634HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
 635ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
 636def uppercase_escaped_char(match):
 637    return "%%%s" % match.group(1).upper()
 638def escape_path(path):
 639    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
 640    # There's no knowing what character encoding was used to create URLs
 641    # containing %-escapes, but since we have to pick one to escape invalid
 642    # path characters, we pick UTF-8, as recommended in the HTML 4.0
 643    # specification:
 644    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
 645    # And here, kind of: draft-fielding-uri-rfc2396bis-03
 646    # (And in draft IRI specification: draft-duerst-iri-05)
 647    # (And here, for new URI schemes: RFC 2718)
 648    if isinstance(path, unicode):
 649        path = path.encode("utf-8")
 650    path = urllib.quote(path, HTTP_PATH_SAFE)
 651    path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
 652    return path
 653
 654def reach(h):
 655    """Return reach of host h, as defined by RFC 2965, section 1.
 656
 657    The reach R of a host name H is defined as follows:
 658
 659       *  If
 660
 661          -  H is the host domain name of a host; and,
 662
 663          -  H has the form A.B; and
 664
 665          -  A has no embedded (that is, interior) dots; and
 666
 667          -  B has at least one embedded dot, or B is the string "local".
 668             then the reach of H is .B.
 669
 670       *  Otherwise, the reach of H is H.
 671
 672    >>> reach("www.acme.com")
 673    '.acme.com'
 674    >>> reach("acme.com")
 675    'acme.com'
 676    >>> reach("acme.local")
 677    '.local'
 678
 679    """
 680    i = h.find(".")
 681    if i >= 0:
 682        #a = h[:i]  # this line is only here to show what a is
 683        b = h[i+1:]
 684        i = b.find(".")
 685        if is_HDN(h) and (i >= 0 or b == "local"):
 686            return "."+b
 687    return h
 688
 689def is_third_party(request):
 690    """
 691
 692    RFC 2965, section 3.3.6:
 693
 694        An unverifiable transaction is to a third-party host if its request-
 695        host U does not domain-match the reach R of the request-host O in the
 696        origin transaction.
 697
 698    """
 699    req_host = request_host(request)
 700    if not domain_match(req_host, reach(request.get_origin_req_host())):
 701        return True
 702    else:
 703        return False
 704
 705
 706class Cookie:
 707    """HTTP Cookie.
 708
 709    This class represents both Netscape and RFC 2965 cookies.
 710
 711    This is deliberately a very simple class.  It just holds attributes.  It's
 712    possible to construct Cookie instances that don't comply with the cookie
 713    standards.  CookieJar.make_cookies is the factory function for Cookie
 714    objects -- it deals with cookie parsing, supplying defaults, and
 715    normalising to the representation used in this class.  CookiePolicy is
 716    responsible for checking them to see whether they should be accepted from
 717    and returned to the server.
 718
 719    Note that the port may be present in the headers, but unspecified ("Port"
 720    rather than"Port=80", for example); if this is the case, port is None.
 721
 722    """
 723
 724    def __init__(self, version, name, value,
 725                 port, port_specified,
 726                 domain, domain_specified, domain_initial_dot,
 727                 path, path_specified,
 728                 secure,
 729                 expires,
 730                 discard,
 731                 comment,
 732                 comment_url,
 733                 rest,
 734                 rfc2109=False,
 735                 ):
 736
 737        if version is not None: version = int(version)
 738        if expires is not None: expires = int(expires)
 739        if port is None and port_specified is True:
 740            raise ValueError("if port is None, port_specified must be false")
 741
 742        self.version = version
 743        self.name = name
 744        self.value = value
 745        self.port = port
 746        self.port_specified = port_specified
 747        # normalise case, as per RFC 2965 section 3.3.3
 748        self.domain = domain.lower()
 749        self.domain_specified = domain_specified
 750        # Sigh.  We need to know whether the domain given in the
 751        # cookie-attribute had an initial dot, in order to follow RFC 2965
 752        # (as clarified in draft errata).  Needed for the returned $Domain
 753        # value.
 754        self.domain_initial_dot = domain_initial_dot
 755        self.path = path
 756        self.path_specified = path_specified
 757        self.secure = secure
 758        self.expires = expires
 759        self.discard = discard
 760        self.comment = comment
 761        self.comment_url = comment_url
 762        self.rfc2109 = rfc2109
 763
 764        self._rest = copy.copy(rest)
 765
 766    def has_nonstandard_attr(self, name):
 767        return name in self._rest
 768    def get_nonstandard_attr(self, name, default=None):
 769        return self._rest.get(name, default)
 770    def set_nonstandard_attr(self, name, value):
 771        self._rest[name] = value
 772
 773    def is_expired(self, now=None):
 774        if now is None: now = time.time()
 775        if (self.expires is not None) and (self.expires <= now):
 776            return True
 777        return False
 778
 779    def __str__(self):
 780        if self.port is None: p = ""
 781        else: p = ":"+self.port
 782        limit = self.domain + p + self.path
 783        if self.value is not None:
 784            namevalue = "%s=%s" % (self.name, self.value)
 785        else:
 786            namevalue = self.name
 787        return "<Cookie %s for %s>" % (namevalue, limit)
 788
 789    def __repr__(self):
 790        args = []
 791        for name in ("version", "name", "value",
 792                     "port", "port_specified",
 793                     "domain", "domain_specified", "domain_initial_dot",
 794                     "path", "path_specified",
 795                     "secure", "expires", "discard", "comment", "comment_url",
 796                     ):
 797            attr = getattr(self, name)
 798            args.append("%s=%s" % (name, repr(attr)))
 799        args.append("rest=%s" % repr(self._rest))
 800        args.append("rfc2109=%s" % repr(self.rfc2109))
 801        return "Cookie(%s)" % ", ".join(args)
 802
 803
 804class CookiePolicy:
 805    """Defines which cookies get accepted from and returned to server.
 806
 807    May also modify cookies, though this is probably a bad idea.
 808
 809    The subclass DefaultCookiePolicy defines the standard rules for Netscape
 810    and RFC 2965 cookies -- override that if you want a customised policy.
 811
 812    """
 813    def set_ok(self, cookie, request):
 814        """Return true if (and only if) cookie should be accepted from server.
 815
 816        Currently, pre-expired cookies never get this far -- the CookieJar
 817        class deletes such cookies itself.
 818
 819        """
 820        raise NotImplementedError()
 821
 822    def return_ok(self, cookie, request):
 823        """Return true if (and only if) cookie should be returned to server."""
 824        raise NotImplementedError()
 825
 826    def domain_return_ok(self, domain, request):
 827        """Return false if cookies should not be returned, given cookie domain.
 828        """
 829        return True
 830
 831    def path_return_ok(self, path, request):
 832        """Return false if cookies should not be returned, given cookie path.
 833        """
 834        return True
 835
 836
 837class DefaultCookiePolicy(CookiePolicy):
 838    """Implements the standard rules for accepting and returning cookies."""
 839
 840    DomainStrictNoDots = 1
 841    DomainStrictNonDomain = 2
 842    DomainRFC2965Match = 4
 843
 844    DomainLiberal = 0
 845    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
 846
 847    def __init__(self,
 848                 blocked_domains=None, allowed_domains=None,
 849                 netscape=True, rfc2965=False,
 850                 rfc2109_as_netscape=None,
 851                 hide_cookie2=False,
 852                 strict_domain=False,
 853                 strict_rfc2965_unverifiable=True,
 854                 strict_ns_unverifiable=False,
 855                 strict_ns_domain=DomainLiberal,
 856                 strict_ns_set_initial_dollar=False,
 857                 strict_ns_set_path=False,
 858                 ):
 859        """Constructor arguments should be passed as keyword arguments only."""
 860        self.netscape = netscape
 861        self.rfc2965 = rfc2965
 862        self.rfc2109_as_netscape = rfc2109_as_netscape
 863        self.hide_cookie2 = hide_cookie2
 864        self.strict_domain = strict_domain
 865        self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
 866        self.strict_ns_unverifiable = strict_ns_unverifiable
 867        self.strict_ns_domain = strict_ns_domain
 868        self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
 869        self.strict_ns_set_path = strict_ns_set_path
 870
 871        if blocked_domains is not None:
 872            self._blocked_domains = tuple(blocked_domains)
 873        else:
 874            self._blocked_domains = ()
 875
 876        if allowed_domains is not None:
 877            allowed_domains = tuple(allowed_domains)
 878        self._allowed_domains = allowed_domains
 879
 880    def blocked_domains(self):
 881        """Return the sequence of blocked domains (as a tuple)."""
 882        return self._blocked_domains
 883    def set_blocked_domains(self, blocked_domains):
 884        """Set the sequence of blocked domains."""
 885        self._blocked_domains = tuple(blocked_domains)
 886
 887    def is_blocked(self, domain):
 888        for blocked_domain in self._blocked_domains:
 889            if user_domain_match(domain, blocked_domain):
 890                return True
 891        return False
 892
 893    def allowed_domains(self):
 894        """Return None, or the sequence of allowed domains (as a tuple)."""
 895        return self._allowed_domains
 896    def set_allowed_domains(self, allowed_domains):
 897        """Set the sequence of allowed domains, or None."""
 898        if allowed_domains is not None:
 899            allowed_domains = tuple(allowed_domains)
 900        self._allowed_domains = allowed_domains
 901
 902    def is_not_allowed(self, domain):
 903        if self._allowed_domains is None:
 904            return False
 905        for allowed_domain in self._allowed_domains:
 906            if user_domain_match(domain, allowed_domain):
 907                return False
 908        return True
 909
 910    def set_ok(self, cookie, request):
 911        """
 912        If you override .set_ok(), be sure to call this method.  If it returns
 913        false, so should your subclass (assuming your subclass wants to be more
 914        strict about which cookies to accept).
 915
 916        """
 917        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
 918
 919        assert cookie.name is not None
 920
 921        for n in "version", "verifiability", "name", "path", "domain", "port":
 922            fn_name = "set_ok_"+n
 923            fn = getattr(self, fn_name)
 924            if not fn(cookie, request):
 925                return False
 926
 927        return True
 928
 929    def set_ok_version(self, cookie, request):
 930        if cookie.version is None:
 931            # Version is always set to 0 by parse_ns_headers if it's a Netscape
 932            # cookie, so this must be an invalid RFC 2965 cookie.
 933            _debug("   Set-Cookie2 without version attribute (%s=%s)",
 934                   cookie.name, cookie.value)
 935            return False
 936        if cookie.version > 0 and not self.rfc2965:
 937            _debug("   RFC 2965 cookies are switched off")
 938            return False
 939        elif cookie.version == 0 and not self.netscape:
 940            _debug("   Netscape cookies are switched off")
 941            return False
 942        return True
 943
 944    def set_ok_verifiability(self, cookie, request):
 945        if request.is_unverifiable() and is_third_party(request):
 946            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
 947                _debug("   third-party RFC 2965 cookie during "
 948                             "unverifiable transaction")
 949                return False
 950            elif cookie.version == 0 and self.strict_ns_unverifiable:
 951                _debug("   third-party Netscape cookie during "
 952                             "unverifiable transaction")
 953                return False
 954        return True
 955
 956    def set_ok_name(self, cookie, request):
 957        # Try and stop servers setting V0 cookies designed to hack other
 958        # servers that know both V0 and V1 protocols.
 959        if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
 960            cookie.name.startswith("$")):
 961            _debug("   illegal name (starts with '$'): '%s'", cookie.name)
 962            return False
 963        return True
 964
 965    def set_ok_path(self, cookie, request):
 966        if cookie.path_specified:
 967            req_path = request_path(request)
 968            if ((cookie.version > 0 or
 969                 (cookie.version == 0 and self.strict_ns_set_path)) and
 970                not req_path.startswith(cookie.path)):
 971                _debug("   path attribute %s is not a prefix of request "
 972                       "path %s", cookie.path, req_path)
 973                return False
 974        return True
 975
    def set_ok_domain(self, cookie, request):
        """Return true if the cookie's domain is acceptable for this request.

        The user block-list and allow-list are consulted first.  The remaining
        checks only run when the cookie carried an explicit domain: the
        optional country-code second-level-domain guard (strict_domain), the
        embedded-dot rule, and the host/domain matching rules selected by the
        cookie version and the strict_ns_domain flags.

        """
        if self.is_blocked(cookie.domain):
            _debug("   domain %s is in user block-list", cookie.domain)
            return False
        if self.is_not_allowed(cookie.domain):
            _debug("   domain %s is not in user allow-list", cookie.domain)
            return False
        if cookie.domain_specified:
            # req_host is used for the host-prefix check below, erhn (the
            # effective request-host name) for all suffix/domain matching.
            req_host, erhn = eff_request_host(request)
            domain = cookie.domain
            if self.strict_domain and (domain.count(".") >= 2):
                # XXX This should probably be compared with the Konqueror
                # (kcookiejar.cpp) and Mozilla implementations, but it's a
                # losing battle.
                i = domain.rfind(".")       # last dot
                j = domain.rfind(".", 0, i) # dot before the last one
                if j == 0:  # domain like .foo.bar
                    tld = domain[i+1:]
                    sld = domain[j+1:i]
                    # Refuse ".<well-known sld>.<two-letter tld>".
                    if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
                       "gov", "mil", "int", "aero", "biz", "cat", "coop",
                       "info", "jobs", "mobi", "museum", "name", "pro",
                       "travel", "eu") and len(tld) == 2:
                        # domain like .co.uk
                        _debug("   country-code second level domain %s", domain)
                        return False
            # Ignoring one leading dot, the domain must still contain a dot;
            # the literal domain ".local" is the only exception.
            if domain.startswith("."):
                undotted_domain = domain[1:]
            else:
                undotted_domain = domain
            embedded_dots = (undotted_domain.find(".") >= 0)
            if not embedded_dots and domain != ".local":
                _debug("   non-local domain %s contains no embedded dot",
                       domain)
                return False
            if cookie.version == 0:
                # Netscape cookies: the request-host, as is or with a dot put
                # in front, must end with the cookie domain.
                # NOTE(review): the debug message below says "end end".
                if (not erhn.endswith(domain) and
                    (not erhn.startswith(".") and
                     not ("."+erhn).endswith(domain))):
                    _debug("   effective request-host %s (even with added "
                           "initial dot) does not end end with %s",
                           erhn, domain)
                    return False
            # domain_match() is applied to version > 0 cookies always, and to
            # version 0 cookies only if DomainRFC2965Match is set.
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainRFC2965Match)):
                if not domain_match(erhn, domain):
                    _debug("   effective request-host %s does not domain-match "
                           "%s", erhn, domain)
                    return False
            # The part of the request host in front of the cookie domain must
            # not contain a dot, unless the host matches IPV4_RE.  Enforced for
            # version > 0 cookies always, and for version 0 cookies only if
            # DomainStrictNoDots is set.
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainStrictNoDots)):
                host_prefix = req_host[:-len(domain)]
                if (host_prefix.find(".") >= 0 and
                    not IPV4_RE.search(req_host)):
                    _debug("   host prefix %s for domain %s contains a dot",
                           host_prefix, domain)
                    return False
        return True
1034
1035    def set_ok_port(self, cookie, request):
1036        if cookie.port_specified:
1037            req_port = request_port(request)
1038            if req_port is None:
1039                req_port = "80"
1040            else:
1041                req_port = str(req_port)
1042            for p in cookie.port.split(","):
1043                try:
1044                    int(p)
1045                except ValueError:
1046                    _debug("   bad port %s (not numeric)", p)
1047                    return False
1048                if p == req_port:
1049                    break
1050            else:
1051                _debug("   request port (%s) not found in %s",
1052                       req_port, cookie.port)
1053                return False
1054        return True
1055
1056    def return_ok(self, cookie, request):
1057        """
1058        If you override .return_ok(), be sure to call this method.  If it
1059        returns false, so should your subclass (assuming your subclass wants to
1060        be more strict about which cookies to return).
1061
1062        """
1063        # Path has already been checked by .path_return_ok(), and domain
1064        # blocking done by .domain_return_ok().
1065        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
1066
1067        for n in "version", "verifiability", "secure", "expires", "port", "domain":
1068            fn_name = "return_ok_"+n
1069            fn = getattr(self, fn_name)
1070            if not fn(cookie, request):
1071                return False
1072        return True
1073
1074    def return_ok_version(self, cookie, request):
1075        if cookie.version > 0 and not self.rfc2965:
1076            _debug("   RFC 2965 cookies are switched off")
1077            return False
1078        elif cookie.version == 0 and not self.netscape:
1079            _debug("   Netscape cookies are switched off")
1080            return False
1081        return True
1082
1083    def return_ok_verifiability(self, cookie, request):
1084        if request.is_unverifiable() and is_third_party(request):
1085            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
1086                _debug("   third-party RFC 2965 cookie during unverifiable "
1087                       "transaction")
1088                return False
1089            elif cookie.version == 0 and self.strict_ns_unverifiable:
1090                _debug("   third-party Netscape cookie during unverifiable "
1091                       "transaction")
1092                return False
1093        return True
1094
1095    def return_ok_secure(self, cookie, request):
1096        if cookie.secure and request.get_type() != "https":
1097            _debug("   secure cookie with non-secure request")
1098            return False
1099        return True
1100
1101    def return_ok_expires(self, cookie, request):
1102        if cookie.is_expired(self._now):
1103            _debug("   cookie expired")
1104            return False
1105        return True
1106
1107    def return_ok_port(self, cookie, request):
1108        if cookie.port:
1109            req_port = request_port(request)
1110            if req_port is None:
1111                req_port = "80"
1112            for p in cookie.port.split(","):
1113                if p == req_port:
1114                    break
1115            else:
1116                _debug("   request port %s does not match cookie port %s",
1117                       req_port, cookie.port)
1118                return False
1119        return True
1120
1121    def return_ok_domain(self, cookie, request):
1122        req_host, erhn = eff_request_host(request)
1123        domain = cookie.domain
1124
1125        # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
1126        if (cookie.version == 0 and
1127            (self.strict_ns_domain & self.DomainStrictNonDomain) and
1128            not cookie.domain_specified and domain != erhn):
1129            _debug("   cookie with unspecified domain does not string-compare "
1130                   "equal to request domain")
1131            return False
1132
1133        if cookie.version > 0 and not domain_match(erhn, domain):
1134            _debug("   effective request-host name %s does not domain-match "
1135                   "RFC 2965 cookie domain %s", erhn, domain)
1136            return False
1137        if cookie.version == 0 and not ("."+erhn).endswith(domain):
1138            _debug("   request-host %s does not match Netscape cookie domain "
1139                   "%s", req_host, domain)
1140            return False
1141        return True
1142
1143    def domain_return_ok(self, domain, request):
1144        # Liberal check of.  This is here as an optimization to avoid
1145        # having to load lots of MSIE cookie files unless necessary.
1146        req_host, erhn = eff_request_host(request)
1147        if not req_host.startswith("."):
1148            req_host = "."+req_host
1149        if not erhn.startswith("."):
1150            erhn = "."+erhn
1151        if not (req_host.endswith(domain) or erhn.endswith(domain)):
1152            #_debug("   request domain %s does not match cookie domain %s",
1153            #       req_host, domain)
1154            return False
1155
1156        if self.is_blocked(domain):
1157            _debug("   domain %s is in user block-list", domain)
1158            return False
1159        if self.is_not_allowed(domain):
1160            _debug("   domain %s is not in user allow-list", domain)
1161            return False
1162
1163        return True
1164
1165    def path_return_ok(self, path, request):
1166        _debug("- checking cookie path=%s", path)
1167        req_path = request_path(request)
1168        if not req_path.startswith(path):
1169            _debug("  %s does not path-match %s", req_path, path)
1170            return False
1171        return True
1172
1173
def vals_sorted_by_key(adict):
    """Return the values of adict as a list, ordered by their sorted keys.

    adict needs .keys() and .get(); the mapping itself is not modified.
    sorted() is used rather than sorting the result of .keys() in place, so
    this does not depend on .keys() returning a mutable list, and the result
    is always a real list.
    """
    return [adict.get(key) for key in sorted(adict.keys())]
1178
def deepvalues(mapping):
    """Yield the leaf values of a nested mapping, depth-first.

    At every level values are visited in sorted-key order.  A value counts
    as a nested mapping when it has an "items" attribute; anything else is
    yielded as a leaf.
    """
    for value in vals_sorted_by_key(mapping):
        try:
            value.items
        except AttributeError:
            is_mapping = False
        else:
            is_mapping = True

        if is_mapping:
            for leaf in deepvalues(value):
                yield leaf
        else:
            yield value
1194
1195
# Sentinel passed as the default (second) argument of dict.get(), so that a
# missing key can be told apart from a key whose value is None.
class Absent:
    pass
1199
1200class CookieJar:
1201    """Collection of HTTP cookies.
1202
1203    You may not need to know about this class: try
1204    urllib2.build_opener(HTTPCookieProcessor).open(url).
1205
1206    """
1207
    # Matches any non-word character; _cookie_attrs uses it to decide whether
    # the value of a version > 0 cookie needs escaping.
    non_word_re = re.compile(r"\W")
    # Captures a double quote or a backslash, so that _cookie_attrs can put a
    # backslash in front of it.
    quote_re = re.compile(r"([\"\\])")
    # NOTE(review): the next three patterns are not referenced in the part of
    # the class visible here; check for other users before changing them.
    strict_domain_re = re.compile(r"\.?[^.]*")
    domain_re = re.compile(r"[^.]*")
    dots_re = re.compile(r"^\.+")

    # Pattern *string* (not compiled here) for a line starting with
    # "#LWP-Cookies-<digits>.<digits>", capturing the version number.
    magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
1215
1216    def __init__(self, policy=None):
1217        if policy is None:
1218            policy = DefaultCookiePolicy()
1219        self._policy = policy
1220
1221        self._cookies_lock = _threading.RLock()
1222        self._cookies = {}
1223
1224    def set_policy(self, policy):
1225        self._policy = policy
1226
1227    def _cookies_for_domain(self, domain, request):
1228        cookies = []
1229        if not self._policy.domain_return_ok(domain, request):
1230            return []
1231        _debug("Checking %s for cookies to return", domain)
1232        cookies_by_path = self._cookies[domain]
1233        for path in cookies_by_path.keys():
1234            if not self._policy.path_return_ok(path, request):
1235                continue
1236            cookies_by_name = cookies_by_path[path]
1237            for cookie in cookies_by_name.values():
1238                if not self._policy.return_ok(cookie, request):
1239                    _debug("   not returning cookie")
1240                    continue
1241                _debug("   it's a match")
1242                cookies.append(cookie)
1243        return cookies
1244
1245    def _cookies_for_request(self, request):
1246        """Return a list of cookies to be returned to server."""
1247        cookies = []
1248        for domain in self._cookies.keys():
1249            cookies.extend(self._cookies_for_domain(domain, request))
1250        return cookies
1251
1252    def _cookie_attrs(self, cookies):
1253        """Return a list of cookie-attributes to be returned to server.
1254
1255        like ['foo="bar"; $Path="/"', ...]
1256
1257        The $Version attribute is also added when appropriate (currently only
1258        once per request).
1259
1260        """
1261        # add cookies in order of most specific (ie. longest) path first
1262        cookies.sort(key=lambda arg: len(arg.path), reverse=True)
1263
1264        version_set = False
1265
1266        attrs = []
1267        for cookie in cookies:
1268            # set version of Cookie header
1269            # XXX
1270            # What should it be if multiple matching Set-Cookie headers have
1271            #  different versions themselves?
1272            # Answer: there is no answer; was supposed to be settled by
1273            #  RFC 2965 errata, but that may never appear...
1274            version = cookie.version
1275            if not version_set:
1276                version_set = True
1277                if version > 0:
1278                    attrs.append("$Version=%s" % version)
1279
1280            # quote cookie value if necessary
1281            # (not for Netscape protocol, which already has any quotes
1282            #  intact, due to the poorly-specified Netscape Cookie: syntax)
1283            if ((cookie.value is not None) and
1284                self.non_word_re.search(cookie.value) and version > 0):
1285                value = self.quote_re.sub(r"\\\1", cookie.value)
1286            else:
1287                value = cookie.value
1288
1289            # add cookie-attributes to be returned in Cookie header
1290            if cookie.value is None:
1291                attrs.append(cookie.name)
1292            else:
1293                attrs.append("%s=%s" % (cookie.name, value))
1294            if version > 0:
1295                if cookie.path_specified:
1296                    attrs.append('$Path="%s"' % cookie.path)
1297                if cookie.domain.startswith("."):
1298                    domain = cookie.domain
1299                    if (not cookie.domain_initial_dot and
1300                        domain.startswith(".")):
1301                        domain = domain[1:]
1302                    attrs.append('$Domain="%s"' % domain)
1303                if cookie.port is not None:
1304                    p = "$Port"
1305                    if cookie.port_specified:
1306                        p = p + ('="%s"' % cookie.port)
1307                    attrs.append(p)
1308
1309        return attrs
1310
1311    def add_cookie_header(self, request):
1312        """Add correct Cookie: header to request (urllib2.Request object).
1313
1314        The Cookie2 header is also added unless policy.hide_cookie2 is true.
1315
1316        """
1317        _debug("add_cookie_header")
1318        self._cookies_lock.acquire()
1319        try:
1320
1321            self._policy._now = self._now = int(time.time())
1322
1323            cookies = self._cookies_for_request(request)
1324
1325            attrs = self._cookie_attrs(cookies)
1326            if attrs:
1327                if not request.has_header("Cookie"):
1328                    request.add_unredirected_header(
1329                        "Cookie", "; ".join(attrs))
1330
1331            # if necessary, advertise that we know RFC 2965
1332            if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
1333                not request.has_header("Cookie2")):
1334                for cookie in cookies:
1335                    if cookie.version != 1:
1336                        request.add_unredirected_header("Cookie2", '$Version="1"')
1337                        break
1338
1339        finally:
1340            self._cookies_lock.release()
1341
1342        self.clear_expired_cookies()
1343
1344    def _normalized_cookie_tuples(self, attrs_set):
1345        """Return list of tuples containing normalised cookie information.
1346
1347        attrs_set is the list of lists of key,value pairs extracted from
1348        the Set-Cookie or Set-Cookie2 headers.
1349
1350        Tuples are name, value, standard, rest, where name and value are the
1351        cookie name and value, standard is a dictionary containing the standard
1352        cookie-attributes (discard, secure, version, expires or max-age,
1353        domain, path and port) and rest is a dictionary containing the rest of
1354        the cookie-attributes.
1355
1356        """
1357        cookie_tuples = []
1358
1359        boolean_attrs = "discard", "secure"
1360        value_attrs = ("version",
1361                       "expires", "max-age",
1362                       "domain", "path", "port",
1363                       "comment", "commenturl")
1364
1365        for cookie_attrs in attrs_set:
1366            name, value = cookie_attrs[0]
1367
1368            # Build dictionary of standard cookie-attributes (standard) and
1369            # dictionary of other cookie-attributes (rest).
1370
1371            # Note: expiry time is normalised to seconds since epoch.  V0
1372            # cookies should have the Expires cookie-attribute, and V1 cookies
1373            # should have Max-Age, but since V1 includes RFC 2109 cookies (and
1374            # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
1375            # accept either (but prefer Max-Age).
1376            max_age_set = False
1377
1378            bad_cookie = False
1379
1380            standard = {}
1381            rest = {}
1382            for k, v in cookie_attrs[1:]:
1383                lc = k.lower()
1384                # don't lose case distinction for unknown fields
1385                if lc in value_attrs or lc in boolean_attrs:
1386                    k = lc
1387                if k in boolean_attrs and v is None:
1388                    # boolean cookie-attribute is present, but has no value
1389                    # (like "discard", rather than "port=80")
1390                    v = True
1391                if k in standard:
1392                    # only first value is significant
1393                    continue
1394                if k == "domain":
1395                    if v is None:
1396                        _debug("   missing value for domain attribute")
1397                        bad_cookie = True
1398                        break
1399                    # RFC 2965 sectio…

Large files are truncated, but you can click here to view the full file