PageRenderTime 60ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 1ms

/External.LCA_RESTRICTED/Languages/IronPython/27/Lib/httplib.py

http://github.com/IronLanguages/main
Python | 1445 lines | 1353 code | 21 blank | 71 comment | 33 complexity | f23a712cc5f4289dcc5883546d14dbcb MD5 | raw file
Possible License(s): CPL-1.0, BSD-3-Clause, ISC, GPL-2.0, MPL-2.0-no-copyleft-exception

Large files are truncated, but you can click here to view the full file

r"""HTTP/1.1 client library

<intro stuff goes here>
<other stuff, too>

HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""
  62. from array import array
  63. import os
  64. import re
  65. import socket
  66. from sys import py3kwarning
  67. from urlparse import urlsplit
  68. import warnings
  69. with warnings.catch_warnings():
  70. if py3kwarning:
  71. warnings.filterwarnings("ignore", ".*mimetools has been removed",
  72. DeprecationWarning)
  73. import mimetools
  74. try:
  75. from cStringIO import StringIO
  76. except ImportError:
  77. from StringIO import StringIO
  78. __all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
  79. "HTTPException", "NotConnected", "UnknownProtocol",
  80. "UnknownTransferEncoding", "UnimplementedFileMode",
  81. "IncompleteRead", "InvalidURL", "ImproperConnectionState",
  82. "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
  83. "BadStatusLine", "error", "responses"]
  84. HTTP_PORT = 80
  85. HTTPS_PORT = 443
  86. _UNKNOWN = 'UNKNOWN'
  87. # connection states
  88. _CS_IDLE = 'Idle'
  89. _CS_REQ_STARTED = 'Request-started'
  90. _CS_REQ_SENT = 'Request-sent'
  91. # status codes
  92. # informational
  93. CONTINUE = 100
  94. SWITCHING_PROTOCOLS = 101
  95. PROCESSING = 102
  96. # successful
  97. OK = 200
  98. CREATED = 201
  99. ACCEPTED = 202
  100. NON_AUTHORITATIVE_INFORMATION = 203
  101. NO_CONTENT = 204
  102. RESET_CONTENT = 205
  103. PARTIAL_CONTENT = 206
  104. MULTI_STATUS = 207
  105. IM_USED = 226
  106. # redirection
  107. MULTIPLE_CHOICES = 300
  108. MOVED_PERMANENTLY = 301
  109. FOUND = 302
  110. SEE_OTHER = 303
  111. NOT_MODIFIED = 304
  112. USE_PROXY = 305
  113. TEMPORARY_REDIRECT = 307
  114. # client error
  115. BAD_REQUEST = 400
  116. UNAUTHORIZED = 401
  117. PAYMENT_REQUIRED = 402
  118. FORBIDDEN = 403
  119. NOT_FOUND = 404
  120. METHOD_NOT_ALLOWED = 405
  121. NOT_ACCEPTABLE = 406
  122. PROXY_AUTHENTICATION_REQUIRED = 407
  123. REQUEST_TIMEOUT = 408
  124. CONFLICT = 409
  125. GONE = 410
  126. LENGTH_REQUIRED = 411
  127. PRECONDITION_FAILED = 412
  128. REQUEST_ENTITY_TOO_LARGE = 413
  129. REQUEST_URI_TOO_LONG = 414
  130. UNSUPPORTED_MEDIA_TYPE = 415
  131. REQUESTED_RANGE_NOT_SATISFIABLE = 416
  132. EXPECTATION_FAILED = 417
  133. UNPROCESSABLE_ENTITY = 422
  134. LOCKED = 423
  135. FAILED_DEPENDENCY = 424
  136. UPGRADE_REQUIRED = 426
  137. # server error
  138. INTERNAL_SERVER_ERROR = 500
  139. NOT_IMPLEMENTED = 501
  140. BAD_GATEWAY = 502
  141. SERVICE_UNAVAILABLE = 503
  142. GATEWAY_TIMEOUT = 504
  143. HTTP_VERSION_NOT_SUPPORTED = 505
  144. INSUFFICIENT_STORAGE = 507
  145. NOT_EXTENDED = 510
  146. # Mapping status codes to official W3C names
  147. responses = {
  148. 100: 'Continue',
  149. 101: 'Switching Protocols',
  150. 200: 'OK',
  151. 201: 'Created',
  152. 202: 'Accepted',
  153. 203: 'Non-Authoritative Information',
  154. 204: 'No Content',
  155. 205: 'Reset Content',
  156. 206: 'Partial Content',
  157. 300: 'Multiple Choices',
  158. 301: 'Moved Permanently',
  159. 302: 'Found',
  160. 303: 'See Other',
  161. 304: 'Not Modified',
  162. 305: 'Use Proxy',
  163. 306: '(Unused)',
  164. 307: 'Temporary Redirect',
  165. 400: 'Bad Request',
  166. 401: 'Unauthorized',
  167. 402: 'Payment Required',
  168. 403: 'Forbidden',
  169. 404: 'Not Found',
  170. 405: 'Method Not Allowed',
  171. 406: 'Not Acceptable',
  172. 407: 'Proxy Authentication Required',
  173. 408: 'Request Timeout',
  174. 409: 'Conflict',
  175. 410: 'Gone',
  176. 411: 'Length Required',
  177. 412: 'Precondition Failed',
  178. 413: 'Request Entity Too Large',
  179. 414: 'Request-URI Too Long',
  180. 415: 'Unsupported Media Type',
  181. 416: 'Requested Range Not Satisfiable',
  182. 417: 'Expectation Failed',
  183. 500: 'Internal Server Error',
  184. 501: 'Not Implemented',
  185. 502: 'Bad Gateway',
  186. 503: 'Service Unavailable',
  187. 504: 'Gateway Timeout',
  188. 505: 'HTTP Version Not Supported',
  189. }
  190. # maximal amount of data to read at one time in _safe_read
  191. MAXAMOUNT = 1048576
  192. # maximal line length when calling readline().
  193. _MAXLINE = 65536
  194. # maximum amount of headers accepted
  195. _MAXHEADERS = 100
  196. # Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
  197. #
  198. # VCHAR = %x21-7E
  199. # obs-text = %x80-FF
  200. # header-field = field-name ":" OWS field-value OWS
  201. # field-name = token
  202. # field-value = *( field-content / obs-fold )
  203. # field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
  204. # field-vchar = VCHAR / obs-text
  205. #
  206. # obs-fold = CRLF 1*( SP / HTAB )
  207. # ; obsolete line folding
  208. # ; see Section 3.2.4
  209. # token = 1*tchar
  210. #
  211. # tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
  212. # / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
  213. # / DIGIT / ALPHA
  214. # ; any VCHAR, except delimiters
  215. #
  216. # VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
  217. # the patterns for both name and value are more leniant than RFC
  218. # definitions to allow for backwards compatibility
  219. _is_legal_header_name = re.compile(r'\A[^:\s][^:\r\n]*\Z').match
  220. _is_illegal_header_value = re.compile(r'\n(?![ \t])|\r(?![ \t\n])').search
  221. # We always set the Content-Length header for these methods because some
  222. # servers will otherwise respond with a 411
  223. _METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
  224. class HTTPMessage(mimetools.Message):
  225. def addheader(self, key, value):
  226. """Add header for field key handling repeats."""
  227. prev = self.dict.get(key)
  228. if prev is None:
  229. self.dict[key] = value
  230. else:
  231. combined = ", ".join((prev, value))
  232. self.dict[key] = combined
  233. def addcontinue(self, key, more):
  234. """Add more field data from a continuation line."""
  235. prev = self.dict[key]
  236. self.dict[key] = prev + "\n " + more
  237. def readheaders(self):
  238. """Read header lines.
  239. Read header lines up to the entirely blank line that terminates them.
  240. The (normally blank) line that ends the headers is skipped, but not
  241. included in the returned list. If a non-header line ends the headers,
  242. (which is an error), an attempt is made to backspace over it; it is
  243. never included in the returned list.
  244. The variable self.status is set to the empty string if all went well,
  245. otherwise it is an error message. The variable self.headers is a
  246. completely uninterpreted list of lines contained in the header (so
  247. printing them will reproduce the header exactly as it appears in the
  248. file).
  249. If multiple header fields with the same name occur, they are combined
  250. according to the rules in RFC 2616 sec 4.2:
  251. Appending each subsequent field-value to the first, each separated
  252. by a comma. The order in which header fields with the same field-name
  253. are received is significant to the interpretation of the combined
  254. field value.
  255. """
  256. # XXX The implementation overrides the readheaders() method of
  257. # rfc822.Message. The base class design isn't amenable to
  258. # customized behavior here so the method here is a copy of the
  259. # base class code with a few small changes.
  260. self.dict = {}
  261. self.unixfrom = ''
  262. self.headers = hlist = []
  263. self.status = ''
  264. headerseen = ""
  265. firstline = 1
  266. startofline = unread = tell = None
  267. if hasattr(self.fp, 'unread'):
  268. unread = self.fp.unread
  269. elif self.seekable:
  270. tell = self.fp.tell
  271. while True:
  272. if len(hlist) > _MAXHEADERS:
  273. raise HTTPException("got more than %d headers" % _MAXHEADERS)
  274. if tell:
  275. try:
  276. startofline = tell()
  277. except IOError:
  278. startofline = tell = None
  279. self.seekable = 0
  280. line = self.fp.readline(_MAXLINE + 1)
  281. if len(line) > _MAXLINE:
  282. raise LineTooLong("header line")
  283. if not line:
  284. self.status = 'EOF in headers'
  285. break
  286. # Skip unix From name time lines
  287. if firstline and line.startswith('From '):
  288. self.unixfrom = self.unixfrom + line
  289. continue
  290. firstline = 0
  291. if headerseen and line[0] in ' \t':
  292. # XXX Not sure if continuation lines are handled properly
  293. # for http and/or for repeating headers
  294. # It's a continuation line.
  295. hlist.append(line)
  296. self.addcontinue(headerseen, line.strip())
  297. continue
  298. elif self.iscomment(line):
  299. # It's a comment. Ignore it.
  300. continue
  301. elif self.islast(line):
  302. # Note! No pushback here! The delimiter line gets eaten.
  303. break
  304. headerseen = self.isheader(line)
  305. if headerseen:
  306. # It's a legal header line, save it.
  307. hlist.append(line)
  308. self.addheader(headerseen, line[len(headerseen)+1:].strip())
  309. continue
  310. elif headerseen is not None:
  311. # An empty header name. These aren't allowed in HTTP, but it's
  312. # probably a benign mistake. Don't add the header, just keep
  313. # going.
  314. continue
  315. else:
  316. # It's not a header line; throw it back and stop here.
  317. if not self.dict:
  318. self.status = 'No headers'
  319. else:
  320. self.status = 'Non-header line where header expected'
  321. # Try to undo the read.
  322. if unread:
  323. unread(line)
  324. elif tell:
  325. self.fp.seek(startofline)
  326. else:
  327. self.status = self.status + '; bad seek'
  328. break
  329. class HTTPResponse:
  330. # strict: If true, raise BadStatusLine if the status line can't be
  331. # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is
  332. # false because it prevents clients from talking to HTTP/0.9
  333. # servers. Note that a response with a sufficiently corrupted
  334. # status line will look like an HTTP/0.9 response.
  335. # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
  336. def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
  337. if buffering:
  338. # The caller won't be using any sock.recv() calls, so buffering
  339. # is fine and recommended for performance.
  340. self.fp = sock.makefile('rb')
  341. else:
  342. # The buffer size is specified as zero, because the headers of
  343. # the response are read with readline(). If the reads were
  344. # buffered the readline() calls could consume some of the
  345. # response, which make be read via a recv() on the underlying
  346. # socket.
  347. self.fp = sock.makefile('rb', 0)
  348. self.debuglevel = debuglevel
  349. self.strict = strict
  350. self._method = method
  351. self.msg = None
  352. # from the Status-Line of the response
  353. self.version = _UNKNOWN # HTTP-Version
  354. self.status = _UNKNOWN # Status-Code
  355. self.reason = _UNKNOWN # Reason-Phrase
  356. self.chunked = _UNKNOWN # is "chunked" being used?
  357. self.chunk_left = _UNKNOWN # bytes left to read in current chunk
  358. self.length = _UNKNOWN # number of bytes left in response
  359. self.will_close = _UNKNOWN # conn will close at end of response
  360. def _read_status(self):
  361. # Initialize with Simple-Response defaults
  362. line = self.fp.readline(_MAXLINE + 1)
  363. if len(line) > _MAXLINE:
  364. raise LineTooLong("header line")
  365. if self.debuglevel > 0:
  366. print "reply:", repr(line)
  367. if not line:
  368. # Presumably, the server closed the connection before
  369. # sending a valid response.
  370. raise BadStatusLine(line)
  371. try:
  372. [version, status, reason] = line.split(None, 2)
  373. except ValueError:
  374. try:
  375. [version, status] = line.split(None, 1)
  376. reason = ""
  377. except ValueError:
  378. # empty version will cause next test to fail and status
  379. # will be treated as 0.9 response.
  380. version = ""
  381. if not version.startswith('HTTP/'):
  382. if self.strict:
  383. self.close()
  384. raise BadStatusLine(line)
  385. else:
  386. # assume it's a Simple-Response from an 0.9 server
  387. self.fp = LineAndFileWrapper(line, self.fp)
  388. return "HTTP/0.9", 200, ""
  389. # The status code is a three-digit number
  390. try:
  391. status = int(status)
  392. if status < 100 or status > 999:
  393. raise BadStatusLine(line)
  394. except ValueError:
  395. raise BadStatusLine(line)
  396. return version, status, reason
  397. def begin(self):
  398. if self.msg is not None:
  399. # we've already started reading the response
  400. return
  401. # read until we get a non-100 response
  402. while True:
  403. version, status, reason = self._read_status()
  404. if status != CONTINUE:
  405. break
  406. # skip the header from the 100 response
  407. while True:
  408. skip = self.fp.readline(_MAXLINE + 1)
  409. if len(skip) > _MAXLINE:
  410. raise LineTooLong("header line")
  411. skip = skip.strip()
  412. if not skip:
  413. break
  414. if self.debuglevel > 0:
  415. print "header:", skip
  416. self.status = status
  417. self.reason = reason.strip()
  418. if version == 'HTTP/1.0':
  419. self.version = 10
  420. elif version.startswith('HTTP/1.'):
  421. self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
  422. elif version == 'HTTP/0.9':
  423. self.version = 9
  424. else:
  425. raise UnknownProtocol(version)
  426. if self.version == 9:
  427. self.length = None
  428. self.chunked = 0
  429. self.will_close = 1
  430. self.msg = HTTPMessage(StringIO())
  431. return
  432. self.msg = HTTPMessage(self.fp, 0)
  433. if self.debuglevel > 0:
  434. for hdr in self.msg.headers:
  435. print "header:", hdr,
  436. # don't let the msg keep an fp
  437. self.msg.fp = None
  438. # are we using the chunked-style of transfer encoding?
  439. tr_enc = self.msg.getheader('transfer-encoding')
  440. if tr_enc and tr_enc.lower() == "chunked":
  441. self.chunked = 1
  442. self.chunk_left = None
  443. else:
  444. self.chunked = 0
  445. # will the connection close at the end of the response?
  446. self.will_close = self._check_close()
  447. # do we have a Content-Length?
  448. # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
  449. length = self.msg.getheader('content-length')
  450. if length and not self.chunked:
  451. try:
  452. self.length = int(length)
  453. except ValueError:
  454. self.length = None
  455. else:
  456. if self.length < 0: # ignore nonsensical negative lengths
  457. self.length = None
  458. else:
  459. self.length = None
  460. # does the body have a fixed length? (of zero)
  461. if (status == NO_CONTENT or status == NOT_MODIFIED or
  462. 100 <= status < 200 or # 1xx codes
  463. self._method == 'HEAD'):
  464. self.length = 0
  465. # if the connection remains open, and we aren't using chunked, and
  466. # a content-length was not provided, then assume that the connection
  467. # WILL close.
  468. if not self.will_close and \
  469. not self.chunked and \
  470. self.length is None:
  471. self.will_close = 1
  472. def _check_close(self):
  473. conn = self.msg.getheader('connection')
  474. if self.version == 11:
  475. # An HTTP/1.1 proxy is assumed to stay open unless
  476. # explicitly closed.
  477. conn = self.msg.getheader('connection')
  478. if conn and "close" in conn.lower():
  479. return True
  480. return False
  481. # Some HTTP/1.0 implementations have support for persistent
  482. # connections, using rules different than HTTP/1.1.
  483. # For older HTTP, Keep-Alive indicates persistent connection.
  484. if self.msg.getheader('keep-alive'):
  485. return False
  486. # At least Akamai returns a "Connection: Keep-Alive" header,
  487. # which was supposed to be sent by the client.
  488. if conn and "keep-alive" in conn.lower():
  489. return False
  490. # Proxy-Connection is a netscape hack.
  491. pconn = self.msg.getheader('proxy-connection')
  492. if pconn and "keep-alive" in pconn.lower():
  493. return False
  494. # otherwise, assume it will close
  495. return True
  496. def close(self):
  497. fp = self.fp
  498. if fp:
  499. self.fp = None
  500. fp.close()
  501. def isclosed(self):
  502. # NOTE: it is possible that we will not ever call self.close(). This
  503. # case occurs when will_close is TRUE, length is None, and we
  504. # read up to the last byte, but NOT past it.
  505. #
  506. # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
  507. # called, meaning self.isclosed() is meaningful.
  508. return self.fp is None
  509. # XXX It would be nice to have readline and __iter__ for this, too.
  510. def read(self, amt=None):
  511. if self.fp is None:
  512. return ''
  513. if self._method == 'HEAD':
  514. self.close()
  515. return ''
  516. if self.chunked:
  517. return self._read_chunked(amt)
  518. if amt is None:
  519. # unbounded read
  520. if self.length is None:
  521. s = self.fp.read()
  522. else:
  523. try:
  524. s = self._safe_read(self.length)
  525. except IncompleteRead:
  526. self.close()
  527. raise
  528. self.length = 0
  529. self.close() # we read everything
  530. return s
  531. if self.length is not None:
  532. if amt > self.length:
  533. # clip the read to the "end of response"
  534. amt = self.length
  535. # we do not use _safe_read() here because this may be a .will_close
  536. # connection, and the user is reading more bytes than will be provided
  537. # (for example, reading in 1k chunks)
  538. s = self.fp.read(amt)
  539. if not s and amt:
  540. # Ideally, we would raise IncompleteRead if the content-length
  541. # wasn't satisfied, but it might break compatibility.
  542. self.close()
  543. if self.length is not None:
  544. self.length -= len(s)
  545. if not self.length:
  546. self.close()
  547. return s
  548. def _read_chunked(self, amt):
  549. assert self.chunked != _UNKNOWN
  550. chunk_left = self.chunk_left
  551. value = []
  552. while True:
  553. if chunk_left is None:
  554. line = self.fp.readline(_MAXLINE + 1)
  555. if len(line) > _MAXLINE:
  556. raise LineTooLong("chunk size")
  557. i = line.find(';')
  558. if i >= 0:
  559. line = line[:i] # strip chunk-extensions
  560. try:
  561. chunk_left = int(line, 16)
  562. except ValueError:
  563. # close the connection as protocol synchronisation is
  564. # probably lost
  565. self.close()
  566. raise IncompleteRead(''.join(value))
  567. if chunk_left == 0:
  568. break
  569. if amt is None:
  570. value.append(self._safe_read(chunk_left))
  571. elif amt < chunk_left:
  572. value.append(self._safe_read(amt))
  573. self.chunk_left = chunk_left - amt
  574. return ''.join(value)
  575. elif amt == chunk_left:
  576. value.append(self._safe_read(amt))
  577. self._safe_read(2) # toss the CRLF at the end of the chunk
  578. self.chunk_left = None
  579. return ''.join(value)
  580. else:
  581. value.append(self._safe_read(chunk_left))
  582. amt -= chunk_left
  583. # we read the whole chunk, get another
  584. self._safe_read(2) # toss the CRLF at the end of the chunk
  585. chunk_left = None
  586. # read and discard trailer up to the CRLF terminator
  587. ### note: we shouldn't have any trailers!
  588. while True:
  589. line = self.fp.readline(_MAXLINE + 1)
  590. if len(line) > _MAXLINE:
  591. raise LineTooLong("trailer line")
  592. if not line:
  593. # a vanishingly small number of sites EOF without
  594. # sending the trailer
  595. break
  596. if line == '\r\n':
  597. break
  598. # we read everything; close the "file"
  599. self.close()
  600. return ''.join(value)
  601. def _safe_read(self, amt):
  602. """Read the number of bytes requested, compensating for partial reads.
  603. Normally, we have a blocking socket, but a read() can be interrupted
  604. by a signal (resulting in a partial read).
  605. Note that we cannot distinguish between EOF and an interrupt when zero
  606. bytes have been read. IncompleteRead() will be raised in this
  607. situation.
  608. This function should be used when <amt> bytes "should" be present for
  609. reading. If the bytes are truly not available (due to EOF), then the
  610. IncompleteRead exception can be used to detect the problem.
  611. """
  612. # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
  613. # return less than x bytes unless EOF is encountered. It now handles
  614. # signal interruptions (socket.error EINTR) internally. This code
  615. # never caught that exception anyways. It seems largely pointless.
  616. # self.fp.read(amt) will work fine.
  617. s = []
  618. while amt > 0:
  619. chunk = self.fp.read(min(amt, MAXAMOUNT))
  620. if not chunk:
  621. raise IncompleteRead(''.join(s), amt)
  622. s.append(chunk)
  623. amt -= len(chunk)
  624. return ''.join(s)
  625. def fileno(self):
  626. return self.fp.fileno()
  627. def getheader(self, name, default=None):
  628. if self.msg is None:
  629. raise ResponseNotReady()
  630. return self.msg.getheader(name, default)
  631. def getheaders(self):
  632. """Return list of (header, value) tuples."""
  633. if self.msg is None:
  634. raise ResponseNotReady()
  635. return self.msg.items()
  636. class HTTPConnection:
  637. _http_vsn = 11
  638. _http_vsn_str = 'HTTP/1.1'
  639. response_class = HTTPResponse
  640. default_port = HTTP_PORT
  641. auto_open = 1
  642. debuglevel = 0
  643. strict = 0
  644. def __init__(self, host, port=None, strict=None,
  645. timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
  646. self.timeout = timeout
  647. self.source_address = source_address
  648. self.sock = None
  649. self._buffer = []
  650. self.__response = None
  651. self.__state = _CS_IDLE
  652. self._method = None
  653. self._tunnel_host = None
  654. self._tunnel_port = None
  655. self._tunnel_headers = {}
  656. if strict is not None:
  657. self.strict = strict
  658. (self.host, self.port) = self._get_hostport(host, port)
  659. # This is stored as an instance variable to allow unittests
  660. # to replace with a suitable mock
  661. self._create_connection = socket.create_connection
  662. def set_tunnel(self, host, port=None, headers=None):
  663. """ Set up host and port for HTTP CONNECT tunnelling.
  664. In a connection that uses HTTP Connect tunneling, the host passed to the
  665. constructor is used as proxy server that relays all communication to the
  666. endpoint passed to set_tunnel. This is done by sending a HTTP CONNECT
  667. request to the proxy server when the connection is established.
  668. This method must be called before the HTTP connection has been
  669. established.
  670. The headers argument should be a mapping of extra HTTP headers
  671. to send with the CONNECT request.
  672. """
  673. # Verify if this is required.
  674. if self.sock:
  675. raise RuntimeError("Can't setup tunnel for established connection.")
  676. self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
  677. if headers:
  678. self._tunnel_headers = headers
  679. else:
  680. self._tunnel_headers.clear()
  681. def _get_hostport(self, host, port):
  682. if port is None:
  683. i = host.rfind(':')
  684. j = host.rfind(']') # ipv6 addresses have [...]
  685. if i > j:
  686. try:
  687. port = int(host[i+1:])
  688. except ValueError:
  689. if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
  690. port = self.default_port
  691. else:
  692. raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
  693. host = host[:i]
  694. else:
  695. port = self.default_port
  696. if host and host[0] == '[' and host[-1] == ']':
  697. host = host[1:-1]
  698. return (host, port)
  699. def set_debuglevel(self, level):
  700. self.debuglevel = level
  701. def _tunnel(self):
  702. self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host,
  703. self._tunnel_port))
  704. for header, value in self._tunnel_headers.iteritems():
  705. self.send("%s: %s\r\n" % (header, value))
  706. self.send("\r\n")
  707. response = self.response_class(self.sock, strict = self.strict,
  708. method = self._method)
  709. (version, code, message) = response._read_status()
  710. if version == "HTTP/0.9":
  711. # HTTP/0.9 doesn't support the CONNECT verb, so if httplib has
  712. # concluded HTTP/0.9 is being used something has gone wrong.
  713. self.close()
  714. raise socket.error("Invalid response from tunnel request")
  715. if code != 200:
  716. self.close()
  717. raise socket.error("Tunnel connection failed: %d %s" % (code,
  718. message.strip()))
  719. while True:
  720. line = response.fp.readline(_MAXLINE + 1)
  721. if len(line) > _MAXLINE:
  722. raise LineTooLong("header line")
  723. if not line:
  724. # for sites which EOF without sending trailer
  725. break
  726. if line == '\r\n':
  727. break
  728. def connect(self):
  729. """Connect to the host and port specified in __init__."""
  730. self.sock = self._create_connection((self.host,self.port),
  731. self.timeout, self.source_address)
  732. if self._tunnel_host:
  733. self._tunnel()
  734. def close(self):
  735. """Close the connection to the HTTP server."""
  736. self.__state = _CS_IDLE
  737. try:
  738. sock = self.sock
  739. if sock:
  740. self.sock = None
  741. sock.close() # close it manually... there may be other refs
  742. finally:
  743. response = self.__response
  744. if response:
  745. self.__response = None
  746. response.close()
  747. def send(self, data):
  748. """Send `data' to the server."""
  749. if self.sock is None:
  750. if self.auto_open:
  751. self.connect()
  752. else:
  753. raise NotConnected()
  754. if self.debuglevel > 0:
  755. print "send:", repr(data)
  756. blocksize = 8192
  757. if hasattr(data,'read') and not isinstance(data, array):
  758. if self.debuglevel > 0: print "sendIng a read()able"
  759. datablock = data.read(blocksize)
  760. while datablock:
  761. self.sock.sendall(datablock)
  762. datablock = data.read(blocksize)
  763. else:
  764. self.sock.sendall(data)
  765. def _output(self, s):
  766. """Add a line of output to the current request buffer.
  767. Assumes that the line does *not* end with \\r\\n.
  768. """
  769. self._buffer.append(s)
  770. def _send_output(self, message_body=None):
  771. """Send the currently buffered request and clear the buffer.
  772. Appends an extra \\r\\n to the buffer.
  773. A message_body may be specified, to be appended to the request.
  774. """
  775. self._buffer.extend(("", ""))
  776. msg = "\r\n".join(self._buffer)
  777. del self._buffer[:]
  778. # If msg and message_body are sent in a single send() call,
  779. # it will avoid performance problems caused by the interaction
  780. # between delayed ack and the Nagle algorithm.
  781. if isinstance(message_body, str):
  782. msg += message_body
  783. message_body = None
  784. self.send(msg)
  785. if message_body is not None:
  786. #message_body was not a string (i.e. it is a file) and
  787. #we must run the risk of Nagle
  788. self.send(message_body)
  789. def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
  790. """Send a request to the server.
  791. `method' specifies an HTTP request method, e.g. 'GET'.
  792. `url' specifies the object being requested, e.g. '/index.html'.
  793. `skip_host' if True does not add automatically a 'Host:' header
  794. `skip_accept_encoding' if True does not add automatically an
  795. 'Accept-Encoding:' header
  796. """
  797. # if a prior response has been completed, then forget about it.
  798. if self.__response and self.__response.isclosed():
  799. self.__response = None
  800. # in certain cases, we cannot issue another request on this connection.
  801. # this occurs when:
  802. # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
  803. # 2) a response to a previous request has signalled that it is going
  804. # to close the connection upon completion.
  805. # 3) the headers for the previous response have not been read, thus
  806. # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
  807. #
  808. # if there is no prior response, then we can request at will.
  809. #
  810. # if point (2) is true, then we will have passed the socket to the
  811. # response (effectively meaning, "there is no prior response"), and
  812. # will open a new one when a new request is made.
  813. #
  814. # Note: if a prior response exists, then we *can* start a new request.
  815. # We are not allowed to begin fetching the response to this new
  816. # request, however, until that prior response is complete.
  817. #
  818. if self.__state == _CS_IDLE:
  819. self.__state = _CS_REQ_STARTED
  820. else:
  821. raise CannotSendRequest()
  822. # Save the method we use, we need it later in the response phase
  823. self._method = method
  824. if not url:
  825. url = '/'
  826. hdr = '%s %s %s' % (method, url, self._http_vsn_str)
  827. self._output(hdr)
  828. if self._http_vsn == 11:
  829. # Issue some standard headers for better HTTP/1.1 compliance
  830. if not skip_host:
  831. # this header is issued *only* for HTTP/1.1
  832. # connections. more specifically, this means it is
  833. # only issued when the client uses the new
  834. # HTTPConnection() class. backwards-compat clients
  835. # will be using HTTP/1.0 and those clients may be
  836. # issuing this header themselves. we should NOT issue
  837. # it twice; some web servers (such as Apache) barf
  838. # when they see two Host: headers
  839. # If we need a non-standard port,include it in the
  840. # header. If the request is going through a proxy,
  841. # but the host of the actual URL, not the host of the
  842. # proxy.
  843. netloc = ''
  844. if url.startswith('http'):
  845. nil, netloc, nil, nil, nil = urlsplit(url)
  846. if netloc:
  847. try:
  848. netloc_enc = netloc.encode("ascii")
  849. except UnicodeEncodeError:
  850. netloc_enc = netloc.encode("idna")
  851. self.putheader('Host', netloc_enc)
  852. else:
  853. if self._tunnel_host:
  854. host = self._tunnel_host
  855. port = self._tunnel_port
  856. else:
  857. host = self.host
  858. port = self.port
  859. try:
  860. host_enc = host.encode("ascii")
  861. except UnicodeEncodeError:
  862. host_enc = host.encode("idna")
  863. # Wrap the IPv6 Host Header with [] (RFC 2732)
  864. if host_enc.find(':') >= 0:
  865. host_enc = "[" + host_enc + "]"
  866. if port == self.default_port:
  867. self.putheader('Host', host_enc)
  868. else:
  869. self.putheader('Host', "%s:%s" % (host_enc, port))
  870. # note: we are assuming that clients will not attempt to set these
  871. # headers since *this* library must deal with the
  872. # consequences. this also means that when the supporting
  873. # libraries are updated to recognize other forms, then this
  874. # code should be changed (removed or updated).
  875. # we only want a Content-Encoding of "identity" since we don't
  876. # support encodings such as x-gzip or x-deflate.
  877. if not skip_accept_encoding:
  878. self.putheader('Accept-Encoding', 'identity')
  879. # we can accept "chunked" Transfer-Encodings, but no others
  880. # NOTE: no TE header implies *only* "chunked"
  881. #self.putheader('TE', 'chunked')
  882. # if TE is supplied in the header, then it must appear in a
  883. # Connection header.
  884. #self.putheader('Connection', 'TE')
  885. else:
  886. # For HTTP/1.0, the server will assume "not chunked"
  887. pass
  888. def putheader(self, header, *values):
  889. """Send a request header line to the server.
  890. For example: h.putheader('Accept', 'text/html')
  891. """
  892. if self.__state != _CS_REQ_STARTED:
  893. raise CannotSendHeader()
  894. header = '%s' % header
  895. if not _is_legal_header_name(header):
  896. raise ValueError('Invalid header name %r' % (header,))
  897. values = [str(v) for v in values]
  898. for one_value in values:
  899. if _is_illegal_header_value(one_value):
  900. raise ValueError('Invalid header value %r' % (one_value,))
  901. hdr = '%s: %s' % (header, '\r\n\t'.join(values))
  902. self._output(hdr)
  903. def endheaders(self, message_body=None):
  904. """Indicate that the last header line has been sent to the server.
  905. This method sends the request to the server. The optional
  906. message_body argument can be used to pass a message body
  907. associated with the request. The message body will be sent in
  908. the same packet as the message headers if it is string, otherwise it is
  909. sent as a separate packet.
  910. """
  911. if self.__state == _CS_REQ_STARTED:
  912. self.__state = _CS_REQ_SENT
  913. else:
  914. raise CannotSendHeader()
  915. self._send_output(message_body)
  916. def request(self, method, url, body=None, headers={}):
  917. """Send a complete request to the server."""
  918. self._send_request(method, url, body, headers)
  919. def _set_content_length(self, body, method):
  920. # Set the content-length based on the body. If the body is "empty", we
  921. # set Content-Length: 0 for methods that expect a body (RFC 7230,
  922. # Section 3.3.2). If the body is set for other methods, we set the
  923. # header provided we can figure out what the length is.
  924. thelen = None
  925. if body is None and method.upper() in _METHODS_EXPECTING_BODY:
  926. thelen = '0'
  927. elif body is not None:
  928. try:
  929. thelen = str(len(body))
  930. except (TypeError, AttributeError):
  931. # If this is a file-like object, try to
  932. # fstat its file descriptor
  933. try:
  934. thelen = str(os.fstat(body.fileno()).st_size)
  935. except (AttributeError, OSError):
  936. # Don't send a length if this failed
  937. if self.debuglevel > 0: print "Cannot stat!!"
  938. if thelen is not None:
  939. self.putheader('Content-Length', thelen)
  940. def _send_request(self, method, url, body, headers):
  941. # Honor explicitly requested Host: and Accept-Encoding: headers.
  942. header_names = dict.fromkeys([k.lower() for k in headers])
  943. skips = {}
  944. if 'host' in header_names:
  945. skips['skip_host'] = 1
  946. if 'accept-encoding' in header_names:
  947. skips['skip_accept_encoding'] = 1
  948. self.putrequest(method, url, **skips)
  949. if 'content-length' not in header_names:
  950. self._set_content_length(body, method)
  951. for hdr, value in headers.iteritems():
  952. self.putheader(hdr, value)
  953. self.endheaders(body)
  954. def getresponse(self, buffering=False):
  955. "Get the response from the server."
  956. # if a prior response has been completed, then forget about it.
  957. if self.__response and self.__response.isclosed():
  958. self.__response = None
  959. #
  960. # if a prior response exists, then it must be completed (otherwise, we
  961. # cannot read this response's header to determine the connection-close
  962. # behavior)
  963. #
  964. # note: if a prior response existed, but was connection-close, then the
  965. # socket and response were made independent of this HTTPConnection
  966. # object since a new request requires that we open a whole new
  967. # connection
  968. #
  969. # this means the prior response had one of two states:
  970. # 1) will_close: this connection was reset and the prior socket and
  971. # response operate independently
  972. # 2) persistent: the response was retained and we await its
  973. # isclosed() status to become true.
  974. #
  975. if self.__state != _CS_REQ_SENT or self.__response:
  976. raise ResponseNotReady()
  977. args = (self.sock,)
  978. kwds = {"strict":self.strict, "method":self._method}
  979. if self.debuglevel > 0:
  980. args += (self.debuglevel,)
  981. if buffering:
  982. #only add this keyword if non-default, for compatibility with
  983. #other response_classes.
  984. kwds["buffering"] = True;
  985. response = self.response_class(*args, **kwds)
  986. try:
  987. response.begin()
  988. assert response.will_close != _UNKNOWN
  989. self.__state = _CS_IDLE
  990. if response.will_close:
  991. # this effectively passes the connection to the response
  992. self.close()
  993. else:
  994. # remember this, so we can tell when it is complete
  995. self.__response = response
  996. return response
  997. except:
  998. response.close()
  999. raise
  1000. class HTTP:
  1001. "Compatibility class with httplib.py from 1.5."
  1002. _http_vsn = 10
  1003. _http_vsn_str = 'HTTP/1.0'
  1004. debuglevel = 0
  1005. _connection_class = HTTPConnection
  1006. def __init__(self, host='', port=None, strict=None):
  1007. "Provide a default host, since the superclass requires one."
  1008. # some joker passed 0 explicitly, meaning default port
  1009. if port == 0:
  1010. port = None
  1011. # Note that we may pass an empty string as the host; this will raise
  1012. # an error when we attempt to connect. Presumably, the client code
  1013. # will call connect before then, with a proper host.
  1014. self._setup(self._connection_class(host, port, strict))
  1015. def _setup(self, conn):
  1016. self._conn = conn
  1017. # set up delegation to flesh out interface
  1018. self.send = conn.send
  1019. self.putrequest = conn.putrequest
  1020. self.putheader = conn.putheader
  1021. self.endheaders = conn.endheaders
  1022. self.set_debuglevel = conn.set_debuglevel
  1023. conn._http_vsn = self._http_vsn
  1024. conn._http_vsn_str = self._http_vsn_str
  1025. self.file = None
  1026. def connect(self, host=None, port=None):
  1027. "Accept arguments to set the host/port, since the superclass doesn't."
  1028. if host is not None:
  1029. (self._conn.host, self._conn.port) = self._conn._get_hostport(host, port)
  1030. self._conn.connect()
  1031. def getfile(self):
  1032. "Provide a getfile, since the superclass' does not use this concept."
  1033. return self.file
  1034. def getreply(self, buffering=False):
  1035. """Compat definition since superclass does not define it.
  1036. Returns a tuple consisting of:
  1037. - server status code (e.g. '200' if all goes well)
  1038. - server "reason" corresponding to status code
  1039. - any RFC822 headers in the response from the server
  1040. """
  1041. try:
  1042. if not buffering:
  1043. response = self._conn.getresponse()
  1044. else:
  1045. #only add this keyword if non-default for compatibility
  1046. #with other connection classes
  1047. response = self._conn.getresponse(buffering)
  1048. except BadStatusLine, e:
  1049. ### hmm. if getresponse() ever closes the socket on a bad request,
  1050. ### then we are going to have problems with self.sock
  1051. ### should we keep this behavior? do people use it?
  1052. # keep the socket open (as a file), and return it
  1053. self.file = self._conn.sock.makefile('rb', 0)
  1054. # close our socket -- we want to restart after any protocol error
  1055. self.close()
  1056. self.headers = None
  1057. return -1, e.line, None
  1058. self.headers = response.msg
  1059. self.file = response.fp
  1060. return response.status, response.reason, response.msg
  1061. def close(self):
  1062. self._conn.close()
  1063. # note that self.file == response.fp, which gets closed by the
  1064. # superclass. just clear the object ref here.
  1065. ### hmm. messy. if status==-1, then self.file is owned by us.
  1066. ### well... we aren't explicitly closing, but losing this ref will
  1067. ### do it
  1068. self.file = None
  1069. try:
  1070. import ssl
  1071. except ImportError:
  1072. pass
  1073. else:
  1074. class HTTPSConnection(HTTPConnection):
  1075. "This class allows communication via SSL."
  1076. default_port = HTTPS_PORT
  1077. def __init__(self, host, port=None, key_file=None, cert_file=None,
  1078. strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
  1079. source_address=None, context=None):
  1080. HTTPConnection.__init__(self, host, port, strict, timeout,
  1081. source_address)
  1082. self.key_file = key_file
  1083. self.cert_file = cert_file
  1084. if context is None:
  1085. context = ssl._create_default_https_context()
  1086. if key_file or cert_file:
  1087. context.load_cert_chain(cert_file, key_file)
  1088. self._context = context
  1089. def connect(self):
  1090. "Connect to a host on a given (SSL) port."
  1091. HTTPConnection.connect(self)
  1092. if self._tunnel_host:
  1093. server_hostname = self._tunnel_host
  1094. else:
  1095. server_hostname = self.host
  1096. self.sock = self._context.wrap_socket(self.sock,
  1097. server_hostname=server_hostname)
  1098. __all__.append("HTTPSConnection")
  1099. class HTTPS(HTTP):
  1100. """Compatibility with 1.5 httplib interface
  1101. Python 1.5.2 did not have an HTTPS class, but it defined an
  1102. interface for sending http requests that is also useful for
  1103. https.
  1104. """
  1105. _connection_class = HTTPSConnection
  1106. def __init__(self, host='', port=None, key_file=None, cert_file=None,
  1107. strict=None, context=None):
  1108. # provide a default host, pass the X509 cert info
  1109. # urf. compensate for bad input.
  1110. if port == 0:
  1111. port = None
  1112. self._setup(self._connection_class(host, port, key_file,
  1113. cert_file, strict,
  1114. context=context))
  1115. # we never actually use these for anything, but we keep them
  1116. # here for compatibility with post-1.5.2 CVS.
  1117. self.key_file = key_file
  1118. self.cert_file = cert_file
  1119. def FakeSocket (sock, sslobj):
  1120. warnings.warn("FakeSocket is deprecated, and won't be in 3.x. " +
  1121. "Use the result of ssl.wrap_socket() directly instead.",
  1122. DeprecationWarning, stacklevel=2)
  1123. return sslobj
  1124. class HTTPException(Exception):
  1125. # Subclasses that define an __init__ must call Exception.__init__
  1126. # or define self.args. Otherwise, str() will fail.
  1127. pass
  1128. class NotConnected(HTTPException):
  1129. pass
  1130. class InvalidURL(HTTPException):
  1131. pass
  1132. class UnknownProtocol(HTTPException):
  1133. def __init__(self, version):
  1134. self.args = version,
  1135. self.version = version
  1136. class UnknownTransferEncoding(HTTPException):
  1137. pass
  1138. class UnimplementedFileMode(HTTPException):
  1139. pass
  1140. class IncompleteRead(HTTPException):
  1141. def __init__(self, partial, expected=None):
  1142. self.args = partial,
  1143. self.partial = partial
  1144. self.expected = expected
  1145. def __repr__(self):
  1146. if self.expected is not None:
  1147. e = ', %i more expected' % self.expected
  1148. else:
  1149. e = ''
  1150. return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e)
  1151. def __str__(self):
  1152. return repr(self)
  1153. class ImproperConnectionState(HTTPException):
  1154. pass
  1155. class CannotSendRequest(ImproperConnectionState):
  1156. pass
  1157. class CannotSendHeader(ImproperConnectionState):
  1158. pass
  1159. class ResponseNotReady(ImproperConnectionState):
  1160. pass
  1161. class BadStatusLine(HTTPException):
  1162. def __init__(self, line):
  1163. if not line:
  1164. line = repr(line)
  1165. self.args = line,
  1166. self.line = line
  1167. class LineTooLong(HTTPException):
  1168. def __init__(self, line_type):
  1169. HTTPException.__init__(self, "got more than %d bytes when reading %s"
  1170. % (_MAXLINE, line_type))
  1171. # for backwards compatibility
  1172. error = HTTPException
  1173. class LineAndFileWrapper:
  1174. """A limited file-like object for HTTP/0.9 responses."""
  1175. # The status-line parsing code calls readline(), which normally
  1176. # get the HTTP status line. For a 0.9 response, however, this is
  1177. # actually the first line of the body! Clients need to get a
  1178. # readable file object that contains that line.
  1179. def __init__(self, line, file):
  1180. self._line = line
  1181. self._file = file
  1182. self._line_consumed = 0
  1183. self._line_offset = 0
  1184. self._line_left = len(line)
  1185. def __getattr__(self, attr):
  1186. return getattr(self._file, attr)
  1187. def _done(self):
  1188. # called when the last byte is read from the line. After the
  1189. # call, all read methods are delegated to the underlying file
  1190. # object.
  1191. self._line_consumed = 1
  1192. self.read = self._file.read
  1193. self.readline = self._file.readline
  1194. self.readlines = self._file.readlines
  1195. def read(self, amt=None):
  1196. if self._line_consumed:
  1197. return self._file.read(a

Large files are truncated, but you can click here to view the full file