PageRenderTime 52ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/lib-python/modified-2.7/httplib.py

https://bitbucket.org/dac_io/pypy
Python | 1396 lines | 1391 code | 0 blank | 5 comment | 2 complexity | 5c0768cf471ab8aeabe67d6af6c61f3c MD5 | raw file
  1. """HTTP/1.1 client library
  2. <intro stuff goes here>
  3. <other stuff, too>
  4. HTTPConnection goes through a number of "states", which define when a client
  5. may legally make another request or fetch the response for a particular
  6. request. This diagram details these state transitions:
    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent
  38. This diagram presents the following rules:
  39. -- a second request may not be started until {response-headers-read}
  40. -- a response [object] cannot be retrieved until {request-sent}
  41. -- there is no differentiation between an unread response body and a
  42. partially read response body
  43. Note: this enforcement is applied by the HTTPConnection class. The
  44. HTTPResponse class does not enforce this state machine, which
  45. implies sophisticated clients may accelerate the request/response
  46. pipeline. Caution should be taken, though: accelerating the states
  47. beyond the above pattern may imply knowledge of the server's
  48. connection-close behavior for certain requests. For example, it
  49. is impossible to tell whether the server will close the connection
  50. UNTIL the response headers have been read; this means that further
  51. requests cannot be placed into the pipeline until it is known that
  52. the server will NOT be closing the connection.
Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
  61. """
  62. from array import array
  63. import os
  64. import socket
  65. from sys import py3kwarning
  66. from urlparse import urlsplit
  67. import warnings
  68. with warnings.catch_warnings():
  69. if py3kwarning:
  70. warnings.filterwarnings("ignore", ".*mimetools has been removed",
  71. DeprecationWarning)
  72. import mimetools
  73. try:
  74. from cStringIO import StringIO
  75. except ImportError:
  76. from StringIO import StringIO
# Public names of this module.
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]

# Port numbers; HTTP_PORT is HTTPConnection.default_port.
HTTP_PORT = 80
HTTPS_PORT = 443

# Initial value of the HTTPResponse attributes that are filled in once the
# status line and headers have been parsed.
_UNKNOWN = 'UNKNOWN'

# connection states (values taken by HTTPConnection's private __state)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510

# Mapping status codes to official W3C names
# NOTE: several codes that have a constant above (102, 207, 226, 422, 423,
# 424, 426, 507, 510) have no entry here, so look them up with
# responses.get(code) rather than responses[code].
responses = {
    100: 'Continue',
    101: 'Switching Protocols',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
}

# maximal amount of data to read at one time in _safe_read
MAXAMOUNT = 1048576

# maximal line length when calling readline().
# Reads ask for _MAXLINE + 1 bytes so that an over-long line can be detected.
_MAXLINE = 65536
  193. class HTTPMessage(mimetools.Message):
  194. def addheader(self, key, value):
  195. """Add header for field key handling repeats."""
  196. prev = self.dict.get(key)
  197. if prev is None:
  198. self.dict[key] = value
  199. else:
  200. combined = ", ".join((prev, value))
  201. self.dict[key] = combined
  202. def addcontinue(self, key, more):
  203. """Add more field data from a continuation line."""
  204. prev = self.dict[key]
  205. self.dict[key] = prev + "\n " + more
  206. def readheaders(self):
  207. """Read header lines.
  208. Read header lines up to the entirely blank line that terminates them.
  209. The (normally blank) line that ends the headers is skipped, but not
  210. included in the returned list. If a non-header line ends the headers,
  211. (which is an error), an attempt is made to backspace over it; it is
  212. never included in the returned list.
  213. The variable self.status is set to the empty string if all went well,
  214. otherwise it is an error message. The variable self.headers is a
  215. completely uninterpreted list of lines contained in the header (so
  216. printing them will reproduce the header exactly as it appears in the
  217. file).
  218. If multiple header fields with the same name occur, they are combined
  219. according to the rules in RFC 2616 sec 4.2:
  220. Appending each subsequent field-value to the first, each separated
  221. by a comma. The order in which header fields with the same field-name
  222. are received is significant to the interpretation of the combined
  223. field value.
  224. """
  225. # XXX The implementation overrides the readheaders() method of
  226. # rfc822.Message. The base class design isn't amenable to
  227. # customized behavior here so the method here is a copy of the
  228. # base class code with a few small changes.
  229. self.dict = {}
  230. self.unixfrom = ''
  231. self.headers = hlist = []
  232. self.status = ''
  233. headerseen = ""
  234. firstline = 1
  235. startofline = unread = tell = None
  236. if hasattr(self.fp, 'unread'):
  237. unread = self.fp.unread
  238. elif self.seekable:
  239. tell = self.fp.tell
  240. while True:
  241. if tell:
  242. try:
  243. startofline = tell()
  244. except IOError:
  245. startofline = tell = None
  246. self.seekable = 0
  247. line = self.fp.readline(_MAXLINE + 1)
  248. if len(line) > _MAXLINE:
  249. raise LineTooLong("header line")
  250. if not line:
  251. self.status = 'EOF in headers'
  252. break
  253. # Skip unix From name time lines
  254. if firstline and line.startswith('From '):
  255. self.unixfrom = self.unixfrom + line
  256. continue
  257. firstline = 0
  258. if headerseen and line[0] in ' \t':
  259. # XXX Not sure if continuation lines are handled properly
  260. # for http and/or for repeating headers
  261. # It's a continuation line.
  262. hlist.append(line)
  263. self.addcontinue(headerseen, line.strip())
  264. continue
  265. elif self.iscomment(line):
  266. # It's a comment. Ignore it.
  267. continue
  268. elif self.islast(line):
  269. # Note! No pushback here! The delimiter line gets eaten.
  270. break
  271. headerseen = self.isheader(line)
  272. if headerseen:
  273. # It's a legal header line, save it.
  274. hlist.append(line)
  275. self.addheader(headerseen, line[len(headerseen)+1:].strip())
  276. continue
  277. else:
  278. # It's not a header line; throw it back and stop here.
  279. if not self.dict:
  280. self.status = 'No headers'
  281. else:
  282. self.status = 'Non-header line where header expected'
  283. # Try to undo the read.
  284. if unread:
  285. unread(line)
  286. elif tell:
  287. self.fp.seek(startofline)
  288. else:
  289. self.status = self.status + '; bad seek'
  290. break
  291. class HTTPResponse:
  292. # strict: If true, raise BadStatusLine if the status line can't be
  293. # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is
  294. # false because it prevents clients from talking to HTTP/0.9
  295. # servers. Note that a response with a sufficiently corrupted
  296. # status line will look like an HTTP/0.9 response.
  297. # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
  298. def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
  299. if buffering:
  300. # The caller won't be using any sock.recv() calls, so buffering
  301. # is fine and recommended for performance.
  302. self.fp = sock.makefile('rb')
  303. else:
  304. # The buffer size is specified as zero, because the headers of
  305. # the response are read with readline(). If the reads were
  306. # buffered the readline() calls could consume some of the
  307. # response, which make be read via a recv() on the underlying
  308. # socket.
  309. self.fp = sock.makefile('rb', 0)
  310. self.debuglevel = debuglevel
  311. self.strict = strict
  312. self._method = method
  313. self.msg = None
  314. # from the Status-Line of the response
  315. self.version = _UNKNOWN # HTTP-Version
  316. self.status = _UNKNOWN # Status-Code
  317. self.reason = _UNKNOWN # Reason-Phrase
  318. self.chunked = _UNKNOWN # is "chunked" being used?
  319. self.chunk_left = _UNKNOWN # bytes left to read in current chunk
  320. self.length = _UNKNOWN # number of bytes left in response
  321. self.will_close = _UNKNOWN # conn will close at end of response
  322. def _read_status(self):
  323. # Initialize with Simple-Response defaults
  324. line = self.fp.readline()
  325. if self.debuglevel > 0:
  326. print "reply:", repr(line)
  327. if not line:
  328. # Presumably, the server closed the connection before
  329. # sending a valid response.
  330. raise BadStatusLine(line)
  331. try:
  332. [version, status, reason] = line.split(None, 2)
  333. except ValueError:
  334. try:
  335. [version, status] = line.split(None, 1)
  336. reason = ""
  337. except ValueError:
  338. # empty version will cause next test to fail and status
  339. # will be treated as 0.9 response.
  340. version = ""
  341. if not version.startswith('HTTP/'):
  342. if self.strict:
  343. self.close()
  344. raise BadStatusLine(line)
  345. else:
  346. # assume it's a Simple-Response from an 0.9 server
  347. self.fp = LineAndFileWrapper(line, self.fp)
  348. return "HTTP/0.9", 200, ""
  349. # The status code is a three-digit number
  350. try:
  351. status = int(status)
  352. if status < 100 or status > 999:
  353. raise BadStatusLine(line)
  354. except ValueError:
  355. raise BadStatusLine(line)
  356. return version, status, reason
  357. def begin(self):
  358. if self.msg is not None:
  359. # we've already started reading the response
  360. return
  361. # read until we get a non-100 response
  362. while True:
  363. version, status, reason = self._read_status()
  364. if status != CONTINUE:
  365. break
  366. # skip the header from the 100 response
  367. while True:
  368. skip = self.fp.readline(_MAXLINE + 1)
  369. if len(skip) > _MAXLINE:
  370. raise LineTooLong("header line")
  371. skip = skip.strip()
  372. if not skip:
  373. break
  374. if self.debuglevel > 0:
  375. print "header:", skip
  376. self.status = status
  377. self.reason = reason.strip()
  378. if version == 'HTTP/1.0':
  379. self.version = 10
  380. elif version.startswith('HTTP/1.'):
  381. self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
  382. elif version == 'HTTP/0.9':
  383. self.version = 9
  384. else:
  385. raise UnknownProtocol(version)
  386. if self.version == 9:
  387. self.length = None
  388. self.chunked = 0
  389. self.will_close = 1
  390. self.msg = HTTPMessage(StringIO())
  391. return
  392. self.msg = HTTPMessage(self.fp, 0)
  393. if self.debuglevel > 0:
  394. for hdr in self.msg.headers:
  395. print "header:", hdr,
  396. # don't let the msg keep an fp
  397. self.msg.fp = None
  398. # are we using the chunked-style of transfer encoding?
  399. tr_enc = self.msg.getheader('transfer-encoding')
  400. if tr_enc and tr_enc.lower() == "chunked":
  401. self.chunked = 1
  402. self.chunk_left = None
  403. else:
  404. self.chunked = 0
  405. # will the connection close at the end of the response?
  406. self.will_close = self._check_close()
  407. # do we have a Content-Length?
  408. # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
  409. length = self.msg.getheader('content-length')
  410. if length and not self.chunked:
  411. try:
  412. self.length = int(length)
  413. except ValueError:
  414. self.length = None
  415. else:
  416. if self.length < 0: # ignore nonsensical negative lengths
  417. self.length = None
  418. else:
  419. self.length = None
  420. # does the body have a fixed length? (of zero)
  421. if (status == NO_CONTENT or status == NOT_MODIFIED or
  422. 100 <= status < 200 or # 1xx codes
  423. self._method == 'HEAD'):
  424. self.length = 0
  425. # if the connection remains open, and we aren't using chunked, and
  426. # a content-length was not provided, then assume that the connection
  427. # WILL close.
  428. if not self.will_close and \
  429. not self.chunked and \
  430. self.length is None:
  431. self.will_close = 1
  432. def _check_close(self):
  433. conn = self.msg.getheader('connection')
  434. if self.version == 11:
  435. # An HTTP/1.1 proxy is assumed to stay open unless
  436. # explicitly closed.
  437. conn = self.msg.getheader('connection')
  438. if conn and "close" in conn.lower():
  439. return True
  440. return False
  441. # Some HTTP/1.0 implementations have support for persistent
  442. # connections, using rules different than HTTP/1.1.
  443. # For older HTTP, Keep-Alive indicates persistent connection.
  444. if self.msg.getheader('keep-alive'):
  445. return False
  446. # At least Akamai returns a "Connection: Keep-Alive" header,
  447. # which was supposed to be sent by the client.
  448. if conn and "keep-alive" in conn.lower():
  449. return False
  450. # Proxy-Connection is a netscape hack.
  451. pconn = self.msg.getheader('proxy-connection')
  452. if pconn and "keep-alive" in pconn.lower():
  453. return False
  454. # otherwise, assume it will close
  455. return True
  456. def close(self):
  457. if self.fp:
  458. self.fp.close()
  459. self.fp = None
  460. def isclosed(self):
  461. # NOTE: it is possible that we will not ever call self.close(). This
  462. # case occurs when will_close is TRUE, length is None, and we
  463. # read up to the last byte, but NOT past it.
  464. #
  465. # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
  466. # called, meaning self.isclosed() is meaningful.
  467. return self.fp is None
  468. # XXX It would be nice to have readline and __iter__ for this, too.
  469. def read(self, amt=None):
  470. if self.fp is None:
  471. return ''
  472. if self._method == 'HEAD':
  473. self.close()
  474. return ''
  475. if self.chunked:
  476. return self._read_chunked(amt)
  477. if amt is None:
  478. # unbounded read
  479. if self.length is None:
  480. s = self.fp.read()
  481. else:
  482. s = self._safe_read(self.length)
  483. self.length = 0
  484. self.close() # we read everything
  485. return s
  486. if self.length is not None:
  487. if amt > self.length:
  488. # clip the read to the "end of response"
  489. amt = self.length
  490. # we do not use _safe_read() here because this may be a .will_close
  491. # connection, and the user is reading more bytes than will be provided
  492. # (for example, reading in 1k chunks)
  493. s = self.fp.read(amt)
  494. if self.length is not None:
  495. self.length -= len(s)
  496. if not self.length:
  497. self.close()
  498. return s
  499. def _read_chunked(self, amt):
  500. assert self.chunked != _UNKNOWN
  501. chunk_left = self.chunk_left
  502. value = []
  503. while True:
  504. if chunk_left is None:
  505. line = self.fp.readline(_MAXLINE + 1)
  506. if len(line) > _MAXLINE:
  507. raise LineTooLong("chunk size")
  508. i = line.find(';')
  509. if i >= 0:
  510. line = line[:i] # strip chunk-extensions
  511. try:
  512. chunk_left = int(line, 16)
  513. except ValueError:
  514. # close the connection as protocol synchronisation is
  515. # probably lost
  516. self.close()
  517. raise IncompleteRead(''.join(value))
  518. if chunk_left == 0:
  519. break
  520. if amt is None:
  521. value.append(self._safe_read(chunk_left))
  522. elif amt < chunk_left:
  523. value.append(self._safe_read(amt))
  524. self.chunk_left = chunk_left - amt
  525. return ''.join(value)
  526. elif amt == chunk_left:
  527. value.append(self._safe_read(amt))
  528. self._safe_read(2) # toss the CRLF at the end of the chunk
  529. self.chunk_left = None
  530. return ''.join(value)
  531. else:
  532. value.append(self._safe_read(chunk_left))
  533. amt -= chunk_left
  534. # we read the whole chunk, get another
  535. self._safe_read(2) # toss the CRLF at the end of the chunk
  536. chunk_left = None
  537. # read and discard trailer up to the CRLF terminator
  538. ### note: we shouldn't have any trailers!
  539. while True:
  540. line = self.fp.readline(_MAXLINE + 1)
  541. if len(line) > _MAXLINE:
  542. raise LineTooLong("trailer line")
  543. if not line:
  544. # a vanishingly small number of sites EOF without
  545. # sending the trailer
  546. break
  547. if line == '\r\n':
  548. break
  549. # we read everything; close the "file"
  550. self.close()
  551. return ''.join(value)
  552. def _safe_read(self, amt):
  553. """Read the number of bytes requested, compensating for partial reads.
  554. Normally, we have a blocking socket, but a read() can be interrupted
  555. by a signal (resulting in a partial read).
  556. Note that we cannot distinguish between EOF and an interrupt when zero
  557. bytes have been read. IncompleteRead() will be raised in this
  558. situation.
  559. This function should be used when <amt> bytes "should" be present for
  560. reading. If the bytes are truly not available (due to EOF), then the
  561. IncompleteRead exception can be used to detect the problem.
  562. """
  563. # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
  564. # return less than x bytes unless EOF is encountered. It now handles
  565. # signal interruptions (socket.error EINTR) internally. This code
  566. # never caught that exception anyways. It seems largely pointless.
  567. # self.fp.read(amt) will work fine.
  568. s = []
  569. while amt > 0:
  570. chunk = self.fp.read(min(amt, MAXAMOUNT))
  571. if not chunk:
  572. raise IncompleteRead(''.join(s), amt)
  573. s.append(chunk)
  574. amt -= len(chunk)
  575. return ''.join(s)
  576. def fileno(self):
  577. return self.fp.fileno()
  578. def getheader(self, name, default=None):
  579. if self.msg is None:
  580. raise ResponseNotReady()
  581. return self.msg.getheader(name, default)
  582. def getheaders(self):
  583. """Return list of (header, value) tuples."""
  584. if self.msg is None:
  585. raise ResponseNotReady()
  586. return self.msg.items()
  587. class HTTPConnection:
  588. _http_vsn = 11
  589. _http_vsn_str = 'HTTP/1.1'
  590. response_class = HTTPResponse
  591. default_port = HTTP_PORT
  592. auto_open = 1
  593. debuglevel = 0
  594. strict = 0
  595. def __init__(self, host, port=None, strict=None,
  596. timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
  597. self.timeout = timeout
  598. self.source_address = source_address
  599. self.sock = None
  600. self._buffer = []
  601. self.__response = None
  602. self.__state = _CS_IDLE
  603. self._method = None
  604. self._tunnel_host = None
  605. self._tunnel_port = None
  606. self._tunnel_headers = {}
  607. self._set_hostport(host, port)
  608. if strict is not None:
  609. self.strict = strict
  610. def set_tunnel(self, host, port=None, headers=None):
  611. """ Sets up the host and the port for the HTTP CONNECT Tunnelling.
  612. The headers argument should be a mapping of extra HTTP headers
  613. to send with the CONNECT request.
  614. """
  615. self._tunnel_host = host
  616. self._tunnel_port = port
  617. if headers:
  618. self._tunnel_headers = headers
  619. else:
  620. self._tunnel_headers.clear()
  621. def _set_hostport(self, host, port):
  622. if port is None:
  623. i = host.rfind(':')
  624. j = host.rfind(']') # ipv6 addresses have [...]
  625. if i > j:
  626. try:
  627. port = int(host[i+1:])
  628. except ValueError:
  629. raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
  630. host = host[:i]
  631. else:
  632. port = self.default_port
  633. if host and host[0] == '[' and host[-1] == ']':
  634. host = host[1:-1]
  635. self.host = host
  636. self.port = port
  637. def set_debuglevel(self, level):
  638. self.debuglevel = level
  639. def _tunnel(self):
  640. self._set_hostport(self._tunnel_host, self._tunnel_port)
  641. self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port))
  642. for header, value in self._tunnel_headers.iteritems():
  643. self.send("%s: %s\r\n" % (header, value))
  644. self.send("\r\n")
  645. response = self.response_class(self.sock, strict = self.strict,
  646. method = self._method)
  647. (version, code, message) = response._read_status()
  648. if code != 200:
  649. self.close()
  650. raise socket.error("Tunnel connection failed: %d %s" % (code,
  651. message.strip()))
  652. while True:
  653. line = response.fp.readline(_MAXLINE + 1)
  654. if len(line) > _MAXLINE:
  655. raise LineTooLong("header line")
  656. if line == '\r\n': break
  657. def connect(self):
  658. """Connect to the host and port specified in __init__."""
  659. self.sock = socket.create_connection((self.host,self.port),
  660. self.timeout, self.source_address)
  661. if self._tunnel_host:
  662. self._tunnel()
  663. def close(self):
  664. """Close the connection to the HTTP server."""
  665. if self.sock:
  666. self.sock.close() # close it manually... there may be other refs
  667. self.sock = None
  668. if self.__response:
  669. self.__response.close()
  670. self.__response = None
  671. self.__state = _CS_IDLE
  672. def send(self, data):
  673. """Send `data' to the server."""
  674. if self.sock is None:
  675. if self.auto_open:
  676. self.connect()
  677. else:
  678. raise NotConnected()
  679. if self.debuglevel > 0:
  680. print "send:", repr(data)
  681. blocksize = 8192
  682. if hasattr(data,'read') and not isinstance(data, array):
  683. if self.debuglevel > 0: print "sendIng a read()able"
  684. datablock = data.read(blocksize)
  685. while datablock:
  686. self.sock.sendall(datablock)
  687. datablock = data.read(blocksize)
  688. else:
  689. self.sock.sendall(data)
  690. def _output(self, s):
  691. """Add a line of output to the current request buffer.
  692. Assumes that the line does *not* end with \\r\\n.
  693. """
  694. self._buffer.append(s)
  695. def _send_output(self, message_body=None):
  696. """Send the currently buffered request and clear the buffer.
  697. Appends an extra \\r\\n to the buffer.
  698. A message_body may be specified, to be appended to the request.
  699. """
  700. self._buffer.extend(("", ""))
  701. msg = "\r\n".join(self._buffer)
  702. del self._buffer[:]
  703. # If msg and message_body are sent in a single send() call,
  704. # it will avoid performance problems caused by the interaction
  705. # between delayed ack and the Nagle algorithm.
  706. if isinstance(message_body, str):
  707. msg += message_body
  708. message_body = None
  709. self.send(msg)
  710. if message_body is not None:
  711. #message_body was not a string (i.e. it is a file) and
  712. #we must run the risk of Nagle
  713. self.send(message_body)
  714. def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
  715. """Send a request to the server.
  716. `method' specifies an HTTP request method, e.g. 'GET'.
  717. `url' specifies the object being requested, e.g. '/index.html'.
  718. `skip_host' if True does not add automatically a 'Host:' header
  719. `skip_accept_encoding' if True does not add automatically an
  720. 'Accept-Encoding:' header
  721. """
  722. # if a prior response has been completed, then forget about it.
  723. if self.__response and self.__response.isclosed():
  724. self.__response = None
  725. # in certain cases, we cannot issue another request on this connection.
  726. # this occurs when:
  727. # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
  728. # 2) a response to a previous request has signalled that it is going
  729. # to close the connection upon completion.
  730. # 3) the headers for the previous response have not been read, thus
  731. # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
  732. #
  733. # if there is no prior response, then we can request at will.
  734. #
  735. # if point (2) is true, then we will have passed the socket to the
  736. # response (effectively meaning, "there is no prior response"), and
  737. # will open a new one when a new request is made.
  738. #
  739. # Note: if a prior response exists, then we *can* start a new request.
  740. # We are not allowed to begin fetching the response to this new
  741. # request, however, until that prior response is complete.
  742. #
  743. if self.__state == _CS_IDLE:
  744. self.__state = _CS_REQ_STARTED
  745. else:
  746. raise CannotSendRequest()
  747. # Save the method we use, we need it later in the response phase
  748. self._method = method
  749. if not url:
  750. url = '/'
  751. hdr = '%s %s %s' % (method, url, self._http_vsn_str)
  752. self._output(hdr)
  753. if self._http_vsn == 11:
  754. # Issue some standard headers for better HTTP/1.1 compliance
  755. if not skip_host:
  756. # this header is issued *only* for HTTP/1.1
  757. # connections. more specifically, this means it is
  758. # only issued when the client uses the new
  759. # HTTPConnection() class. backwards-compat clients
  760. # will be using HTTP/1.0 and those clients may be
  761. # issuing this header themselves. we should NOT issue
  762. # it twice; some web servers (such as Apache) barf
  763. # when they see two Host: headers
  764. # If we need a non-standard port,include it in the
  765. # header. If the request is going through a proxy,
  766. # but the host of the actual URL, not the host of the
  767. # proxy.
  768. netloc = ''
  769. if url.startswith('http'):
  770. nil, netloc, nil, nil, nil = urlsplit(url)
  771. if netloc:
  772. try:
  773. netloc_enc = netloc.encode("ascii")
  774. except UnicodeEncodeError:
  775. netloc_enc = netloc.encode("idna")
  776. self.putheader('Host', netloc_enc)
  777. else:
  778. try:
  779. host_enc = self.host.encode("ascii")
  780. except UnicodeEncodeError:
  781. host_enc = self.host.encode("idna")
  782. # Wrap the IPv6 Host Header with [] (RFC 2732)
  783. if host_enc.find(':') >= 0:
  784. host_enc = "[" + host_enc + "]"
  785. if self.port == self.default_port:
  786. self.putheader('Host', host_enc)
  787. else:
  788. self.putheader('Host', "%s:%s" % (host_enc, self.port))
  789. # note: we are assuming that clients will not attempt to set these
  790. # headers since *this* library must deal with the
  791. # consequences. this also means that when the supporting
  792. # libraries are updated to recognize other forms, then this
  793. # code should be changed (removed or updated).
  794. # we only want a Content-Encoding of "identity" since we don't
  795. # support encodings such as x-gzip or x-deflate.
  796. if not skip_accept_encoding:
  797. self.putheader('Accept-Encoding', 'identity')
  798. # we can accept "chunked" Transfer-Encodings, but no others
  799. # NOTE: no TE header implies *only* "chunked"
  800. #self.putheader('TE', 'chunked')
  801. # if TE is supplied in the header, then it must appear in a
  802. # Connection header.
  803. #self.putheader('Connection', 'TE')
  804. else:
  805. # For HTTP/1.0, the server will assume "not chunked"
  806. pass
  807. def putheader(self, header, *values):
  808. """Send a request header line to the server.
  809. For example: h.putheader('Accept', 'text/html')
  810. """
  811. if self.__state != _CS_REQ_STARTED:
  812. raise CannotSendHeader()
  813. hdr = '%s: %s' % (header, '\r\n\t'.join([str(v) for v in values]))
  814. self._output(hdr)
  815. def endheaders(self, message_body=None):
  816. """Indicate that the last header line has been sent to the server.
  817. This method sends the request to the server. The optional
  818. message_body argument can be used to pass message body
  819. associated with the request. The message body will be sent in
  820. the same packet as the message headers if possible. The
  821. message_body should be a string.
  822. """
  823. if self.__state == _CS_REQ_STARTED:
  824. self.__state = _CS_REQ_SENT
  825. else:
  826. raise CannotSendHeader()
  827. self._send_output(message_body)
  828. def request(self, method, url, body=None, headers={}):
  829. """Send a complete request to the server."""
  830. self._send_request(method, url, body, headers)
  831. def _set_content_length(self, body):
  832. # Set the content-length based on the body.
  833. thelen = None
  834. try:
  835. thelen = str(len(body))
  836. except TypeError, te:
  837. # If this is a file-like object, try to
  838. # fstat its file descriptor
  839. try:
  840. thelen = str(os.fstat(body.fileno()).st_size)
  841. except (AttributeError, OSError):
  842. # Don't send a length if this failed
  843. if self.debuglevel > 0: print "Cannot stat!!"
  844. if thelen is not None:
  845. self.putheader('Content-Length', thelen)
  846. def _send_request(self, method, url, body, headers):
  847. # Honor explicitly requested Host: and Accept-Encoding: headers.
  848. header_names = dict.fromkeys([k.lower() for k in headers])
  849. skips = {}
  850. if 'host' in header_names:
  851. skips['skip_host'] = 1
  852. if 'accept-encoding' in header_names:
  853. skips['skip_accept_encoding'] = 1
  854. self.putrequest(method, url, **skips)
  855. if body and ('content-length' not in header_names):
  856. self._set_content_length(body)
  857. for hdr, value in headers.iteritems():
  858. self.putheader(hdr, value)
  859. self.endheaders(body)
  def getresponse(self, buffering=False):
      """Get the response from the server.

      Creates a response_class instance for the current socket, calls
      its begin() method and returns the response object.  A true
      buffering flag is forwarded to response_class as a keyword
      argument.

      Raises ResponseNotReady unless the connection state is
      _CS_REQ_SENT and no earlier, still unfinished response is being
      tracked.
      """
      # if a prior response has been completed, then forget about it.
      previous = self.__response
      if previous and previous.isclosed():
          self.__response = None

      #
      # if a prior response exists, then it must be completed (otherwise, we
      # cannot read this response's header to determine the connection-close
      # behavior)
      #
      # note: if a prior response existed, but was connection-close, then the
      # socket and response were made independent of this HTTPConnection
      # object since a new request requires that we open a whole new
      # connection
      #
      # this means the prior response had one of two states:
      #   1) will_close: this connection was reset and the prior socket and
      #                  response operate independently
      #   2) persistent: the response was retained and we await its
      #                  isclosed() status to become true.
      #
      if self.__state != _CS_REQ_SENT or self.__response:
          raise ResponseNotReady()

      ctor_args = [self.sock]
      ctor_kwds = dict(strict=self.strict, method=self._method)
      if self.debuglevel > 0:
          ctor_args.append(self.debuglevel)
      if buffering:
          # only add this keyword if non-default, for compatibility with
          # other response_classes.
          ctor_kwds["buffering"] = True
      response = self.response_class(*ctor_args, **ctor_kwds)

      try:
          response.begin()
      except:
          # whatever went wrong, release the response before re-raising
          response.close()
          raise
      assert response.will_close != _UNKNOWN
      self.__state = _CS_IDLE

      if not response.will_close:
          # remember this, so we can tell when it is complete
          self.__response = response
      else:
          # this effectively passes the connection to the response
          self.close()

      return response
  class HTTP:
      "Compatibility class with httplib.py from 1.5."

      _http_vsn = 10
      _http_vsn_str = 'HTTP/1.0'

      debuglevel = 0

      # Class instantiated by __init__ for the underlying connection.
      _connection_class = HTTPConnection

      def __init__(self, host='', port=None, strict=None):
          "Provide a default host, since the superclass requires one."

          # some joker passed 0 explicitly, meaning default port
          if port == 0:
              port = None

          # Note that we may pass an empty string as the host; this will throw
          # an error when we attempt to connect. Presumably, the client code
          # will call connect before then, with a proper host.
          self._setup(self._connection_class(host, port, strict))

      def _setup(self, conn):
          """Adopt conn as the underlying connection.

          Binds conn's send/putrequest/putheader/endheaders/set_debuglevel
          methods onto this object, pushes this class's HTTP version
          settings into conn and resets self.file.
          """
          self._conn = conn

          # set up delegation to flesh out interface
          self.send = conn.send
          self.putrequest = conn.putrequest
          self.putheader = conn.putheader
          self.endheaders = conn.endheaders
          self.set_debuglevel = conn.set_debuglevel

          conn._http_vsn = self._http_vsn
          conn._http_vsn_str = self._http_vsn_str

          self.file = None

      def connect(self, host=None, port=None):
          "Accept arguments to set the host/port, since the superclass doesn't."

          if host is not None:
              self._conn._set_hostport(host, port)
          self._conn.connect()

      def getfile(self):
          "Provide a getfile, since the superclass' does not use this concept."
          return self.file

      def getreply(self, buffering=False):
          """Compat definition since superclass does not define it.

          Returns a tuple consisting of:
          - server status code (e.g. '200' if all goes well)
          - server "reason" corresponding to status code
          - any RFC822 headers in the response from the server

          When the connection raises BadStatusLine the tuple is
          (-1, <offending line>, None), the connection is closed, and
          getfile() returns a file made from the socket.
          """
          try:
              if not buffering:
                  response = self._conn.getresponse()
              else:
                  # only add this keyword if non-default for compatibility
                  # with other connection classes
                  response = self._conn.getresponse(buffering)
          except BadStatusLine as e:
              ### hmm. if getresponse() ever closes the socket on a bad request,
              ### then we are going to have problems with self.sock

              ### should we keep this behavior? do people use it?
              # keep the socket open (as a file), and return it
              raw_file = self._conn.sock.makefile('rb', 0)

              # close our socket -- we want to restart after any protocol error
              self.close()

              # close() resets self.file to None, so the file has to be
              # installed afterwards or getfile() could never return it.
              self.file = raw_file
              self.headers = None
              return -1, e.line, None

          self.headers = response.msg
          self.file = response.fp
          return response.status, response.reason, response.msg

      def close(self):
          "Close the underlying connection and drop the reference to the file."
          self._conn.close()

          # note that self.file == response.fp, which gets closed by the
          #   superclass. just clear the object ref here.
          ### hmm. messy. if status==-1, then self.file is owned by us.
          ### well... we aren't explicitly closing, but losing this ref will
          ### do it
          self.file = None
  try:
      import ssl
  except ImportError:
      # Without the ssl module none of the HTTPS names below are defined.
      pass
  else:
      class HTTPSConnection(HTTPConnection):
          "This class allows communication via SSL."

          default_port = HTTPS_PORT

          def __init__(self, host, port=None, key_file=None, cert_file=None,
                       strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                       source_address=None):
              """Set up the connection and remember the key/cert file names.

              key_file and cert_file are stored and later handed to
              ssl.wrap_socket() by connect(); every other argument goes to
              HTTPConnection.__init__.
              """
              HTTPConnection.__init__(self, host, port, strict, timeout,
                                      source_address)
              self.key_file = key_file
              self.cert_file = cert_file

          def connect(self):
              "Connect to a host on a given (SSL) port."

              address = (self.host, self.port)
              plain_sock = socket.create_connection(address, self.timeout,
                                                    self.source_address)
              if self._tunnel_host:
                  # _tunnel() runs with the plain socket installed as
                  # self.sock; the SSL wrapping happens only afterwards.
                  self.sock = plain_sock
                  self._tunnel()
              # NOTE(review): only key_file/cert_file are passed, so whether
              # the peer certificate is verified depends on ssl.wrap_socket's
              # defaults -- confirm against the ssl documentation.
              self.sock = ssl.wrap_socket(plain_sock, self.key_file,
                                          self.cert_file)

      __all__.append("HTTPSConnection")

      class HTTPS(HTTP):
          """Compatibility with 1.5 httplib interface

          Python 1.5.2 did not have an HTTPS class, but it defined an
          interface for sending http requests that is also useful for
          https.
          """

          _connection_class = HTTPSConnection

          def __init__(self, host='', port=None, key_file=None, cert_file=None,
                       strict=None):
              # provide a default host, pass the X509 cert info

              # urf. compensate for bad input.
              if port == 0:
                  port = None
              connection = self._connection_class(host, port, key_file,
                                                  cert_file, strict)
              self._setup(connection)

              # we never actually use these for anything, but we keep them
              # here for compatibility with post-1.5.2 CVS.
              self.key_file = key_file
              self.cert_file = cert_file

      def FakeSocket(sock, sslobj):
          "Deprecated shim: warn, then return sslobj unchanged (sock is ignored)."
          warnings.warn("FakeSocket is deprecated, and won't be in 3.x.  " +
                        "Use the result of ssl.wrap_socket() directly instead.",
                        DeprecationWarning, stacklevel=2)
          return sslobj
  class HTTPException(Exception):
      "Common base class of the exception classes defined below."
      # Subclasses that define an __init__ must call Exception.__init__
      # or define self.args.  Otherwise, str() will fail.


  class NotConnected(HTTPException):
      pass


  class InvalidURL(HTTPException):
      pass


  class UnknownProtocol(HTTPException):
      "Carries the offending version string as .version (and in .args)."

      def __init__(self, version):
          self.args = (version,)
          self.version = version


  class UnknownTransferEncoding(HTTPException):
      pass


  class UnimplementedFileMode(HTTPException):
      pass


  class IncompleteRead(HTTPException):
      """Carries the data read so far (.partial) and, optionally, the
      number of bytes that were still expected (.expected)."""

      def __init__(self, partial, expected=None):
          self.args = (partial,)
          self.partial = partial
          self.expected = expected

      def __repr__(self):
          # The "more expected" part only appears when a count was given.
          suffix = ''
          if self.expected is not None:
              suffix = ', %i more expected' % self.expected
          return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), suffix)

      def __str__(self):
          return repr(self)


  class ImproperConnectionState(HTTPException):
      pass


  class CannotSendRequest(ImproperConnectionState):
      pass


  class CannotSendHeader(ImproperConnectionState):
      pass


  class ResponseNotReady(ImproperConnectionState):
      pass


  class BadStatusLine(HTTPException):
      "Carries the offending status line as .line (and in .args)."

      def __init__(self, line):
          # A false line (e.g. the empty string) is stored as its repr()
          # so that it still shows up when the exception is printed.
          shown = line
          if not shown:
              shown = repr(line)
          self.args = (shown,)
          self.line = shown


  class LineTooLong(HTTPException):
      "Raised with a message naming the kind of line that exceeded _MAXLINE."

      def __init__(self, line_type):
          message = ("got more than %d bytes when reading %s"
                     % (_MAXLINE, line_type))
          HTTPException.__init__(self, message)


  # for backwards compatibility
  error = HTTPException
  class LineAndFileWrapper:
      """A limited file-like object for HTTP/0.9 responses.

      Wraps an already-read first line together with the file it came
      from.  read(), readline() and readlines() serve the remainder of
      that line first and then continue with the file; any other
      attribute is looked up on the file.
      """

      # The status-line parsing code calls readline(), which normally
      # gets the HTTP status line.  For a 0.9 response, however, this is
      # actually the first line of the body!  Clients need to get a
      # readable file object that contains that line.

      def __init__(self, line, file):
          self._line = line
          self._file = file
          self._line_consumed = 0
          self._line_offset = 0
          self._line_left = len(line)
          if not self._line_left:
              # Nothing is buffered, so delegate to the file right away
              # instead of failing the "line left" assertions on first use.
              self._done()

      def __getattr__(self, attr):
          return getattr(self._file, attr)

      def _done(self):
          # called when the last byte is read from the line.  After the
          # call, all read methods are delegated to the underlying file
          # object.
          self._line_consumed = 1
          self.read = self._file.read
          self.readline = self._file.readline
          self.readlines = self._file.readlines

      def read(self, amt=None):
          """Read up to amt bytes; None or a negative amt reads everything."""
          if self._line_consumed:
              return self._file.read(amt)
          assert self._line_left
          if amt is not None and amt < 0:
              # File objects treat a negative size as "read it all"; a
              # negative value must never reach the slicing code below.
              amt = None
          if amt is None or amt > self._line_left:
              s = self._line[self._line_offset:]
              self._done()
              if amt is None:
                  return s + self._file.read()
              else:
                  return s + self._file.read(amt - len(s))
          else:
              assert amt <= self._line_left
              i = self._line_offset
              j = i + amt
              s = self._line[i:j]
              self._line_offset = j
              self._line_left -= amt
              if self._line_left == 0:
                  self._done()
              return s

      def readline(self):
          """Return the unread rest of the buffered line, then file lines."""
          if self._line_consumed:
              return self._file.readline()
          assert self._line_left
          s = self._line[self._line_offset:]
          self._done()
          return s

      def readlines(self, size=None):
          """Return the rest of the buffered line followed by the file's lines."""
          if self._line_consumed:
              return self._file.readlines(size)
          assert self._line_left
          L = [self._line[self._line_offset:]]
          self._done()
          if size is None:
              return L + self._file.readlines()
          else:
              return L + self._file.readlines(size)
  1132. def test():
  1133. """Test this module.
  1134. A hodge podge of tests collected here, because they have too many
  1135. external dependencies for the regular test suite.
  1136. """
  1137. import sys
  1138. import getopt
  1139. opts, args = getopt.getopt(sys.argv[1:], 'd')
  1140. dl = 0
  1141. for o, a in opts:
  1142. if o == '-d': dl = dl + 1
  1143. host = 'www.python.org'
  1144. selector = '/'
  1145. if args[0:]: host = args[0]
  1146. if args[1:]: selector = args[1]
  1147. h = HTTP()
  1148. h.set_debuglevel(dl)
  1149. h.connect(host)
  1150. h.putrequest('GET', selector)
  1151. h.endheaders()
  1152. status, reason, headers = h.getreply()
  1153. print 'status =', status
  1154. print 'reason =', reason
  1155. print "read", len(h.getfile().read())
  1156. print
  1157. if headers:
  1158. for header in headers.headers: print header.strip()
  1159. print
  1160. # minimal test that code to extract host from url works
  1161. class HTTP11(HTTP):
  1162. _http_vsn = 11
  1163. _http_vsn_str = 'HTTP/1.1'
  1164. h = HTTP11('www.python.org')
  1165. h.putrequest('GET', 'http://www.python.org/~jeremy/')
  1166. h.endheaders()
  1167. h.getreply()
  1168. h.close()
  1169. try:
  1170. import ssl
  1171. except ImportError:
  1172. pass
  1173. else:
  1174. for host, selector in (('sourceforge.net', '/projects/python'),
  1175. ):
  1176. print "https://%s%s" % (host, selector)
  1177. hs = HTTPS()
  1178. hs.set_debuglevel(dl)
  1179. hs.connect(host)
  1180. hs.putrequest('GET', selector)
  1181. hs.endheaders()
  1182. status, reason, headers = hs.getreply()
  1183. print 'status =', status
  1184. print 'reason =', reason
  1185. print "read", len(hs.getfile().read())
  1186. print
  1187. if headers:
  1188. for header in headers.headers: print header.strip()
  1189. print
  # Run the ad-hoc network tests only when executed as a script.
  if __name__ == '__main__':
      test()