PageRenderTime 446ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/libmproxy/protocol/http.py

https://github.com/zbrdge/mitmproxy
Python | 1073 lines | 1062 code | 3 blank | 8 comment | 7 complexity | 788eccececcecd7c9820190d998fa40e MD5 | raw file
  1. from __future__ import absolute_import
  2. import Cookie, urllib, urlparse, time, copy
  3. from email.utils import parsedate_tz, formatdate, mktime_tz
  4. from netlib import http, tcp, http_status
  5. import netlib.utils
  6. from netlib.odict import ODict, ODictCaseless
  7. from .primitives import KILL, ProtocolHandler, TemporaryServerChangeMixin, Flow, Error
  8. from ..proxy.connection import ServerConnection
  9. from .. import encoding, utils, filt, controller, stateobject, proxy
  10. HDR_FORM_URLENCODED = "application/x-www-form-urlencoded"
  11. CONTENT_MISSING = 0
  12. def get_line(fp):
  13. """
  14. Get a line, possibly preceded by a blank.
  15. """
  16. line = fp.readline()
  17. if line == "\r\n" or line == "\n": # Possible leftover from previous message
  18. line = fp.readline()
  19. if line == "":
  20. raise tcp.NetLibDisconnect
  21. return line
  22. class decoded(object):
  23. """
  24. A context manager that decodes a request or response, and then
  25. re-encodes it with the same encoding after execution of the block.
  26. Example:
  27. with decoded(request):
  28. request.content = request.content.replace("foo", "bar")
  29. """
  30. def __init__(self, o):
  31. self.o = o
  32. ce = o.headers.get_first("content-encoding")
  33. if ce in encoding.ENCODINGS:
  34. self.ce = ce
  35. else:
  36. self.ce = None
  37. def __enter__(self):
  38. if self.ce:
  39. self.o.decode()
  40. def __exit__(self, type, value, tb):
  41. if self.ce:
  42. self.o.encode(self.ce)
  43. class HTTPMessage(stateobject.SimpleStateObject):
  44. def __init__(self, httpversion, headers, content, timestamp_start=None,
  45. timestamp_end=None):
  46. self.httpversion = httpversion
  47. self.headers = headers
  48. """@type: ODictCaseless"""
  49. self.content = content
  50. self.timestamp_start = timestamp_start
  51. self.timestamp_end = timestamp_end
  52. self.flow = None # will usually be set by the flow backref mixin
  53. """@type: HTTPFlow"""
  54. _stateobject_attributes = dict(
  55. httpversion=tuple,
  56. headers=ODictCaseless,
  57. content=str,
  58. timestamp_start=float,
  59. timestamp_end=float
  60. )
  61. def get_decoded_content(self):
  62. """
  63. Returns the decoded content based on the current Content-Encoding header.
  64. Doesn't change the message iteself or its headers.
  65. """
  66. ce = self.headers.get_first("content-encoding")
  67. if not self.content or ce not in encoding.ENCODINGS:
  68. return self.content
  69. return encoding.decode(ce, self.content)
  70. def decode(self):
  71. """
  72. Decodes content based on the current Content-Encoding header, then
  73. removes the header. If there is no Content-Encoding header, no
  74. action is taken.
  75. Returns True if decoding succeeded, False otherwise.
  76. """
  77. ce = self.headers.get_first("content-encoding")
  78. if not self.content or ce not in encoding.ENCODINGS:
  79. return False
  80. data = encoding.decode(ce, self.content)
  81. if data is None:
  82. return False
  83. self.content = data
  84. del self.headers["content-encoding"]
  85. return True
  86. def encode(self, e):
  87. """
  88. Encodes content with the encoding e, where e is "gzip", "deflate"
  89. or "identity".
  90. """
  91. # FIXME: Error if there's an existing encoding header?
  92. self.content = encoding.encode(e, self.content)
  93. self.headers["content-encoding"] = [e]
  94. def size(self, **kwargs):
  95. """
  96. Size in bytes of a fully rendered message, including headers and
  97. HTTP lead-in.
  98. """
  99. hl = len(self._assemble_head(**kwargs))
  100. if self.content:
  101. return hl + len(self.content)
  102. else:
  103. return hl
  104. def copy(self):
  105. c = copy.copy(self)
  106. c.headers = self.headers.copy()
  107. return c
  108. def replace(self, pattern, repl, *args, **kwargs):
  109. """
  110. Replaces a regular expression pattern with repl in both the headers
  111. and the body of the message. Encoded content will be decoded
  112. before replacement, and re-encoded afterwards.
  113. Returns the number of replacements made.
  114. """
  115. with decoded(self):
  116. self.content, c = utils.safe_subn(pattern, repl, self.content, *args, **kwargs)
  117. c += self.headers.replace(pattern, repl, *args, **kwargs)
  118. return c
  119. @classmethod
  120. def from_stream(cls, rfile, include_content=True, body_size_limit=None):
  121. """
  122. Parse an HTTP message from a file stream
  123. """
  124. raise NotImplementedError # pragma: nocover
  125. def _assemble_first_line(self):
  126. """
  127. Returns the assembled request/response line
  128. """
  129. raise NotImplementedError # pragma: nocover
  130. def _assemble_headers(self):
  131. """
  132. Returns the assembled headers
  133. """
  134. raise NotImplementedError # pragma: nocover
  135. def _assemble_head(self):
  136. """
  137. Returns the assembled request/response line plus headers
  138. """
  139. raise NotImplementedError # pragma: nocover
  140. def _assemble(self):
  141. """
  142. Returns the assembled request/response
  143. """
  144. raise NotImplementedError # pragma: nocover
  145. class HTTPRequest(HTTPMessage):
  146. """
  147. An HTTP request.
  148. Exposes the following attributes:
  149. flow: Flow object the request belongs to
  150. headers: ODictCaseless object
  151. content: Content of the request, None, or CONTENT_MISSING if there
  152. is content associated, but not present. CONTENT_MISSING evaluates
  153. to False to make checking for the presence of content natural.
  154. form_in: The request form which mitmproxy has received. The following values are possible:
  155. - relative (GET /index.html, OPTIONS *) (covers origin form and asterisk form)
  156. - absolute (GET http://example.com:80/index.html)
  157. - authority-form (CONNECT example.com:443)
  158. Details: http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-25#section-5.3
  159. form_out: The request form which mitmproxy has send out to the destination
  160. method: HTTP method
  161. scheme: URL scheme (http/https) (absolute-form only)
  162. host: Host portion of the URL (absolute-form and authority-form only)
  163. port: Destination port (absolute-form and authority-form only)
  164. path: Path portion of the URL (not present in authority-form)
  165. httpversion: HTTP version tuple
  166. timestamp_start: Timestamp indicating when request transmission started
  167. timestamp_end: Timestamp indicating when request transmission ended
  168. """
  169. def __init__(self, form_in, method, scheme, host, port, path, httpversion, headers,
  170. content, timestamp_start=None, timestamp_end=None, form_out=None):
  171. assert isinstance(headers, ODictCaseless) or not headers
  172. HTTPMessage.__init__(self, httpversion, headers, content, timestamp_start,
  173. timestamp_end)
  174. self.form_in = form_in
  175. self.method = method
  176. self.scheme = scheme
  177. self.host = host
  178. self.port = port
  179. self.path = path
  180. self.httpversion = httpversion
  181. self.form_out = form_out or form_in
  182. # Have this request's cookies been modified by sticky cookies or auth?
  183. self.stickycookie = False
  184. self.stickyauth = False
  185. # Is this request replayed?
  186. self.is_replay = False
  187. _stateobject_attributes = HTTPMessage._stateobject_attributes.copy()
  188. _stateobject_attributes.update(
  189. form_in=str,
  190. method=str,
  191. scheme=str,
  192. host=str,
  193. port=int,
  194. path=str,
  195. form_out=str
  196. )
  197. @classmethod
  198. def _from_state(cls, state):
  199. f = cls(None, None, None, None, None, None, None, None, None, None, None)
  200. f._load_state(state)
  201. return f
  202. @classmethod
  203. def from_stream(cls, rfile, include_content=True, body_size_limit=None):
  204. """
  205. Parse an HTTP request from a file stream
  206. """
  207. httpversion, host, port, scheme, method, path, headers, content, timestamp_start, timestamp_end \
  208. = None, None, None, None, None, None, None, None, None, None
  209. if hasattr(rfile, "reset_timestamps"):
  210. rfile.reset_timestamps()
  211. request_line = get_line(rfile)
  212. if hasattr(rfile, "first_byte_timestamp"):
  213. timestamp_start = rfile.first_byte_timestamp
  214. else:
  215. timestamp_start = utils.timestamp()
  216. request_line_parts = http.parse_init(request_line)
  217. if not request_line_parts:
  218. raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
  219. method, path, httpversion = request_line_parts
  220. if path == '*' or path.startswith("/"):
  221. form_in = "relative"
  222. if not netlib.utils.isascii(path):
  223. raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
  224. elif method.upper() == 'CONNECT':
  225. form_in = "authority"
  226. r = http.parse_init_connect(request_line)
  227. if not r:
  228. raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
  229. host, port, _ = r
  230. path = None
  231. else:
  232. form_in = "absolute"
  233. r = http.parse_init_proxy(request_line)
  234. if not r:
  235. raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
  236. _, scheme, host, port, path, _ = r
  237. headers = http.read_headers(rfile)
  238. if headers is None:
  239. raise http.HttpError(400, "Invalid headers")
  240. if include_content:
  241. content = http.read_http_body(rfile, headers, body_size_limit, True)
  242. timestamp_end = utils.timestamp()
  243. return HTTPRequest(form_in, method, scheme, host, port, path, httpversion, headers,
  244. content, timestamp_start, timestamp_end)
  245. def _assemble_first_line(self, form=None):
  246. form = form or self.form_out
  247. if form == "relative":
  248. path = self.path if self.method != "OPTIONS" else "*"
  249. request_line = '%s %s HTTP/%s.%s' % \
  250. (self.method, path, self.httpversion[0], self.httpversion[1])
  251. elif form == "authority":
  252. request_line = '%s %s:%s HTTP/%s.%s' % (self.method, self.host, self.port,
  253. self.httpversion[0], self.httpversion[1])
  254. elif form == "absolute":
  255. request_line = '%s %s://%s:%s%s HTTP/%s.%s' % \
  256. (self.method, self.scheme, self.host, self.port, self.path,
  257. self.httpversion[0], self.httpversion[1])
  258. else:
  259. raise http.HttpError(400, "Invalid request form")
  260. return request_line
  261. def _assemble_headers(self):
  262. headers = self.headers.copy()
  263. utils.del_all(
  264. headers,
  265. [
  266. 'Proxy-Connection',
  267. 'Keep-Alive',
  268. 'Connection',
  269. 'Transfer-Encoding'
  270. ]
  271. )
  272. if not 'host' in headers:
  273. headers["Host"] = [utils.hostport(self.scheme,
  274. self.host or self.flow.server_conn.address.host,
  275. self.port or self.flow.server_conn.address.port)]
  276. if self.content:
  277. headers["Content-Length"] = [str(len(self.content))]
  278. elif 'Transfer-Encoding' in self.headers: # content-length for e.g. chuncked transfer-encoding with no content
  279. headers["Content-Length"] = ["0"]
  280. return str(headers)
  281. def _assemble_head(self, form=None):
  282. return "%s\r\n%s\r\n" % (self._assemble_first_line(form), self._assemble_headers())
  283. def _assemble(self, form=None):
  284. """
  285. Assembles the request for transmission to the server. We make some
  286. modifications to make sure interception works properly.
  287. Raises an Exception if the request cannot be assembled.
  288. """
  289. if self.content == CONTENT_MISSING:
  290. raise proxy.ProxyError(502, "Cannot assemble flow with CONTENT_MISSING")
  291. head = self._assemble_head(form)
  292. if self.content:
  293. return head + self.content
  294. else:
  295. return head
  296. def __hash__(self):
  297. return id(self)
  298. def anticache(self):
  299. """
  300. Modifies this request to remove headers that might produce a cached
  301. response. That is, we remove ETags and If-Modified-Since headers.
  302. """
  303. delheaders = [
  304. "if-modified-since",
  305. "if-none-match",
  306. ]
  307. for i in delheaders:
  308. del self.headers[i]
  309. def anticomp(self):
  310. """
  311. Modifies this request to remove headers that will compress the
  312. resource's data.
  313. """
  314. self.headers["accept-encoding"] = ["identity"]
  315. def constrain_encoding(self):
  316. """
  317. Limits the permissible Accept-Encoding values, based on what we can
  318. decode appropriately.
  319. """
  320. if self.headers["accept-encoding"]:
  321. self.headers["accept-encoding"] = [', '.join(
  322. e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0]
  323. )]
  324. def get_form_urlencoded(self):
  325. """
  326. Retrieves the URL-encoded form data, returning an ODict object.
  327. Returns an empty ODict if there is no data or the content-type
  328. indicates non-form data.
  329. """
  330. if self.content and self.headers.in_any("content-type", HDR_FORM_URLENCODED, True):
  331. return ODict(utils.urldecode(self.content))
  332. return ODict([])
  333. def set_form_urlencoded(self, odict):
  334. """
  335. Sets the body to the URL-encoded form data, and adds the
  336. appropriate content-type header. Note that this will destory the
  337. existing body if there is one.
  338. """
  339. # FIXME: If there's an existing content-type header indicating a
  340. # url-encoded form, leave it alone.
  341. self.headers["Content-Type"] = [HDR_FORM_URLENCODED]
  342. self.content = utils.urlencode(odict.lst)
  343. def get_path_components(self):
  344. """
  345. Returns the path components of the URL as a list of strings.
  346. Components are unquoted.
  347. """
  348. _, _, path, _, _, _ = urlparse.urlparse(self.get_url())
  349. return [urllib.unquote(i) for i in path.split("/") if i]
  350. def set_path_components(self, lst):
  351. """
  352. Takes a list of strings, and sets the path component of the URL.
  353. Components are quoted.
  354. """
  355. lst = [urllib.quote(i, safe="") for i in lst]
  356. path = "/" + "/".join(lst)
  357. scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.get_url())
  358. self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]))
  359. def get_query(self):
  360. """
  361. Gets the request query string. Returns an ODict object.
  362. """
  363. _, _, _, _, query, _ = urlparse.urlparse(self.get_url())
  364. if query:
  365. return ODict(utils.urldecode(query))
  366. return ODict([])
  367. def set_query(self, odict):
  368. """
  369. Takes an ODict object, and sets the request query string.
  370. """
  371. scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.get_url())
  372. query = utils.urlencode(odict.lst)
  373. self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]))
  374. def get_host(self, hostheader=False):
  375. """
  376. Heuristic to get the host of the request.
  377. The host is not necessarily equal to the TCP destination of the request,
  378. for example on a transparently proxified absolute-form request to an upstream HTTP proxy.
  379. If hostheader is set to True, the Host: header will be used as additional (and preferred) data source.
  380. """
  381. host = None
  382. if hostheader:
  383. host = self.headers.get_first("host")
  384. if not host:
  385. if self.host:
  386. host = self.host
  387. else:
  388. host = self.flow.server_conn.address.host
  389. host = host.encode("idna")
  390. return host
  391. def get_scheme(self):
  392. """
  393. Returns the request port, either from the request itself or from the flow's server connection
  394. """
  395. if self.scheme:
  396. return self.scheme
  397. return "https" if self.flow.server_conn.ssl_established else "http"
  398. def get_port(self):
  399. """
  400. Returns the request port, either from the request itself or from the flow's server connection
  401. """
  402. if self.port:
  403. return self.port
  404. return self.flow.server_conn.address.port
  405. def get_url(self, hostheader=False):
  406. """
  407. Returns a URL string, constructed from the Request's URL components.
  408. If hostheader is True, we use the value specified in the request
  409. Host header to construct the URL.
  410. """
  411. if self.form_out == "authority": # upstream proxy mode
  412. return "%s:%s" % (self.get_host(hostheader), self.get_port())
  413. return utils.unparse_url(self.get_scheme(),
  414. self.get_host(hostheader),
  415. self.get_port(),
  416. self.path).encode('ascii')
  417. def set_url(self, url):
  418. """
  419. Parses a URL specification, and updates the Request's information
  420. accordingly.
  421. Returns False if the URL was invalid, True if the request succeeded.
  422. """
  423. parts = http.parse_url(url)
  424. if not parts:
  425. return False
  426. scheme, host, port, path = parts
  427. is_ssl = (True if scheme == "https" else False)
  428. self.path = path
  429. if host != self.get_host() or port != self.get_port():
  430. if self.flow.change_server:
  431. self.flow.change_server((host, port), ssl=is_ssl)
  432. else:
  433. # There's not live server connection, we're just changing the attributes here.
  434. self.flow.server_conn = ServerConnection((host, port),
  435. proxy.AddressPriority.MANUALLY_CHANGED)
  436. self.flow.server_conn.ssl_established = is_ssl
  437. # If this is an absolute request, replace the attributes on the request object as well.
  438. if self.host:
  439. self.host = host
  440. if self.port:
  441. self.port = port
  442. if self.scheme:
  443. self.scheme = scheme
  444. return True
  445. def get_cookies(self):
  446. cookie_headers = self.headers.get("cookie")
  447. if not cookie_headers:
  448. return None
  449. cookies = []
  450. for header in cookie_headers:
  451. pairs = [pair.partition("=") for pair in header.split(';')]
  452. cookies.extend((pair[0], (pair[2], {})) for pair in pairs)
  453. return dict(cookies)
  454. def replace(self, pattern, repl, *args, **kwargs):
  455. """
  456. Replaces a regular expression pattern with repl in the headers, the request path
  457. and the body of the request. Encoded content will be decoded before
  458. replacement, and re-encoded afterwards.
  459. Returns the number of replacements made.
  460. """
  461. c = HTTPMessage.replace(self, pattern, repl, *args, **kwargs)
  462. self.path, pc = utils.safe_subn(pattern, repl, self.path, *args, **kwargs)
  463. c += pc
  464. return c
  465. class HTTPResponse(HTTPMessage):
  466. """
  467. An HTTP response.
  468. Exposes the following attributes:
  469. flow: Flow object the request belongs to
  470. code: HTTP response code
  471. msg: HTTP response message
  472. headers: ODict object
  473. content: Content of the request, None, or CONTENT_MISSING if there
  474. is content associated, but not present. CONTENT_MISSING evaluates
  475. to False to make checking for the presence of content natural.
  476. httpversion: HTTP version tuple
  477. timestamp_start: Timestamp indicating when request transmission started
  478. timestamp_end: Timestamp indicating when request transmission ended
  479. """
  480. def __init__(self, httpversion, code, msg, headers, content, timestamp_start=None,
  481. timestamp_end=None):
  482. assert isinstance(headers, ODictCaseless) or headers is None
  483. HTTPMessage.__init__(self, httpversion, headers, content, timestamp_start,
  484. timestamp_end)
  485. self.code = code
  486. self.msg = msg
  487. # Is this request replayed?
  488. self.is_replay = False
  489. _stateobject_attributes = HTTPMessage._stateobject_attributes.copy()
  490. _stateobject_attributes.update(
  491. code=int,
  492. msg=str
  493. )
  494. @classmethod
  495. def _from_state(cls, state):
  496. f = cls(None, None, None, None, None)
  497. f._load_state(state)
  498. return f
  499. @classmethod
  500. def from_stream(cls, rfile, request_method, include_content=True, body_size_limit=None):
  501. """
  502. Parse an HTTP response from a file stream
  503. """
  504. if not include_content:
  505. raise NotImplementedError # pragma: nocover
  506. if hasattr(rfile, "reset_timestamps"):
  507. rfile.reset_timestamps()
  508. httpversion, code, msg, headers, content = http.read_response(
  509. rfile,
  510. request_method,
  511. body_size_limit)
  512. if hasattr(rfile, "first_byte_timestamp"):
  513. timestamp_start = rfile.first_byte_timestamp
  514. else:
  515. timestamp_start = utils.timestamp()
  516. timestamp_end = utils.timestamp()
  517. return HTTPResponse(httpversion, code, msg, headers, content, timestamp_start,
  518. timestamp_end)
  519. def _assemble_first_line(self):
  520. return 'HTTP/%s.%s %s %s' % \
  521. (self.httpversion[0], self.httpversion[1], self.code, self.msg)
  522. def _assemble_headers(self):
  523. headers = self.headers.copy()
  524. utils.del_all(
  525. headers,
  526. [
  527. 'Proxy-Connection',
  528. 'Transfer-Encoding'
  529. ]
  530. )
  531. if self.content:
  532. headers["Content-Length"] = [str(len(self.content))]
  533. elif 'Transfer-Encoding' in self.headers: # add content-length for chuncked transfer-encoding with no content
  534. headers["Content-Length"] = ["0"]
  535. return str(headers)
  536. def _assemble_head(self):
  537. return '%s\r\n%s\r\n' % (self._assemble_first_line(), self._assemble_headers())
  538. def _assemble(self):
  539. """
  540. Assembles the response for transmission to the client. We make some
  541. modifications to make sure interception works properly.
  542. Raises an Exception if the request cannot be assembled.
  543. """
  544. if self.content == CONTENT_MISSING:
  545. raise proxy.ProxyError(502, "Cannot assemble flow with CONTENT_MISSING")
  546. head = self._assemble_head()
  547. if self.content:
  548. return head + self.content
  549. else:
  550. return head
  551. def _refresh_cookie(self, c, delta):
  552. """
  553. Takes a cookie string c and a time delta in seconds, and returns
  554. a refreshed cookie string.
  555. """
  556. c = Cookie.SimpleCookie(str(c))
  557. for i in c.values():
  558. if "expires" in i:
  559. d = parsedate_tz(i["expires"])
  560. if d:
  561. d = mktime_tz(d) + delta
  562. i["expires"] = formatdate(d)
  563. else:
  564. # This can happen when the expires tag is invalid.
  565. # reddit.com sends a an expires tag like this: "Thu, 31 Dec
  566. # 2037 23:59:59 GMT", which is valid RFC 1123, but not
  567. # strictly correct according to the cookie spec. Browsers
  568. # appear to parse this tolerantly - maybe we should too.
  569. # For now, we just ignore this.
  570. del i["expires"]
  571. return c.output(header="").strip()
  572. def refresh(self, now=None):
  573. """
  574. This fairly complex and heuristic function refreshes a server
  575. response for replay.
  576. - It adjusts date, expires and last-modified headers.
  577. - It adjusts cookie expiration.
  578. """
  579. if not now:
  580. now = time.time()
  581. delta = now - self.timestamp_start
  582. refresh_headers = [
  583. "date",
  584. "expires",
  585. "last-modified",
  586. ]
  587. for i in refresh_headers:
  588. if i in self.headers:
  589. d = parsedate_tz(self.headers[i][0])
  590. if d:
  591. new = mktime_tz(d) + delta
  592. self.headers[i] = [formatdate(new)]
  593. c = []
  594. for i in self.headers["set-cookie"]:
  595. c.append(self._refresh_cookie(i, delta))
  596. if c:
  597. self.headers["set-cookie"] = c
  598. def get_cookies(self):
  599. cookie_headers = self.headers.get("set-cookie")
  600. if not cookie_headers:
  601. return None
  602. cookies = []
  603. for header in cookie_headers:
  604. pairs = [pair.partition("=") for pair in header.split(';')]
  605. cookie_name = pairs[0][0] # the key of the first key/value pairs
  606. cookie_value = pairs[0][2] # the value of the first key/value pairs
  607. cookie_parameters = {key.strip().lower(): value.strip() for key, sep, value in
  608. pairs[1:]}
  609. cookies.append((cookie_name, (cookie_value, cookie_parameters)))
  610. return dict(cookies)
  611. class HTTPFlow(Flow):
  612. """
  613. A Flow is a collection of objects representing a single HTTP
  614. transaction. The main attributes are:
  615. request: HTTPRequest object
  616. response: HTTPResponse object
  617. error: Error object
  618. Note that it's possible for a Flow to have both a response and an error
  619. object. This might happen, for instance, when a response was received
  620. from the server, but there was an error sending it back to the client.
  621. The following additional attributes are exposed:
  622. intercepting: Is this flow currently being intercepted?
  623. """
  624. def __init__(self, client_conn, server_conn, change_server=None):
  625. Flow.__init__(self, "http", client_conn, server_conn)
  626. self.request = None
  627. """@type: HTTPRequest"""
  628. self.response = None
  629. """@type: HTTPResponse"""
  630. self.change_server = change_server # Used by flow.request.set_url to change the server address
  631. self.intercepting = False # FIXME: Should that rather be an attribute of Flow?
  632. _backrefattr = Flow._backrefattr + ("request", "response")
  633. _stateobject_attributes = Flow._stateobject_attributes.copy()
  634. _stateobject_attributes.update(
  635. request=HTTPRequest,
  636. response=HTTPResponse
  637. )
  638. @classmethod
  639. def _from_state(cls, state):
  640. f = cls(None, None)
  641. f._load_state(state)
  642. return f
  643. def copy(self):
  644. f = super(HTTPFlow, self).copy()
  645. if self.request:
  646. f.request = self.request.copy()
  647. if self.response:
  648. f.response = self.response.copy()
  649. return f
  650. def match(self, f):
  651. """
  652. Match this flow against a compiled filter expression. Returns True
  653. if matched, False if not.
  654. If f is a string, it will be compiled as a filter expression. If
  655. the expression is invalid, ValueError is raised.
  656. """
  657. if isinstance(f, basestring):
  658. f = filt.parse(f)
  659. if not f:
  660. raise ValueError("Invalid filter expression.")
  661. if f:
  662. return f(self)
  663. return True
  664. def kill(self, master):
  665. """
  666. Kill this request.
  667. """
  668. self.error = Error("Connection killed")
  669. self.error.reply = controller.DummyReply()
  670. if self.request and not self.request.reply.acked:
  671. self.request.reply(KILL)
  672. elif self.response and not self.response.reply.acked:
  673. self.response.reply(KILL)
  674. master.handle_error(self.error)
  675. self.intercepting = False
  676. def intercept(self):
  677. """
  678. Intercept this Flow. Processing will stop until accept_intercept is
  679. called.
  680. """
  681. self.intercepting = True
  682. def accept_intercept(self):
  683. """
  684. Continue with the flow - called after an intercept().
  685. """
  686. if self.request:
  687. if not self.request.reply.acked:
  688. self.request.reply()
  689. elif self.response and not self.response.reply.acked:
  690. self.response.reply()
  691. self.intercepting = False
  692. def replace(self, pattern, repl, *args, **kwargs):
  693. """
  694. Replaces a regular expression pattern with repl in both request and response of the
  695. flow. Encoded content will be decoded before replacement, and
  696. re-encoded afterwards.
  697. Returns the number of replacements made.
  698. """
  699. c = self.request.replace(pattern, repl, *args, **kwargs)
  700. if self.response:
  701. c += self.response.replace(pattern, repl, *args, **kwargs)
  702. return c
  703. class HttpAuthenticationError(Exception):
  704. def __init__(self, auth_headers=None):
  705. super(HttpAuthenticationError, self).__init__("Proxy Authentication Required")
  706. self.headers = auth_headers
  707. self.code = 407
  708. def __repr__(self):
  709. return "Proxy Authentication Required"
  710. class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin):
  711. def __init__(self, c):
  712. super(HTTPHandler, self).__init__(c)
  713. self.expected_form_in = c.config.http_form_in
  714. self.expected_form_out = c.config.http_form_out
  715. self.skip_authentication = False
  716. def handle_messages(self):
  717. while self.handle_flow():
  718. pass
  719. self.c.close = True
  720. def get_response_from_server(self, request):
  721. self.c.establish_server_connection()
  722. request_raw = request._assemble()
  723. for i in range(2):
  724. try:
  725. self.c.server_conn.send(request_raw)
  726. return HTTPResponse.from_stream(self.c.server_conn.rfile, request.method,
  727. body_size_limit=self.c.config.body_size_limit)
  728. except (tcp.NetLibDisconnect, http.HttpErrorConnClosed), v:
  729. self.c.log("error in server communication: %s" % str(v), level="debug")
  730. if i < 1:
  731. # In any case, we try to reconnect at least once.
  732. # This is necessary because it might be possible that we already initiated an upstream connection
  733. # after clientconnect that has already been expired, e.g consider the following event log:
  734. # > clientconnect (transparent mode destination known)
  735. # > serverconnect
  736. # > read n% of large request
  737. # > server detects timeout, disconnects
  738. # > read (100-n)% of large request
  739. # > send large request upstream
  740. self.c.server_reconnect()
  741. else:
  742. raise v
  743. def handle_flow(self):
  744. flow = HTTPFlow(self.c.client_conn, self.c.server_conn, self.change_server)
  745. try:
  746. req = HTTPRequest.from_stream(self.c.client_conn.rfile,
  747. body_size_limit=self.c.config.body_size_limit)
  748. self.c.log("request", "debug", [req._assemble_first_line(req.form_in)])
  749. send_request_upstream = self.process_request(flow, req)
  750. if not send_request_upstream:
  751. return True
  752. # Be careful NOT to assign the request to the flow before
  753. # process_request completes. This is because the call can raise an
  754. # exception. If the request object is already attached, this results
  755. # in an Error object that has an attached request that has not been
  756. # sent through to the Master.
  757. flow.request = req
  758. request_reply = self.c.channel.ask("request", flow.request)
  759. flow.server_conn = self.c.server_conn
  760. if request_reply is None or request_reply == KILL:
  761. return False
  762. if isinstance(request_reply, HTTPResponse):
  763. flow.response = request_reply
  764. else:
  765. flow.response = self.get_response_from_server(flow.request)
  766. flow.server_conn = self.c.server_conn # no further manipulation of self.c.server_conn beyond this point
  767. # we can safely set it as the final attribute value here.
  768. self.c.log("response", "debug", [flow.response._assemble_first_line()])
  769. response_reply = self.c.channel.ask("response", flow.response)
  770. if response_reply is None or response_reply == KILL:
  771. return False
  772. self.c.client_conn.send(flow.response._assemble())
  773. flow.timestamp_end = utils.timestamp()
  774. if (http.connection_close(flow.request.httpversion, flow.request.headers) or
  775. http.connection_close(flow.response.httpversion, flow.response.headers)):
  776. return False
  777. if flow.request.form_in == "authority":
  778. self.ssl_upgrade()
  779. # If the user has changed the target server on this connection,
  780. # restore the original target server
  781. self.restore_server()
  782. return True
  783. except (HttpAuthenticationError, http.HttpError, proxy.ProxyError, tcp.NetLibError), e:
  784. self.handle_error(e, flow)
  785. return False
  786. def handle_error(self, error, flow=None):
  787. message = repr(error)
  788. code = getattr(error, "code", 502)
  789. headers = getattr(error, "headers", None)
  790. if "tlsv1 alert unknown ca" in message:
  791. message = message + " \nThe client does not trust the proxy's certificate."
  792. self.c.log("error: %s" % message, level="info")
  793. if flow:
  794. flow.error = Error(message)
  795. # FIXME: no flows without request or with both request and response at the moement.
  796. if flow.request and not flow.response:
  797. self.c.channel.ask("error", flow.error)
  798. else:
  799. pass # FIXME: Do we want to persist errors without flows?
  800. try:
  801. self.send_error(code, message, headers)
  802. except:
  803. pass
  804. def send_error(self, code, message, headers):
  805. response = http_status.RESPONSES.get(code, "Unknown")
  806. html_content = '<html><head>\n<title>%d %s</title>\n</head>\n<body>\n%s\n</body>\n</html>' % \
  807. (code, response, message)
  808. self.c.client_conn.wfile.write("HTTP/1.1 %s %s\r\n" % (code, response))
  809. self.c.client_conn.wfile.write("Server: %s\r\n" % self.c.server_version)
  810. self.c.client_conn.wfile.write("Content-type: text/html\r\n")
  811. self.c.client_conn.wfile.write("Content-Length: %d\r\n" % len(html_content))
  812. if headers:
  813. for key, value in headers.items():
  814. self.c.client_conn.wfile.write("%s: %s\r\n" % (key, value))
  815. self.c.client_conn.wfile.write("Connection: close\r\n")
  816. self.c.client_conn.wfile.write("\r\n")
  817. self.c.client_conn.wfile.write(html_content)
  818. self.c.client_conn.wfile.flush()
  819. def hook_reconnect(self, upstream_request):
  820. """
  821. If the authority request has been forwarded upstream (because we have another proxy server there),
  822. money-patch the ConnectionHandler.server_reconnect function to resend the CONNECT request on reconnect.
  823. Hooking code isn't particulary beautiful, but it isolates this edge-case from
  824. the protocol-agnostic ConnectionHandler
  825. """
  826. self.c.log("Hook reconnect function", level="debug")
  827. original_reconnect_func = self.c.server_reconnect
  828. def reconnect_http_proxy():
  829. self.c.log("Hooked reconnect function", "debug")
  830. self.c.log("Hook: Run original reconnect", "debug")
  831. original_reconnect_func(no_ssl=True)
  832. self.c.log("Hook: Write CONNECT request to upstream proxy", "debug",
  833. [upstream_request._assemble_first_line()])
  834. self.c.server_conn.send(upstream_request._assemble())
  835. self.c.log("Hook: Read answer to CONNECT request from proxy", "debug")
  836. resp = HTTPResponse.from_stream(self.c.server_conn.rfile, upstream_request.method)
  837. if resp.code != 200:
  838. raise proxy.ProxyError(resp.code,
  839. "Cannot reestablish SSL " +
  840. "connection with upstream proxy: \r\n" +
  841. str(resp.headers))
  842. self.c.log("Hook: Establish SSL with upstream proxy", "debug")
  843. self.c.establish_ssl(server=True)
  844. self.c.server_reconnect = reconnect_http_proxy
  845. def ssl_upgrade(self):
  846. """
  847. Upgrade the connection to SSL after an authority (CONNECT) request has been made.
  848. """
  849. self.c.log("Received CONNECT request. Upgrading to SSL...", "debug")
  850. self.expected_form_in = "relative"
  851. self.expected_form_out = "relative"
  852. self.c.establish_ssl(server=True, client=True)
  853. self.c.log("Upgrade to SSL completed.", "debug")
  854. def process_request(self, flow, request):
  855. if not self.skip_authentication:
  856. self.authenticate(request)
  857. if request.form_in == "authority":
  858. if self.c.client_conn.ssl_established:
  859. raise http.HttpError(400, "Must not CONNECT on already encrypted connection")
  860. if self.expected_form_in == "absolute":
  861. if not self.c.config.get_upstream_server:
  862. self.c.set_server_address((request.host, request.port),
  863. proxy.AddressPriority.FROM_PROTOCOL)
  864. flow.server_conn = self.c.server_conn # Update server_conn attribute on the flow
  865. self.c.client_conn.send(
  866. 'HTTP/1.1 200 Connection established\r\n' +
  867. ('Proxy-agent: %s\r\n' % self.c.server_version) +
  868. '\r\n'
  869. )
  870. self.ssl_upgrade()
  871. self.skip_authentication = True
  872. return False
  873. else:
  874. self.hook_reconnect(request)
  875. return True
  876. elif request.form_in == self.expected_form_in:
  877. if request.form_in == "absolute":
  878. if request.scheme != "http":
  879. raise http.HttpError(400, "Invalid request scheme: %s" % request.scheme)
  880. self.c.set_server_address((request.host, request.port),
  881. proxy.AddressPriority.FROM_PROTOCOL)
  882. flow.server_conn = self.c.server_conn # Update server_conn attribute on the flow
  883. request.form_out = self.expected_form_out
  884. return True
  885. raise http.HttpError(400, "Invalid HTTP request form (expected: %s, got: %s)" %
  886. (self.expected_form_in, request.form_in))
  887. def authenticate(self, request):
  888. if self.c.config.authenticator:
  889. if self.c.config.authenticator.authenticate(request.headers):
  890. self.c.config.authenticator.clean(request.headers)
  891. else:
  892. raise HttpAuthenticationError(
  893. self.c.config.authenticator.auth_challenge_headers())
  894. return request.headers