PageRenderTime 53ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/qp/http/request.py

https://bitbucket.org/pfw/durusworks
Python | 637 lines | 615 code | 6 blank | 16 comment | 17 complexity | dd13982a73492dcd7a7ef06cfa427a4e MD5 | raw file
  1. """
  2. open/DurusWorks/qp/http/request.py
  3. """
  4. from durus.utils import as_bytes, join_bytes, empty_byte_string, byte_string
  5. from qp.fill.html import url_quote, url_unquote
  6. from qp.lib.util import StringIO, message_from_file
  7. from qp.pub.common import get_publisher
  8. from shutil import move
  9. import os
  10. import re
  11. import sys
  12. import tempfile
# CR LF line terminator as a byte string; the multipart/MIME readers in this
# module compare raw input lines against it.
CRNL = as_bytes("\r\n")

# Various regexes for parsing specific bits of HTTP, all from RFC 2616.
# These are used by _parse_pref_header().
# LWS is linear whitespace; the latter two assume that LWS has been removed.
# _http_lws_re:      an optional CR LF followed by one or more spaces/tabs.
# _http_list_re:     one or more commas separating list elements.
# _http_encoding_re: "<token>" optionally followed by ";q=<number>"; group 1
#                    is the token and group 3 is the quality value.
_http_lws_re = re.compile(r"(\r\n)?[ \t]+")
_http_list_re = re.compile(r",+")
_http_encoding_re = re.compile(r"([^;]+)(;q=([\d.]+))?$")
  20. def _decode_string(s, charset):
  21. if hasattr(s, 'decode'):
  22. return s.decode(charset)
  23. else:
  24. return s # assume already decoded
  25. def parse_header(line):
  26. """Parse a Content-type like header.
  27. Return the main content-type and a dictionary of options.
  28. """
  29. plist = [x.strip() for x in line.split(';')]
  30. key = plist.pop(0).lower()
  31. pdict = {}
  32. for p in plist:
  33. i = p.find('=')
  34. if i >= 0:
  35. name = p[:i].strip().lower()
  36. value = p[i+1:].strip()
  37. if len(value) >= 2 and value[0] == value[-1] == '"':
  38. value = value[1:-1]
  39. pdict[name] = value
  40. return key, pdict
  41. def parse_query(qs, charset):
  42. """(qs: string) -> {key:string, string|[string]}
  43. Parse a query given as a string argument and return a dictionary.
  44. """
  45. if isinstance(qs, byte_string):
  46. qs = qs.decode(charset)
  47. fields = {}
  48. if '&' in qs:
  49. ampersand = '&'
  50. else:
  51. ampersand = '&'
  52. for chunk in qs.split(ampersand):
  53. if not chunk:
  54. continue
  55. chunk = chunk.replace('+', ' ')
  56. chunk = url_unquote(chunk)
  57. if '=' in chunk:
  58. name, value = chunk.split('=', 1)
  59. else:
  60. name = chunk
  61. value = ''
  62. _add_field_value(fields, name, value)
  63. return fields
  64. def _add_field_value(fields, name, value):
  65. if name in fields:
  66. values = fields[name]
  67. if not isinstance(values, list):
  68. fields[name] = values = [values]
  69. values.append(value)
  70. else:
  71. fields[name] = value
  72. class HTTPRequest (object):
  73. """
  74. Model a single HTTP request and all associated data: environment
  75. variables, form variables, cookies, etc.
  76. To access environment variables associated with the request, use
  77. get_environ(): eg. request.get_environ('SERVER_PORT', 80).
  78. To access form variables, use get_field(), eg.
  79. request.get_field("name").
  80. To access cookies, use get_cookie().
  81. Various bits and pieces of the requested URL can be accessed with
  82. get_url(), get_path(), get_server()
  83. """
  84. # In qp, we will encode html pages using utf-8.
  85. # Unless the client specifies otherwise, we will assume that requests use
  86. # the same charset.
  87. DEFAULT_CHARSET = 'utf-8'
  88. def __init__(self, stdin, environ):
  89. self.stdin = stdin
  90. self.environ = environ
  91. self.fields = None
  92. self.cookies = parse_cookies(environ.get('HTTP_COOKIE', ''))
  93. if environ.get('HTTPS', 'off').lower() in ('on', '1', 'yes'):
  94. self.scheme = "https"
  95. else:
  96. self.scheme = "http"
  97. self.body = None
  98. def get_content_length(self):
  99. length = self.environ.get('CONTENT_LENGTH') or "0"
  100. try:
  101. return int(length)
  102. except ValueError:
  103. raise ValueError('invalid content-length header')
  104. def read_body(self):
  105. if self.body is not None:
  106. return self.body
  107. length = self.get_content_length()
  108. body = self.stdin.read(length)
  109. self.body = body
  110. if len(body) != length:
  111. raise ValueError(
  112. "read_body() read %s/%s bytes" % (len(body), length))
  113. return body
  114. def get_content_type(self):
  115. content_type = self.environ.get("CONTENT_TYPE")
  116. if content_type:
  117. return parse_header(content_type)
  118. else:
  119. return None, None
  120. def get_fields(self):
  121. if self.fields is None:
  122. self.fields = dict()
  123. query = self.get_query()
  124. if query:
  125. self.fields.update(parse_query(query, self.DEFAULT_CHARSET))
  126. if self.get_content_length() > 0:
  127. ctype, ctype_params = self.get_content_type()
  128. if ctype == 'application/x-www-form-urlencoded':
  129. self._process_urlencoded(ctype_params)
  130. elif ctype == 'multipart/form-data':
  131. self._process_multipart(ctype_params)
  132. return self.fields
  133. def _process_urlencoded(self, params):
  134. body = self.read_body()
  135. charset = params.get('charset', self.DEFAULT_CHARSET)
  136. self.fields.update(parse_query(body, charset))
  137. def _process_multipart(self, params):
  138. boundary = params.get('boundary')
  139. if not boundary:
  140. raise ValueError('multipart/form-data missing boundary')
  141. charset = params.get('charset')
  142. length = self.get_content_length()
  143. mimeinput = MIMEInput(self.stdin, boundary, length)
  144. try:
  145. for line in mimeinput.readpart():
  146. pass # discard lines up to first boundary
  147. while mimeinput.moreparts():
  148. self._process_multipart_body(mimeinput, charset)
  149. except EOFError:
  150. raise ValueError('unexpected end of multipart/form-data')
  151. def _process_multipart_body(self, mimeinput, charset):
  152. headers = StringIO()
  153. lines = mimeinput.readpart()
  154. for line in lines:
  155. headers.write(line.decode('latin1'))
  156. if line == CRNL:
  157. break
  158. headers.seek(0)
  159. headers = message_from_file(headers)
  160. ctype, ctype_params = parse_header(headers.get('content-type', ''))
  161. if ctype and 'charset' in ctype_params:
  162. charset = ctype_params['charset']
  163. cdisp, cdisp_params = parse_header(
  164. headers.get('content-disposition', ''))
  165. if not cdisp:
  166. raise ValueError('expected Content-Disposition header')
  167. name = cdisp_params.get('name')
  168. filename = cdisp_params.get('filename')
  169. if not (cdisp == 'form-data' and name):
  170. raise ValueError('expected Content-Disposition: form-data'
  171. 'with a "name" parameter: got %r' %
  172. headers.get('content-disposition', ''))
  173. # FIXME: should really to handle Content-Transfer-Encoding and other
  174. # MIME complexity here. See RFC2048 for the full horror story.
  175. if filename:
  176. # it might be large file upload so use a temporary file
  177. upload = Upload(filename, ctype, charset)
  178. upload.receive(lines)
  179. _add_field_value(self.fields, name, upload)
  180. else:
  181. value = _decode_string(join_bytes(lines),
  182. charset or self.DEFAULT_CHARSET)
  183. _add_field_value(self.fields, name, value)
  184. def get_header(self, name, default=None):
  185. """(name : str, default : str = None) -> str
  186. Return the named HTTP header, or an optional default argument
  187. (or None) if the header is not found. Note that both original
  188. and CGI-ified header names are recognized, e.g. 'Content-Type',
  189. 'CONTENT_TYPE' and 'HTTP_CONTENT_TYPE' should all return the
  190. Content-Type header, if available.
  191. """
  192. environ = self.environ
  193. name = name.replace('-', '_').upper()
  194. val = environ.get(name)
  195. if val is not None:
  196. return val
  197. if name[:5] != 'HTTP_':
  198. name = 'HTTP_' + name
  199. return environ.get(name, default)
  200. def get_cookie(self, cookie_name, default=None):
  201. return self.cookies.get(cookie_name, default)
  202. def get_cookies(self):
  203. return self.cookies
  204. def get_field(self, name, default=None):
  205. return self.get_fields().get(name, default)
  206. def get_method(self):
  207. """Returns the HTTP method for this request
  208. """
  209. return self.environ.get('REQUEST_METHOD', 'GET')
  210. def get_scheme(self):
  211. return self.scheme
  212. def get_server(self):
  213. """() -> str
  214. Return the server name with an optional port number, eg.
  215. "www.example.com" or "foo.bar.com:8000".
  216. """
  217. http_host = self.environ.get("HTTP_HOST")
  218. if http_host:
  219. return http_host
  220. server_name = self.environ["SERVER_NAME"].strip()
  221. server_port = self.environ.get("SERVER_PORT")
  222. if (not server_port or
  223. (self.get_scheme() == "http" and server_port == "80") or
  224. (self.get_scheme() == "https" and server_port == "443")):
  225. return server_name
  226. else:
  227. return server_name + ':' + server_port
  228. def get_script_name(self):
  229. return self.environ.get('SCRIPT_NAME', '')
  230. def get_path_info(self):
  231. return self.environ.get('PATH_INFO', '')
  232. def get_path(self):
  233. """() -> str"""
  234. path = self.get_script_name() + self.get_path_info()
  235. if path[:1] != '/':
  236. path = '/' + path
  237. return path
  238. def get_query(self):
  239. """() -> string
  240. Return the query component of the URL.
  241. """
  242. return self.environ.get('QUERY_STRING', '')
  243. def get_path_query(self):
  244. query = self.get_query()
  245. path = url_quote(self.get_path())
  246. if query:
  247. return path + '?' + query
  248. else:
  249. return path
  250. def get_url(self):
  251. """() -> str
  252. Return the URL of the current request.
  253. """
  254. return "%s://%s%s" % (self.get_scheme(), self.get_server(),
  255. self.get_path_query())
  256. def get_environ(self, key, default=None):
  257. """(key : string) -> str
  258. Fetch a CGI environment variable from the request environment.
  259. See http://hoohoo.ncsa.uiuc.edu/cgi/env.html
  260. for the variables specified by the CGI standard.
  261. """
  262. return self.environ.get(key, default)
  263. def get_remote_address(self):
  264. return self.get_environ('REMOTE_ADDR')
  265. def get_encoding(self, encodings):
  266. """(encodings : [string]) -> str
  267. Parse the "Accept-encoding" header. 'encodings' is a list of
  268. encodings supported by the server sorted in order of preference.
  269. The return value is one of 'encodings' or None if the client
  270. does not accept any of the encodings.
  271. """
  272. accept_encoding = self.get_header("accept-encoding") or ''
  273. found_encodings = self._parse_pref_header(accept_encoding)
  274. if found_encodings:
  275. for encoding in encodings:
  276. if encoding in found_encodings:
  277. return encoding
  278. return None
  279. def accepts_gzip_encoding(self):
  280. return bool(self.get_encoding(['gzip']))
  281. def get_range(self):
  282. """() -> None | ( int|None , int|None )"""
  283. range_header = self.get_header("range")
  284. if range_header is not None:
  285. try:
  286. units, range = range_header.split("=")
  287. # We don't support multiple ranges - just one.
  288. if units != 'bytes':
  289. return None
  290. if "," in range:
  291. return None
  292. first_byte, last_byte = range.split("-")
  293. if first_byte == "":
  294. start = None
  295. else:
  296. start = int(first_byte)
  297. if last_byte == "":
  298. end = None
  299. if start is None:
  300. return None
  301. else:
  302. end = int(last_byte)
  303. if start is None or end is None or start <= end:
  304. return (start, end)
  305. except ValueError:
  306. # If anything went wrong with parsing the Range header, pretend the client
  307. # didn't submit it. (according to spec)
  308. pass
  309. return None
  310. def get_accepted_types(self):
  311. """() -> {string:float}
  312. Return a dictionary mapping MIME types the client will accept
  313. to the corresponding quality value (1.0 if no value was specified).
  314. """
  315. accept_types = self.environ.get('HTTP_ACCEPT', '')
  316. return self._parse_pref_header(accept_types)
  317. def _parse_pref_header(self, S):
  318. """(S:str) -> {str:float}
  319. Parse a list of HTTP preferences (content types, encodings) and
  320. return a dictionary mapping strings to the quality value.
  321. """
  322. found = {}
  323. # remove all linear whitespace
  324. S = _http_lws_re.sub('', S)
  325. for coding in _http_list_re.split(S):
  326. m = _http_encoding_re.match(coding)
  327. if m:
  328. encoding = m.group(1).lower()
  329. q = m.group(3) or 1.0
  330. try:
  331. q = float(q)
  332. except ValueError:
  333. continue
  334. if encoding == '*':
  335. continue # stupid, ignore it
  336. if q > 0:
  337. found[encoding] = q
  338. return found
  339. # See RFC 2109 for details. Note that this parser is more liberal.
  340. _COOKIE_RE = re.compile(r"""
  341. \s*
  342. (?P<name>[^=;,\s]+)
  343. \s*
  344. (
  345. =
  346. \s*
  347. (
  348. (?P<qvalue> "(\\[\x00-\x7f] | [^"])*")
  349. |
  350. (?P<value> [^";,\s]*)
  351. )
  352. )?
  353. \s*
  354. [;,]?
  355. """, re.VERBOSE)
  356. def parse_cookies(text):
  357. result = {}
  358. for m in _COOKIE_RE.finditer(text):
  359. name = m.group('name')
  360. if name[0] == '$':
  361. # discard, we don't handle per cookie attributes (e.g. $Path)
  362. continue
  363. qvalue = m.group('qvalue')
  364. if qvalue:
  365. value = re.sub(r'\\(.)', r'\1', qvalue)[1:-1]
  366. else:
  367. value = m.group('value') or ''
  368. result[name] = value
  369. return result
  370. SAFE_CHARS = ('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
  371. '0123456789-@&+=_., ')
  372. _safe_trans = None
  373. def make_safe_filename(s):
  374. global _safe_trans
  375. if _safe_trans is None:
  376. _safe_trans = ['_'] * 256
  377. for c in SAFE_CHARS:
  378. _safe_trans[ord(c)] = c
  379. _safe_trans = ''.join(_safe_trans)
  380. return s.translate(_safe_trans)
  381. class Upload (object):
  382. """
  383. Represents a single uploaded file.
  384. fp
  385. an open file containing the content of the upload. The file pointer
  386. points to the beginning of the file
  387. orig_filename
  388. the complete filename supplied by the user-agent in the
  389. request that uploaded this file. Depending on the browser,
  390. this might have the complete path of the original file
  391. on the client system, in the client system's syntax.
  392. base_filename
  393. the base component of orig_filename, shorn of MS-DOS,
  394. Mac OS, and Unix path components and with "unsafe"
  395. characters neutralized (see make_safe_filename())
  396. content_type
  397. the content type provided by the user-agent in the request
  398. that uploaded this file.
  399. charset
  400. the charset provide by the user-agent
  401. """
  402. def __init__(self, orig_filename, content_type=None, charset=None):
  403. if orig_filename:
  404. self.orig_filename = orig_filename
  405. separator_position = max(
  406. orig_filename.rfind('\\'),
  407. orig_filename.rfind(':'),
  408. orig_filename.rfind('/'))
  409. basename = orig_filename[separator_position + 1:]
  410. self.base_filename = make_safe_filename(basename)
  411. else:
  412. self.orig_filename = None
  413. self.base_filename = None
  414. self.content_type = content_type
  415. self.charset = charset
  416. self.fp = None
  417. def receive(self, lines):
  418. if get_publisher():
  419. tmp_dir = get_publisher().get_site().get_directory_for_temporary_files()
  420. else:
  421. tmp_dir = None
  422. self.fp = tempfile.NamedTemporaryFile("w+b", dir=tmp_dir, prefix="up.")
  423. for line in lines:
  424. self.fp.write(line)
  425. self.fp.seek(0)
  426. def read(self, n):
  427. return self.fp.read(n)
  428. def readline(self):
  429. return self.fp.readline()
  430. def readlines(self):
  431. return self.fp.readlines()
  432. def seek(self, *args):
  433. return self.fp.seek(*args)
  434. def tell(self):
  435. return self.fp.tell()
  436. def __iter__(self):
  437. return iter(self.fp)
  438. def close(self):
  439. self.fp.close()
  440. def get_size(self):
  441. """Return the size of the file, in bytes.
  442. """
  443. if self.fp is None:
  444. return 0
  445. else:
  446. return os.fstat(self.fp.fileno()).st_size
  447. def get_full_path(self):
  448. return self.fp.name
  449. def get_base_filename(self):
  450. return self.base_filename
  451. def get_orig_filename(self):
  452. return self.orig_filename
  453. def get_content_type(self):
  454. return self.content_type
  455. def get_charset(self):
  456. return self.get_charset
  457. def get_fp(self):
  458. return self.fp
  459. def move(self, new_path):
  460. print('move("%s", "%s")' % (self.get_full_path(), new_path))
  461. move(self.get_full_path(), new_path)
  462. if getattr(self.fp, 'delete', None):
  463. self.fp.delete = False # Already gone.
  464. self.fp = open(new_path)
  465. class LineInput (object):
  466. """
  467. A wrapper for an input stream that has the following properties:
  468. * lines are terminated by \r\n
  469. * lines shorter than 'maxlength' are always returned unbroken
  470. * lines longer than 'maxlength' are broken but the pair of
  471. characters \r\n are never split
  472. * no more than 'length' characters are read from the underlying
  473. stream
  474. * if the underlying stream does not produce at least 'length'
  475. characters then EOFError is raised
  476. """
  477. def __init__(self, fp, length):
  478. self.fp = fp
  479. self.length = length
  480. self.buf = empty_byte_string
  481. def readline(self, maxlength=4096):
  482. # fill buffer
  483. n = min(self.length, maxlength - len(self.buf))
  484. chunks = [self.buf]
  485. if n > 0:
  486. self.length -= n
  487. assert self.length >= 0
  488. chunk = self.fp.read(n)
  489. if len(chunk) != n:
  490. raise EOFError('unexpected end of input')
  491. chunks.append(chunk)
  492. self.buf = join_bytes(chunks)
  493. # split into lines
  494. buf = self.buf
  495. i = buf.find(CRNL)
  496. if i >= 0:
  497. i += 2
  498. self.buf = buf[i:]
  499. return buf[:i]
  500. elif buf[-1:] == CRNL[:1]:
  501. # avoid splitting CR NL pairs
  502. self.buf = buf[-1:]
  503. return buf[:-1]
  504. else:
  505. self.buf = empty_byte_string
  506. return buf
  507. class MIMEInput (object):
  508. """
  509. Split a MIME input stream into parts. Note that this class does not
  510. handle headers, transfer encoding, etc.
  511. """
  512. def __init__(self, fp, boundary, length):
  513. self.lineinput = LineInput(fp, length)
  514. self.pat = re.compile(
  515. as_bytes(r'--%s(--)?' % re.escape(boundary)))
  516. self.done = False
  517. def moreparts(self):
  518. """Return true if there are more parts to be read."""
  519. return not self.done
  520. def readpart(self):
  521. """Generate all the lines up to a MIME boundary. Note that you
  522. must exhaust the generator before calling this function again."""
  523. assert not self.done
  524. last_line = empty_byte_string
  525. while 1:
  526. line = self.lineinput.readline()
  527. if not line:
  528. # Hit EOF -- nothing more to read. This should *not* happen
  529. # in a well-formed MIME message.
  530. raise EOFError('MIME boundary not found (end of input)')
  531. if last_line[-2:] == CRNL or last_line == empty_byte_string:
  532. m = self.pat.match(line)
  533. if m:
  534. # If we hit the boundary line, return now. Forget
  535. # the current line *and* the CRNL ending of the
  536. # previous line.
  537. if m.group(1):
  538. # hit final boundary
  539. self.done = True
  540. yield last_line[:-2]
  541. return
  542. if last_line:
  543. yield last_line
  544. last_line = line