/qp/http/request.py
Python | 637 lines | 615 code | 6 blank | 16 comment | 17 complexity | dd13982a73492dcd7a7ef06cfa427a4e MD5 | raw file
- """
- open/DurusWorks/qp/http/request.py
- """
- from durus.utils import as_bytes, join_bytes, empty_byte_string, byte_string
- from qp.fill.html import url_quote, url_unquote
- from qp.lib.util import StringIO, message_from_file
- from qp.pub.common import get_publisher
- from shutil import move
- import os
- import re
- import sys
- import tempfile
# The CR NL line terminator as a byte string.
CRNL = as_bytes("\r\n")

# Regular expressions for the pieces of HTTP syntax (RFC 2616) that
# HTTPRequest._parse_pref_header() has to recognise.
#
# _http_lws_re matches linear whitespace (LWS): an optional CR NL followed
# by spaces and/or tabs.
_http_lws_re = re.compile(r"(\r\n)?[ \t]+")

# The two patterns below assume that LWS has already been removed.
# _http_list_re matches the comma separators of a list; _http_encoding_re
# matches one list element with an optional ";q=<number>" suffix.
_http_list_re = re.compile(r",+")
_http_encoding_re = re.compile(r"([^;]+)(;q=([\d.]+))?$")
def _decode_string(s, charset):
    """(s, charset: string) -> string
    Decode 's' using 'charset' when it offers a decode() method.  Anything
    without one is assumed to be decoded already and is returned as is.
    """
    if not hasattr(s, 'decode'):
        return s
    return s.decode(charset)
def _split_header_params(line):
    """(line: string) -> [string]
    Split 'line' on the ';' separators that are not inside a double-quoted
    string.  Inside quotes a backslash protects the following character, so
    an escaped quote does not end the quoted string.  The pieces are
    returned verbatim (nothing is stripped or unescaped).
    """
    pieces = []
    start = 0
    in_quotes = False
    escaped = False
    for pos, ch in enumerate(line):
        if in_quotes and escaped:
            # This character was protected by the preceding backslash.
            escaped = False
        elif in_quotes and ch == '\\':
            escaped = True
        elif ch == '"':
            in_quotes = not in_quotes
        elif ch == ';' and not in_quotes:
            pieces.append(line[start:pos])
            start = pos + 1
    pieces.append(line[start:])
    return pieces


def parse_header(line):
    """(line: string) -> (string, {string: string})
    Parse a Content-type like header.

    Return the main value, lower-cased, and a dictionary of options.
    Option names are lower-cased; option values keep their case and lose one
    pair of surrounding double quotes.  Options without an '=' are ignored.
    A ';' inside a double-quoted value (for example filename="a;b.txt")
    belongs to the value and does not start a new option.
    """
    pieces = [piece.strip() for piece in _split_header_params(line)]
    key = pieces[0].lower()
    pdict = {}
    for piece in pieces[1:]:
        name, sep, value = piece.partition('=')
        if not sep:
            continue
        name = name.strip().lower()
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] == '"':
            value = value[1:-1]
        pdict[name] = value
    return key, pdict
def parse_query(qs, charset):
    """(qs: string, charset: string) -> {key:string, string|[string]}
    Parse a query given as a string argument and return a dictionary.

    A byte string 'qs' is first decoded using 'charset'.  The query is split
    on '&'; empty chunks are skipped.  Each chunk is split at its first '='
    into a name and a value (the value is '' when there is no '='), and only
    then are '+' turned into spaces and the percent-escapes removed, so that
    an encoded '=' inside a name is not mistaken for the separator.  A name
    that occurs more than once maps to the list of its values.
    """
    if isinstance(qs, byte_string):
        qs = qs.decode(charset)
    fields = {}
    # NOTE(review): this used to be an if/else that picked '&' in both
    # branches; if another separator was meant to be accepted, restore it.
    for chunk in qs.split('&'):
        if not chunk:
            continue
        name, sep, value = chunk.partition('=')
        name = url_unquote(name.replace('+', ' '))
        value = url_unquote(value.replace('+', ' '))
        _add_field_value(fields, name, value)
    return fields
def _add_field_value(fields, name, value):
    """(fields: dict, name, value)
    Record 'value' under 'name' in 'fields'.  The first value for a name is
    stored directly; when the name is already present, its entry becomes
    (or stays) a list and 'value' is appended to it.
    """
    if name not in fields:
        fields[name] = value
        return
    existing = fields[name]
    if isinstance(existing, list):
        existing.append(value)
    else:
        fields[name] = [existing, value]
class HTTPRequest (object):
    """
    Model a single HTTP request and all associated data: environment
    variables, form variables, cookies, etc.

    To access environment variables associated with the request, use
    get_environ(): eg. request.get_environ('SERVER_PORT', 80).

    To access form variables, use get_field(), eg.
    request.get_field("name").

    To access cookies, use get_cookie().

    Various bits and pieces of the requested URL can be accessed with
    get_url(), get_path(), get_server()
    """

    # In qp, we will encode html pages using utf-8.
    # Unless the client specifies otherwise, we will assume that requests use
    # the same charset.
    DEFAULT_CHARSET = 'utf-8'

    def __init__(self, stdin, environ):
        """(stdin: file, environ: dict)
        'stdin' is the stream the request body is read from and 'environ'
        the CGI-style environment of the request.  Cookies are parsed
        immediately; form fields are parsed on the first get_fields() call.
        """
        self.stdin = stdin
        self.environ = environ
        self.fields = None
        self.cookies = parse_cookies(environ.get('HTTP_COOKIE', ''))
        if environ.get('HTTPS', 'off').lower() in ('on', '1', 'yes'):
            self.scheme = "https"
        else:
            self.scheme = "http"
        self.body = None

    def get_content_length(self):
        """() -> int
        Return the value of CONTENT_LENGTH, or 0 if it is missing or empty.
        Raise ValueError if the value is not an integer or is negative.
        """
        length = self.environ.get('CONTENT_LENGTH') or "0"
        try:
            length = int(length)
        except ValueError:
            raise ValueError('invalid content-length header')
        if length < 0:
            # A negative length would turn stdin.read(length) in read_body()
            # into "read until end of file".
            raise ValueError('invalid content-length header')
        return length

    def read_body(self):
        """() -> bytes
        Read and return the request body (CONTENT_LENGTH bytes of stdin).
        The body is remembered, so later calls return it without reading
        again.  Raise ValueError if stdin delivers fewer bytes than
        announced.
        """
        if self.body is not None:
            return self.body
        length = self.get_content_length()
        body = self.stdin.read(length)
        self.body = body
        if len(body) != length:
            raise ValueError(
                "read_body() read %s/%s bytes" % (len(body), length))
        return body

    def get_content_type(self):
        """() -> (string, {string: string}) | (None, None)
        Return the parsed CONTENT_TYPE (see parse_header()), or (None, None)
        if the request has none.
        """
        content_type = self.environ.get("CONTENT_TYPE")
        if content_type:
            return parse_header(content_type)
        else:
            return None, None

    def get_fields(self):
        """() -> {string: string | Upload | [string | Upload]}
        Return the form fields of the request.  On the first call they are
        collected from the query string and, if the request has a body of
        type application/x-www-form-urlencoded or multipart/form-data, from
        the body.
        """
        if self.fields is None:
            self.fields = dict()
            query = self.get_query()
            if query:
                self.fields.update(parse_query(query, self.DEFAULT_CHARSET))
            if self.get_content_length() > 0:
                ctype, ctype_params = self.get_content_type()
                if ctype == 'application/x-www-form-urlencoded':
                    self._process_urlencoded(ctype_params)
                elif ctype == 'multipart/form-data':
                    self._process_multipart(ctype_params)
        return self.fields

    def _process_urlencoded(self, params):
        """Add the fields of a url-encoded body to self.fields.  'params'
        holds the Content-Type options; its 'charset', if any, overrides
        DEFAULT_CHARSET.
        """
        body = self.read_body()
        charset = params.get('charset', self.DEFAULT_CHARSET)
        self.fields.update(parse_query(body, charset))

    def _process_multipart(self, params):
        """Add the parts of a multipart/form-data body to self.fields.
        'params' holds the Content-Type options and must include 'boundary'.
        Raise ValueError if the boundary is missing or the body ends early.
        """
        boundary = params.get('boundary')
        if not boundary:
            raise ValueError('multipart/form-data missing boundary')
        charset = params.get('charset')
        length = self.get_content_length()
        mimeinput = MIMEInput(self.stdin, boundary, length)
        try:
            for line in mimeinput.readpart():
                pass # discard lines up to first boundary
            while mimeinput.moreparts():
                self._process_multipart_body(mimeinput, charset)
        except EOFError:
            raise ValueError('unexpected end of multipart/form-data')

    def _process_multipart_body(self, mimeinput, charset):
        """Read one part from 'mimeinput' and add it to self.fields.  A part
        with a filename becomes an Upload; any other part becomes a decoded
        string.  'charset' is the default for the part and is overridden by
        a charset in the part's own Content-Type.
        """
        headers = StringIO()
        lines = mimeinput.readpart()
        # The headers of the part end at the first empty line; the same
        # generator then goes on to deliver the lines of the part's content.
        for line in lines:
            headers.write(line.decode('latin1'))
            if line == CRNL:
                break
        headers.seek(0)
        headers = message_from_file(headers)
        ctype, ctype_params = parse_header(headers.get('content-type', ''))
        if ctype and 'charset' in ctype_params:
            charset = ctype_params['charset']
        cdisp, cdisp_params = parse_header(
            headers.get('content-disposition', ''))
        if not cdisp:
            raise ValueError('expected Content-Disposition header')
        name = cdisp_params.get('name')
        filename = cdisp_params.get('filename')
        if not (cdisp == 'form-data' and name):
            raise ValueError('expected Content-Disposition: form-data '
                             'with a "name" parameter: got %r' %
                             headers.get('content-disposition', ''))
        # FIXME: should really handle Content-Transfer-Encoding and other
        # MIME complexity here.  See RFC 2045 for the full horror story.
        if filename:
            # it might be large file upload so use a temporary file
            upload = Upload(filename, ctype, charset)
            upload.receive(lines)
            _add_field_value(self.fields, name, upload)
        else:
            value = _decode_string(join_bytes(lines),
                                   charset or self.DEFAULT_CHARSET)
            _add_field_value(self.fields, name, value)

    def get_header(self, name, default=None):
        """(name : str, default : str = None) -> str
        Return the named HTTP header, or an optional default argument
        (or None) if the header is not found.  Note that both original
        and CGI-ified header names are recognized, e.g. 'Content-Type',
        'CONTENT_TYPE' and 'HTTP_CONTENT_TYPE' should all return the
        Content-Type header, if available.
        """
        environ = self.environ
        name = name.replace('-', '_').upper()
        val = environ.get(name)
        if val is not None:
            return val
        if name[:5] != 'HTTP_':
            name = 'HTTP_' + name
        return environ.get(name, default)

    def get_cookie(self, cookie_name, default=None):
        """Return the value of the named cookie, or 'default'."""
        return self.cookies.get(cookie_name, default)

    def get_cookies(self):
        """() -> {string: string}
        Return the dictionary of all cookies sent with the request.
        """
        return self.cookies

    def get_field(self, name, default=None):
        """Return the value of the named form field, or 'default'."""
        return self.get_fields().get(name, default)

    def get_method(self):
        """Returns the HTTP method for this request
        """
        return self.environ.get('REQUEST_METHOD', 'GET')

    def get_scheme(self):
        """() -> str
        Return "https" or "http", as derived from the HTTPS variable.
        """
        return self.scheme

    def get_server(self):
        """() -> str
        Return the server name with an optional port number, eg.
        "www.example.com" or "foo.bar.com:8000".
        """
        http_host = self.environ.get("HTTP_HOST")
        if http_host:
            return http_host
        server_name = self.environ["SERVER_NAME"].strip()
        server_port = self.environ.get("SERVER_PORT")
        # Leave out the port when it is the default one for the scheme.
        if (not server_port or
            (self.get_scheme() == "http" and server_port == "80") or
            (self.get_scheme() == "https" and server_port == "443")):
            return server_name
        else:
            return server_name + ':' + server_port

    def get_script_name(self):
        """() -> str
        Return SCRIPT_NAME, or '' if it is not set.
        """
        return self.environ.get('SCRIPT_NAME', '')

    def get_path_info(self):
        """() -> str
        Return PATH_INFO, or '' if it is not set.
        """
        return self.environ.get('PATH_INFO', '')

    def get_path(self):
        """() -> str
        Return SCRIPT_NAME + PATH_INFO, always starting with a '/'.
        """
        path = self.get_script_name() + self.get_path_info()
        if path[:1] != '/':
            path = '/' + path
        return path

    def get_query(self):
        """() -> string
        Return the query component of the URL.
        """
        return self.environ.get('QUERY_STRING', '')

    def get_path_query(self):
        """() -> str
        Return the url-quoted path, followed by '?' and the query string if
        there is one.
        """
        query = self.get_query()
        path = url_quote(self.get_path())
        if query:
            return path + '?' + query
        else:
            return path

    def get_url(self):
        """() -> str
        Return the URL of the current request.
        """
        return "%s://%s%s" % (self.get_scheme(), self.get_server(),
                              self.get_path_query())

    def get_environ(self, key, default=None):
        """(key : string) -> str
        Fetch a CGI environment variable from the request environment.
        See http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        for the variables specified by the CGI standard.
        """
        return self.environ.get(key, default)

    def get_remote_address(self):
        """Return REMOTE_ADDR, or None if it is not set."""
        return self.get_environ('REMOTE_ADDR')

    def get_encoding(self, encodings):
        """(encodings : [string]) -> str
        Parse the "Accept-encoding" header.  'encodings' is a list of
        encodings supported by the server sorted in order of preference.
        The return value is one of 'encodings' or None if the client
        does not accept any of the encodings.
        """
        accept_encoding = self.get_header("accept-encoding") or ''
        found_encodings = self._parse_pref_header(accept_encoding)
        if found_encodings:
            for encoding in encodings:
                if encoding in found_encodings:
                    return encoding
        return None

    def accepts_gzip_encoding(self):
        """() -> bool
        Return true if the client accepts the gzip encoding.
        """
        return bool(self.get_encoding(['gzip']))

    def get_range(self):
        """() -> None | ( int|None , int|None )
        Parse the "Range" header.  Return (start, end) for a single byte
        range; 'start' is None for a suffix range ("bytes=-500") and 'end'
        is None for an open-ended one ("bytes=500-").  Return None if the
        header is missing, malformed, not in bytes, names several ranges,
        or has start > end.
        """
        range_header = self.get_header("range")
        if range_header is not None:
            try:
                units, byte_range = range_header.split("=")
                # We don't support multiple ranges - just one.
                if units != 'bytes':
                    return None
                if "," in byte_range:
                    return None
                first_byte, last_byte = byte_range.split("-")
                if first_byte == "":
                    start = None
                else:
                    start = int(first_byte)
                if last_byte == "":
                    end = None
                    if start is None:
                        return None
                else:
                    end = int(last_byte)
                if start is None or end is None or start <= end:
                    return (start, end)
            except ValueError:
                # If anything went wrong with parsing the Range header,
                # pretend the client didn't submit it. (according to spec)
                pass
        return None

    def get_accepted_types(self):
        """() -> {string:float}
        Return a dictionary mapping MIME types the client will accept
        to the corresponding quality value (1.0 if no value was specified).
        """
        accept_types = self.environ.get('HTTP_ACCEPT', '')
        return self._parse_pref_header(accept_types)

    def _parse_pref_header(self, S):
        """(S:str) -> {str:float}
        Parse a list of HTTP preferences (content types, encodings) and
        return a dictionary mapping strings to the quality value.
        Entries that do not parse, the '*' wildcard, and entries whose
        quality is not greater than zero are left out.
        """
        found = {}
        # remove all linear whitespace
        S = _http_lws_re.sub('', S)
        for coding in _http_list_re.split(S):
            m = _http_encoding_re.match(coding)
            if m:
                encoding = m.group(1).lower()
                q = m.group(3) or 1.0
                try:
                    q = float(q)
                except ValueError:
                    continue
                if encoding == '*':
                    continue # stupid, ignore it
                if q > 0:
                    found[encoding] = q
        return found
# Cookie header grammar, loosely after RFC 2109 (this parser accepts more
# than the RFC allows).  One match is a name, optionally followed by '='
# and either a double-quoted value (backslash escapes permitted) or a bare
# value, and an optional ';' or ',' terminator.
_COOKIE_RE = re.compile(r"""
    \s*
    (?P<name>[^=;,\s]+)
    \s*
    (
        =
        \s*
        (
            (?P<qvalue> "(\\[\x00-\x7f] | [^"])*")
        |
            (?P<value> [^";,\s]*)
        )
    )?
    \s*
    [;,]?
    """, re.VERBOSE)


def parse_cookies(text):
    """(text: string) -> {string: string}
    Parse the value of a Cookie header into a dictionary that maps cookie
    names to values.  Quoted values lose their quotes and backslash
    escapes, a cookie without a value maps to '', and names starting with
    '$' (per-cookie attributes such as $Path) are dropped.
    """
    cookies = {}
    for match in _COOKIE_RE.finditer(text):
        name = match.group('name')
        if name.startswith('$'):
            # per cookie attributes are not handled
            continue
        quoted = match.group('qvalue')
        if quoted:
            # Undo the backslash escapes, then drop the surrounding quotes.
            cookies[name] = re.sub(r'\\(.)', r'\1', quoted)[1:-1]
        else:
            cookies[name] = match.group('value') or ''
    return cookies
# The characters that make_safe_filename() leaves alone.
SAFE_CHARS = ('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
              '0123456789-@&+=_., ')

# 256-character translation table, built on the first call to
# make_safe_filename().
_safe_trans = None


def make_safe_filename(s):
    """(s: string) -> string
    Return 's' with every character that has a code below 256 and is not
    in SAFE_CHARS replaced by an underscore.
    """
    global _safe_trans
    if _safe_trans is None:
        _safe_trans = ''.join(
            [chr(code) if chr(code) in SAFE_CHARS else '_'
             for code in range(256)])
    return s.translate(_safe_trans)
class Upload (object):
    """
    Represents a single uploaded file.

    fp
      an open file containing the content of the upload.  The file pointer
      points to the beginning of the file
    orig_filename
      the complete filename supplied by the user-agent in the
      request that uploaded this file.  Depending on the browser,
      this might have the complete path of the original file
      on the client system, in the client system's syntax.
    base_filename
      the base component of orig_filename, shorn of MS-DOS,
      Mac OS, and Unix path components and with "unsafe"
      characters neutralized (see make_safe_filename())
    content_type
      the content type provided by the user-agent in the request
      that uploaded this file.
    charset
      the charset provided by the user-agent
    """

    def __init__(self, orig_filename, content_type=None, charset=None):
        if orig_filename:
            self.orig_filename = orig_filename
            # The base name starts after the last '\\', ':' or '/'.  If none
            # of them occurs, rfind() gives -1 and the whole name is kept.
            separator_position = max(
                orig_filename.rfind('\\'),
                orig_filename.rfind(':'),
                orig_filename.rfind('/'))
            basename = orig_filename[separator_position + 1:]
            self.base_filename = make_safe_filename(basename)
        else:
            self.orig_filename = None
            self.base_filename = None
        self.content_type = content_type
        self.charset = charset
        self.fp = None

    def receive(self, lines):
        """(lines: iterable of byte strings)
        Write 'lines' to a new named temporary file, kept in self.fp, and
        rewind it.  The file is created in the site's directory for
        temporary files when a publisher is available.
        """
        publisher = get_publisher()
        if publisher:
            tmp_dir = publisher.get_site().get_directory_for_temporary_files()
        else:
            tmp_dir = None
        self.fp = tempfile.NamedTemporaryFile("w+b", dir=tmp_dir, prefix="up.")
        for line in lines:
            self.fp.write(line)
        self.fp.seek(0)

    def read(self, n):
        return self.fp.read(n)

    def readline(self):
        return self.fp.readline()

    def readlines(self):
        return self.fp.readlines()

    def seek(self, *args):
        return self.fp.seek(*args)

    def tell(self):
        return self.fp.tell()

    def __iter__(self):
        return iter(self.fp)

    def close(self):
        self.fp.close()

    def get_size(self):
        """Return the size of the file, in bytes.
        """
        if self.fp is None:
            return 0
        else:
            return os.fstat(self.fp.fileno()).st_size

    def get_full_path(self):
        """Return the path of the file that holds the upload."""
        return self.fp.name

    def get_base_filename(self):
        return self.base_filename

    def get_orig_filename(self):
        return self.orig_filename

    def get_content_type(self):
        return self.content_type

    def get_charset(self):
        """Return the charset provided by the user-agent, or None."""
        return self.charset

    def get_fp(self):
        return self.fp

    def move(self, new_path):
        """(new_path: string)
        Move the file that holds the upload to 'new_path' and make self.fp
        an open binary file on the new location.
        """
        old_fp = self.fp
        move(self.get_full_path(), new_path)
        # The temporary file is already gone from its old path, so it must
        # not be deleted again when the old handle is closed.
        if getattr(old_fp, 'delete', None):
            old_fp.delete = False
        # On Python 3 the flag that is consulted at close time is kept on
        # the private '_closer' helper of the temporary file wrapper.
        closer = getattr(old_fp, '_closer', None)
        if closer is not None and getattr(closer, 'delete', None):
            closer.delete = False
        old_fp.close()
        # Uploads are written in binary mode; reopen them the same way.
        self.fp = open(new_path, 'rb')
class LineInput (object):
    """
    A wrapper for an input stream that has the following properties:

    * lines are terminated by CR NL
    * lines shorter than 'maxlength' are always returned unbroken
    * lines longer than 'maxlength' are broken but the pair of
      characters CR NL are never split
    * no more than 'length' characters are read from the underlying
      stream
    * if the underlying stream does not produce at least 'length'
      characters then EOFError is raised
    """

    def __init__(self, fp, length):
        """(fp: file, length: int)
        'fp' is the stream to read from and 'length' the number of
        characters that may be read from it.
        """
        self.fp = fp
        self.length = length
        self.buf = empty_byte_string

    def readline(self, maxlength=4096):
        """(maxlength: int = 4096) -> byte string
        Return the next line, including its CR NL terminator, or a piece of
        a line if no terminator is found in the buffer.  Return an empty
        string once all input has been returned.  Raise EOFError if the
        stream delivers less than was asked for.
        """
        # fill buffer
        n = min(self.length, maxlength - len(self.buf))
        if n > 0:
            self.length -= n
            assert self.length >= 0
            chunk = self.fp.read(n)
            if len(chunk) != n:
                raise EOFError('unexpected end of input')
            self.buf = join_bytes([self.buf, chunk])
        # split into lines
        buf = self.buf
        i = buf.find(CRNL)
        if i >= 0:
            i += 2
            self.buf = buf[i:]
            return buf[:i]
        elif buf[-1:] == CRNL[:1] and len(buf) > 1:
            # avoid splitting CR NL pairs: hold the CR back until the next
            # call.  A buffer that is nothing but a CR is not held back,
            # because it would be held back forever and readline() would
            # keep returning an empty string without delivering it.
            self.buf = buf[-1:]
            return buf[:-1]
        else:
            self.buf = empty_byte_string
            return buf
class MIMEInput (object):
    """
    Cut a MIME input stream into its parts at the boundary lines.  Headers,
    transfer encodings and the like are left to the caller.
    """

    def __init__(self, fp, boundary, length):
        """(fp: file, boundary: string, length: int)
        Read at most 'length' characters from 'fp'; 'boundary' is the
        boundary string of the multipart message.
        """
        self.lineinput = LineInput(fp, length)
        boundary_pattern = as_bytes(r'--%s(--)?' % re.escape(boundary))
        self.pat = re.compile(boundary_pattern)
        self.done = False

    def moreparts(self):
        """Return true until the final boundary has been read."""
        if self.done:
            return False
        return True

    def readpart(self):
        """Generate the lines of one part, that is, everything up to the
        next MIME boundary.  The generator must be exhausted before this
        method is called again.  EOFError is raised if the input ends
        before a boundary is found."""
        assert not self.done
        previous = empty_byte_string
        while True:
            current = self.lineinput.readline()
            if not current:
                # End of input without a boundary: the message is not
                # well-formed.
                raise EOFError('MIME boundary not found (end of input)')
            # A boundary only counts at the start of a line, i.e. at the
            # very beginning or right after a CR NL.
            at_line_start = (previous == empty_byte_string or
                             previous[-2:] == CRNL)
            boundary = None
            if at_line_start:
                boundary = self.pat.match(current)
            if boundary:
                if boundary.group(1):
                    # the trailing "--" marks the final boundary
                    self.done = True
                # The boundary line itself is dropped, and so is the CR NL
                # that ends the line before it.
                yield previous[:-2]
                return
            if previous:
                yield previous
            previous = current