PageRenderTime 564ms CodeModel.GetById 116ms app.highlight 245ms RepoModel.GetById 142ms app.codeStats 1ms

/Lib/BaseHTTPServer.py

http://unladen-swallow.googlecode.com/
Python | 592 lines | 532 code | 8 blank | 52 comment | 4 complexity | b6fb7290e2e175fb03fb21da01089de8 MD5 | raw file
"""HTTP server base class.

Note: the class in this module doesn't implement any HTTP request; see
SimpleHTTPServer for simple implementations of GET, HEAD and POST
(including CGI scripts).  It does, however, optionally implement HTTP/1.1
persistent connections, as of version 0.3.

Contents:

- BaseHTTPRequestHandler: HTTP request handler base class
- test: test function

XXX To do:

- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
"""


# See also:
#
# HTTP Working Group                                        T. Berners-Lee
# INTERNET-DRAFT                                            R. T. Fielding
# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
# Expires September 8, 1995                                  March 8, 1995
#
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
#
# and
#
# Network Working Group                                      R. Fielding
# Request for Comments: 2616                                       et al
# Obsoletes: 2068                                              June 1999
# Category: Standards Track
#
# URL: http://www.faqs.org/rfcs/rfc2616.html

# Log files
# ---------
#
# Here's a quote from the NCSA httpd docs about log file format.
#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# |        host: Either the DNS name or the IP number of the remote client
# |        rfc931: Any information returned by identd for this person,
# |                - otherwise.
# |        authuser: If user sent a userid for authentication, the user name,
# |                  - otherwise.
# |        DD: Day
# |        Mon: Month (calendar name)
# |        YYYY: Year
# |        hh: hour (24-hour format, the machine's timezone)
# |        mm: minutes
# |        ss: seconds
# |        request: The first line of the HTTP request as sent by the client.
# |        ddd: the status code returned by the server, - if not available.
# |        bbbb: the total number of bytes sent,
# |              *not including the HTTP/1.0 header*, - if not available
# |
# | You can determine the name of the file accessed through request.
#
# (Actually, the latter is only true if you know the server configuration
# at the time the request was made!)

 69__version__ = "0.3"
 70
 71__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
 72
 73import sys
 74import time
 75import socket # For gethostbyaddr()
 76from warnings import filterwarnings, catch_warnings
 77with catch_warnings():
 78    if sys.py3kwarning:
 79        filterwarnings("ignore", ".*mimetools has been removed",
 80                        DeprecationWarning)
 81    import mimetools
 82import SocketServer
 83
 84# Default error message template
 85DEFAULT_ERROR_MESSAGE = """\
 86<head>
 87<title>Error response</title>
 88</head>
 89<body>
 90<h1>Error response</h1>
 91<p>Error code %(code)d.
 92<p>Message: %(message)s.
 93<p>Error code explanation: %(code)s = %(explain)s.
 94</body>
 95"""
 96
 97DEFAULT_ERROR_CONTENT_TYPE = "text/html"
 98
 99def _quote_html(html):
100    return html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
101
class HTTPServer(SocketServer.TCPServer):
    """TCP server that remembers the host name and port it is bound to.

    After server_bind() has run, the instance carries two extra attributes:

    - server_name: socket.getfqdn() applied to the bound host;
    - server_port: the bound port number.
    """

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Override server_bind to store the server name."""
        SocketServer.TCPServer.server_bind(self)
        # Ask the socket rather than trusting server_address: only the first
        # two items of whatever getsockname() returns are used.
        host, port = self.socket.getsockname()[:2]
        self.server_name = socket.getfqdn(host)
        self.server_port = port
112
113
class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of mimetools.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    # Longest request line (in characters, line terminator included) that
    # handle_one_request() accepts; anything longer is answered with 414
    # instead of being buffered without limit.
    _MAX_REQUESTLINE = 65536

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, an
        error is sent back.  The one exception is a request line holding
        only whitespace: False is returned without sending anything.

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = 1
        requestline = self.raw_requestline
        # Drop exactly one line terminator, CRLF or bare LF.
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            [command, path, version] = words
            if version[:5] != 'HTTP/':
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            # Persistent connections need both sides to speak HTTP/1.1.
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = 0
            if version_number >= (2, 0):
                self.send_error(505,
                          "Invalid HTTP Version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            # No version given: an HTTP/0.9 request, which only knows GET.
            [command, path] = words
            self.close_connection = 1
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%r)" % command)
                return False
        elif not words:
            return False
        else:
            self.send_error(400, "Bad request syntax (%r)" % requestline)
            return False
        self.command, self.path, self.request_version = command, path, version

        # Examine the headers and look for a Connection directive
        self.headers = self.MessageClass(self.rfile, 0)

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = 1
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = 0
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        A request line longer than _MAX_REQUESTLINE is answered with a
        414 error.  A socket.timeout raised while the request is being
        read or handled is logged and marks the connection for closing.

        """
        try:
            # Read one character more than the limit so that an overlong
            # line can be told apart from one that is exactly at the limit.
            self.raw_requestline = self.rfile.readline(
                self._MAX_REQUESTLINE + 1)
            if len(self.raw_requestline) > self._MAX_REQUESTLINE:
                # parse_request() has not run, so provide the attributes
                # that send_error() and log_request() read.
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(414)
                return
            if not self.raw_requestline:
                self.close_connection = 1
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(501, "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            # Actually send the response if wfile is buffered.
            self.wfile.flush()
        except socket.timeout as e:
            # A read or a write timed out.  Discard this connection.
            self.log_error("Request timed out: %r", e)
            self.close_connection = 1

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = 1

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.  The HTML
        body and its Content-Type header are left out for codes below
        200 and for 204, 205 and 304; the body alone is left out when
        the request command is HEAD.

        """

        try:
            short, explain = self.responses[code]
        except KeyError:
            short, explain = '???', '???'
        if message is None:
            message = short
        self.log_error("code %d, message %s", code, message)

        # RFC 2616 forbids a message body on 1xx, 204 and 304 responses
        # (section 4.3) and on 205 (section 10.2.6).
        content = None
        if code >= 200 and code not in (204, 205, 304):
            # using _quote_html to prevent Cross Site Scripting attacks
            # (see bug #1100201)
            content = (self.error_message_format %
                       {'code': code, 'message': _quote_html(message),
                        'explain': explain})
        self.send_response(code, message)
        if content is not None:
            self.send_header("Content-Type", self.error_content_type)
        self.send_header('Connection', 'close')
        self.end_headers()
        if self.command != 'HEAD' and content is not None:
            self.wfile.write(content)

    # Template and content type used by send_error(); override in a
    # subclass to customize error pages.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        If message is None, the short message registered for the code
        in self.responses is used, or an empty string for unknown codes.
        The status line is not written for HTTP/0.9 requests.

        """
        self.log_request(code)
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s %d %s\r\n" %
                             (self.protocol_version, code, message))
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_header(self, keyword, value):
        """Send a MIME header.

        Nothing is written for HTTP/0.9 requests.  A "Connection" header
        with the value "close" or "keep-alive" (any letter case) also
        updates self.close_connection accordingly.

        """
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s: %s\r\n" % (keyword, value))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = 1
            elif value.lower() == 'keep-alive':
                self.close_connection = 0

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("\r\n")

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """

        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message, which is written to sys.stderr.

        """

        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format%args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header.

        If timestamp is given it is formatted instead of the current
        time.  The result is always expressed in GMT.

        """
        if timestamp is None:
            timestamp = time.time()
        # Only the first seven fields of the time tuple are needed.
        year, month, day, hh, mm, ss, wd = time.gmtime(timestamp)[:7]
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current time formatted for logging.

        Local time is used, as obtained from time.localtime().

        """
        now = time.time()
        year, month, day, hh, mm, ss = time.localtime(now)[:6]
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    # Indexed by the weekday field of a time tuple (0 is Monday).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by the month field of a time tuple (1 to 12); slot 0 is unused.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version passes the client host to socket.getfqdn() and
        returns the result.

        """

        host, _port = self.client_address[:2]
        return socket.getfqdn(host)

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers
    MessageClass = mimetools.Message

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See RFC 2616.
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No Content', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not Modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad Request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment Required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this server.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Timeout', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service Unavailable',
              'The server cannot process the request due to a high load'),
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
        }
566
567
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=HTTPServer, protocol="HTTP/1.0"):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the first command line
    argument).

    HandlerClass.protocol_version is set to protocol before the server
    is created.  The call blocks in serve_forever(); the listening
    socket is closed when that call exits, whether normally or through
    an exception such as KeyboardInterrupt.

    """

    if sys.argv[1:]:
        port = int(sys.argv[1])
    else:
        port = 8000
    server_address = ('', port)

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(server_address, HandlerClass)
    try:
        sa = httpd.socket.getsockname()
        # A single pre-formatted argument prints the same text whether
        # print is a statement or a function.
        print("Serving HTTP on %s port %s ..." % (sa[0], sa[1]))
        httpd.serve_forever()
    finally:
        # Release the listening socket instead of leaking it.
        httpd.server_close()
589
590
# Run the demonstration server when this module is executed as a script.
if __name__ == '__main__':
    test()