PageRenderTime 269ms CodeModel.GetById 80ms app.highlight 107ms RepoModel.GetById 74ms app.codeStats 0ms

/src/atom/http_core.py

https://code.google.com/p/gdata-python-client/
Python | 601 lines | 528 code | 20 blank | 53 comment | 6 complexity | 178c2ec31ca84fc8f7120136c74fb094 MD5 | raw file
  1#!/usr/bin/env python
  2#
  3#    Copyright (C) 2009 Google Inc.
  4#
  5#   Licensed under the Apache License, Version 2.0 (the "License");
  6#   you may not use this file except in compliance with the License.
  7#   You may obtain a copy of the License at
  8#
  9#       http://www.apache.org/licenses/LICENSE-2.0
 10#
 11#   Unless required by applicable law or agreed to in writing, software
 12#   distributed under the License is distributed on an "AS IS" BASIS,
 13#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14#   See the License for the specific language governing permissions and
 15#   limitations under the License.
 16
 17
 18# This module is used for version 2 of the Google Data APIs.
 19# TODO: add proxy handling.
 20
 21
 22__author__ = 'j.s@google.com (Jeff Scudder)'
 23
 24
 25import os
 26import StringIO
 27import urlparse
 28import urllib
 29import httplib
 30ssl = None
 31try:
 32  import ssl
 33except ImportError:
 34  pass
 35
 36
 37
 38class Error(Exception):
 39  pass
 40
 41
 42class UnknownSize(Error):
 43  pass
 44
 45
 46class ProxyError(Error):
 47  pass
 48
 49
# Boundary token placed between the parts of a multipart request body by
# HttpRequest.add_body_part and named in the multipart Content-Type header.
MIME_BOUNDARY = 'END_OF_PART'
 51
 52
 53def get_headers(http_response):
 54  """Retrieves all HTTP headers from an HTTP response from the server.
 55
 56  This method is provided for backwards compatibility for Python2.2 and 2.3.
 57  The httplib.HTTPResponse object in 2.2 and 2.3 does not have a getheaders
 58  method so this function will use getheaders if available, but if not it
 59  will retrieve a few using getheader.
 60  """
 61  if hasattr(http_response, 'getheaders'):
 62    return http_response.getheaders()
 63  else:
 64    headers = []
 65    for header in (
 66        'location', 'content-type', 'content-length', 'age', 'allow',
 67        'cache-control', 'content-location', 'content-encoding', 'date',
 68        'etag', 'expires', 'last-modified', 'pragma', 'server',
 69        'set-cookie', 'transfer-encoding', 'vary', 'via', 'warning',
 70        'www-authenticate', 'gdata-version'):
 71      value = http_response.getheader(header, None)
 72      if value is not None:
 73        headers.append((header, value))
 74    return headers
 75
 76
 77class HttpRequest(object):
 78  """Contains all of the parameters for an HTTP 1.1 request.
 79
 80  The HTTP headers are represented by a dictionary, and it is the
 81  responsibility of the user to ensure that duplicate field names are combined
 82  into one header value according to the rules in section 4.2 of RFC 2616.
 83  """
 84  method = None
 85  uri = None
 86
 87  def __init__(self, uri=None, method=None, headers=None):
 88    """Construct an HTTP request.
 89
 90    Args:
 91      uri: The full path or partial path as a Uri object or a string.
 92      method: The HTTP method for the request, examples include 'GET', 'POST',
 93              etc.
 94      headers: dict of strings The HTTP headers to include in the request.
 95    """
 96    self.headers = headers or {}
 97    self._body_parts = []
 98    if method is not None:
 99      self.method = method
100    if isinstance(uri, (str, unicode)):
101      uri = Uri.parse_uri(uri)
102    self.uri = uri or Uri()
103
104
105  def add_body_part(self, data, mime_type, size=None):
106    """Adds data to the HTTP request body.
107
108    If more than one part is added, this is assumed to be a mime-multipart
109    request. This method is designed to create MIME 1.0 requests as specified
110    in RFC 1341.
111
112    Args:
113      data: str or a file-like object containing a part of the request body.
114      mime_type: str The MIME type describing the data
115      size: int Required if the data is a file like object. If the data is a
116            string, the size is calculated so this parameter is ignored.
117    """
118    if isinstance(data, str):
119      size = len(data)
120    if size is None:
121      # TODO: support chunked transfer if some of the body is of unknown size.
122      raise UnknownSize('Each part of the body must have a known size.')
123    if 'Content-Length' in self.headers:
124      content_length = int(self.headers['Content-Length'])
125    else:
126      content_length = 0
127    # If this is the first part added to the body, then this is not a multipart
128    # request.
129    if len(self._body_parts) == 0:
130      self.headers['Content-Type'] = mime_type
131      content_length = size
132      self._body_parts.append(data)
133    elif len(self._body_parts) == 1:
134      # This is the first member in a mime-multipart request, so change the
135      # _body_parts list to indicate a multipart payload.
136      self._body_parts.insert(0, 'Media multipart posting')
137      boundary_string = '\r\n--%s\r\n' % (MIME_BOUNDARY,)
138      content_length += len(boundary_string) + size
139      self._body_parts.insert(1, boundary_string)
140      content_length += len('Media multipart posting')
141      # Put the content type of the first part of the body into the multipart
142      # payload.
143      original_type_string = 'Content-Type: %s\r\n\r\n' % (
144          self.headers['Content-Type'],)
145      self._body_parts.insert(2, original_type_string)
146      content_length += len(original_type_string)
147      boundary_string = '\r\n--%s\r\n' % (MIME_BOUNDARY,)
148      self._body_parts.append(boundary_string)
149      content_length += len(boundary_string)
150      # Change the headers to indicate this is now a mime multipart request.
151      self.headers['Content-Type'] = 'multipart/related; boundary="%s"' % (
152          MIME_BOUNDARY,)
153      self.headers['MIME-version'] = '1.0'
154      # Include the mime type of this part.
155      type_string = 'Content-Type: %s\r\n\r\n' % (mime_type)
156      self._body_parts.append(type_string)
157      content_length += len(type_string)
158      self._body_parts.append(data)
159      ending_boundary_string = '\r\n--%s--' % (MIME_BOUNDARY,)
160      self._body_parts.append(ending_boundary_string)
161      content_length += len(ending_boundary_string)
162    else:
163      # This is a mime multipart request.
164      boundary_string = '\r\n--%s\r\n' % (MIME_BOUNDARY,)
165      self._body_parts.insert(-1, boundary_string)
166      content_length += len(boundary_string) + size
167      # Include the mime type of this part.
168      type_string = 'Content-Type: %s\r\n\r\n' % (mime_type)
169      self._body_parts.insert(-1, type_string)
170      content_length += len(type_string)
171      self._body_parts.insert(-1, data)
172    self.headers['Content-Length'] = str(content_length)
173  # I could add an "append_to_body_part" method as well.
174
175  AddBodyPart = add_body_part
176
177  def add_form_inputs(self, form_data,
178                      mime_type='application/x-www-form-urlencoded'):
179    """Form-encodes and adds data to the request body.
180
181    Args:
182      form_data: dict or sequnce or two member tuples which contains the
183                 form keys and values.
184      mime_type: str The MIME type of the form data being sent. Defaults
185                 to 'application/x-www-form-urlencoded'.
186    """
187    body = urllib.urlencode(form_data)
188    self.add_body_part(body, mime_type)
189
190  AddFormInputs = add_form_inputs
191
192  def _copy(self):
193    """Creates a deep copy of this request."""
194    copied_uri = Uri(self.uri.scheme, self.uri.host, self.uri.port,
195                     self.uri.path, self.uri.query.copy())
196    new_request = HttpRequest(uri=copied_uri, method=self.method,
197                              headers=self.headers.copy())
198    new_request._body_parts = self._body_parts[:]
199    return new_request
200
201  def _dump(self):
202    """Converts to a printable string for debugging purposes.
203
204    In order to preserve the request, it does not read from file-like objects
205    in the body.
206    """
207    output =  'HTTP Request\n  method: %s\n  url: %s\n  headers:\n' % (
208        self.method, str(self.uri))
209    for header, value in self.headers.iteritems():
210      output += '    %s: %s\n' % (header, value)
211    output += '  body sections:\n'
212    i = 0
213    for part in self._body_parts:
214      if isinstance(part, (str, unicode)):
215        output += '    %s: %s\n' % (i, part)
216      else:
217        output += '    %s: <file like object>\n' % i
218      i += 1
219    return output
220
221
def _apply_defaults(http_request):
  """Fills in the URI scheme of a request which does not specify one.

  Port 443 implies 'https'; any other port, or no port, implies 'http'. A
  request whose scheme is already set is left untouched.

  Args:
    http_request: The request whose uri.scheme may be assigned in place.
  """
  target = http_request.uri
  if target.scheme is not None:
    return
  if target.port == 443:
    target.scheme = 'https'
  else:
    target.scheme = 'http'
228
229
230class Uri(object):
231  """A URI as used in HTTP 1.1"""
232  scheme = None
233  host = None
234  port = None
235  path = None
236
237  def __init__(self, scheme=None, host=None, port=None, path=None, query=None):
238    """Constructor for a URI.
239
240    Args:
241      scheme: str This is usually 'http' or 'https'.
242      host: str The host name or IP address of the desired server.
243      post: int The server's port number.
244      path: str The path of the resource following the host. This begins with
245            a /, example: '/calendar/feeds/default/allcalendars/full'
246      query: dict of strings The URL query parameters. The keys and values are
247             both escaped so this dict should contain the unescaped values.
248             For example {'my key': 'val', 'second': '!!!'} will become
249             '?my+key=val&second=%21%21%21' which is appended to the path.
250    """
251    self.query = query or {}
252    if scheme is not None:
253      self.scheme = scheme
254    if host is not None:
255      self.host = host
256    if port is not None:
257      self.port = port
258    if path:
259      self.path = path
260
261  def _get_query_string(self):
262    param_pairs = []
263    for key, value in self.query.iteritems():
264      quoted_key = urllib.quote_plus(str(key))
265      if value is None:
266        param_pairs.append(quoted_key)
267      else:
268        quoted_value = urllib.quote_plus(str(value))
269        param_pairs.append('%s=%s' % (quoted_key, quoted_value))
270    return '&'.join(param_pairs)
271
272  def _get_relative_path(self):
273    """Returns the path with the query parameters escaped and appended."""
274    param_string = self._get_query_string()
275    if self.path is None:
276      path = '/'
277    else:
278      path = self.path
279    if param_string:
280      return '?'.join([path, param_string])
281    else:
282      return path
283
284  def _to_string(self):
285    if self.scheme is None and self.port == 443:
286      scheme = 'https'
287    elif self.scheme is None:
288      scheme = 'http'
289    else:
290      scheme = self.scheme
291    if self.path is None:
292      path = '/'
293    else:
294      path = self.path
295    if self.port is None:
296      return '%s://%s%s' % (scheme, self.host, self._get_relative_path())
297    else:
298      return '%s://%s:%s%s' % (scheme, self.host, str(self.port),
299                               self._get_relative_path())
300
301  def __str__(self):
302    return self._to_string()
303
304  def modify_request(self, http_request=None):
305    """Sets HTTP request components based on the URI."""
306    if http_request is None:
307      http_request = HttpRequest()
308    if http_request.uri is None:
309      http_request.uri = Uri()
310    # Determine the correct scheme.
311    if self.scheme:
312      http_request.uri.scheme = self.scheme
313    if self.port:
314      http_request.uri.port = self.port
315    if self.host:
316      http_request.uri.host = self.host
317    # Set the relative uri path
318    if self.path:
319      http_request.uri.path = self.path
320    if self.query:
321      http_request.uri.query = self.query.copy()
322    return http_request
323
324  ModifyRequest = modify_request
325
326  def parse_uri(uri_string):
327    """Creates a Uri object which corresponds to the URI string.
328
329    This method can accept partial URIs, but it will leave missing
330    members of the Uri unset.
331    """
332    parts = urlparse.urlparse(uri_string)
333    uri = Uri()
334    if parts[0]:
335      uri.scheme = parts[0]
336    if parts[1]:
337      host_parts = parts[1].split(':')
338      if host_parts[0]:
339        uri.host = host_parts[0]
340      if len(host_parts) > 1:
341        uri.port = int(host_parts[1])
342    if parts[2]:
343      uri.path = parts[2]
344    if parts[4]:
345      param_pairs = parts[4].split('&')
346      for pair in param_pairs:
347        pair_parts = pair.split('=')
348        if len(pair_parts) > 1:
349          uri.query[urllib.unquote_plus(pair_parts[0])] = (
350              urllib.unquote_plus(pair_parts[1]))
351        elif len(pair_parts) == 1:
352          uri.query[urllib.unquote_plus(pair_parts[0])] = None
353    return uri
354
355  parse_uri = staticmethod(parse_uri)
356
357  ParseUri = parse_uri
358
359
# Module-level alias for Uri.parse_uri.
parse_uri = Uri.parse_uri


# CamelCase spelling of the same alias.
ParseUri = Uri.parse_uri
364
365
class HttpResponse(object):
  """A simple in-memory HTTP response.

  It offers the getheader, getheaders and read methods along with status and
  reason members, so it can be handed to code written against those names.
  """
  status = None
  reason = None
  _body = None

  def __init__(self, status=None, reason=None, headers=None, body=None):
    """Constructs a response from its optional components.

    Args:
      status: The HTTP status of the response.
      reason: The reason phrase which accompanies the status.
      headers: dict The response headers, keyed by header name.
      body: The response body. An object with a read method is used as is;
          anything else is wrapped in a StringIO so that it can be read.
    """
    self._headers = headers or {}
    if status is not None:
      self.status = status
    if reason is not None:
      self.reason = reason
    if body is not None:
      if hasattr(body, 'read'):
        self._body = body
      else:
        self._body = StringIO.StringIO(body)

  def getheader(self, name, default=None):
    """Returns the value stored for the header name, or default if absent.

    The lookup uses the name exactly as given.
    """
    if name in self._headers:
      return self._headers[name]
    else:
      return default

  def getheaders(self):
    """Returns the headers object which this response was constructed with."""
    return self._headers

  def read(self, amt=None):
    """Reads from the response body.

    Args:
      amt: int The maximum amount to read. When None, the rest of the body is
          read.

    Returns:
      The data read from the body, or None if the response has no body.
    """
    if self._body is None:
      return None
    # Only None means "read everything"; an explicit 0 is passed through so
    # that read(0) yields an empty result instead of draining the body.
    if amt is None:
      return self._body.read()
    return self._body.read(amt)
399
400
def _dump_response(http_response):
  """Converts to a string for printing debug messages.

  Does not read the body since that may consume the content.

  Args:
    http_response: The response to describe. Its status and reason members
        are read, and its headers are obtained through get_headers.

  Returns:
    A multi-line string listing the status, reason and each header.
  """
  # End the 'headers:' label with a newline so that the first header starts
  # on its own line.
  output = 'HttpResponse\n  status: %s\n  reason: %s\n  headers:\n' % (
      http_response.status, http_response.reason)
  headers = get_headers(http_response)
  # The headers may be a dict or a sequence of (name, value) pairs.
  if isinstance(headers, dict):
    for header, value in headers.iteritems():
      output += '    %s: %s\n' % (header, value)
  else:
    for pair in headers:
      output += '    %s: %s\n' % (pair[0], pair[1])
  return output
416
417
class HttpClient(object):
  """Performs HTTP requests using httplib."""
  # When set to a true value, debuglevel is raised on each connection used.
  debug = None

  def request(self, http_request):
    """Sends the request and returns the response from the server.

    Args:
      http_request: The HttpRequest whose method, uri, headers and body parts
          are sent.

    Returns:
      The response object produced by the connection's getresponse method.
    """
    return self._http_request(http_request.method, http_request.uri,
                              http_request.headers, http_request._body_parts)

  Request = request

  def _get_connection(self, uri, headers=None):
    """Creates the httplib connection object used for an HTTP request.

    Args:
      uri: The full URL for the request as a Uri object.
      headers: A dict of string pairs containing the HTTP headers for the
          request. Not used by this implementation.

    Returns:
      An httplib.HTTPSConnection when the scheme is 'https', otherwise an
      httplib.HTTPConnection.
    """
    connection = None
    if uri.scheme == 'https':
      if not uri.port:
        connection = httplib.HTTPSConnection(uri.host)
      else:
        connection = httplib.HTTPSConnection(uri.host, int(uri.port))
    else:
      if not uri.port:
        connection = httplib.HTTPConnection(uri.host)
      else:
        connection = httplib.HTTPConnection(uri.host, int(uri.port))
    return connection

  def _http_request(self, method, uri, headers=None, body_parts=None):
    """Makes an HTTP request using httplib.

    Args:
      method: str example: 'GET', 'POST', 'PUT', 'DELETE', etc.
      uri: str or atom.http_core.Uri
      headers: dict of strings mapping to strings which will be sent as HTTP
               headers in the request. May be omitted to send no extra
               headers.
      body_parts: list of strings, objects with a read method, or objects
                  which can be converted to strings using str. Each of these
                  will be sent in order as the body of the HTTP request.

    Returns:
      The response object produced by the connection's getresponse method.
    """
    if isinstance(uri, (str, unicode)):
      uri = Uri.parse_uri(uri)
    # headers is optional, so normalise it before it is iterated over below.
    if headers is None:
      headers = {}

    connection = self._get_connection(uri, headers=headers)

    if self.debug:
      connection.debuglevel = 1

    # When the connection goes to a host other than the one in the URI, the
    # request line carries the full URI rather than just the relative path.
    if connection.host != uri.host:
      connection.putrequest(method, str(uri))
    else:
      connection.putrequest(method, uri._get_relative_path())

    # Overcome a bug in Python 2.4 and 2.5
    # httplib.HTTPConnection.putrequest adding
    # HTTP request header 'Host: www.google.com:443' instead of
    # 'Host: www.google.com', and thus resulting the error message
    # 'Token invalid - AuthSub token has wrong scope' in the HTTP response.
    if (uri.scheme == 'https' and int(uri.port or 443) == 443 and
        hasattr(connection, '_buffer') and
        isinstance(connection._buffer, list)):
      header_line = 'Host: %s:443' % uri.host
      replacement_header_line = 'Host: %s' % uri.host
      try:
        connection._buffer[connection._buffer.index(header_line)] = (
            replacement_header_line)
      except ValueError:  # header_line missing from connection._buffer
        pass

    # Send the HTTP headers.
    for header_name, value in headers.iteritems():
      connection.putheader(header_name, value)
    connection.endheaders()

    # If there is data, send it in the request. Nothing is sent when every
    # part is an empty string.
    if body_parts and [part for part in body_parts if part != '']:
      for part in body_parts:
        _send_data_part(part, connection)

    # Return the HTTP Response from the server.
    return connection.getresponse()
502
503
504def _send_data_part(data, connection):
505  if isinstance(data, (str, unicode)):
506    # I might want to just allow str, not unicode.
507    connection.send(data)
508    return
509  # Check to see if data is a file-like object that has a read method.
510  elif hasattr(data, 'read'):
511    # Read the file and send it a chunk at a time.
512    while 1:
513      binarydata = data.read(100000)
514      if binarydata == '': break
515      connection.send(binarydata)
516    return
517  else:
518    # The data object was not a file.
519    # Try to convert to a string and send the data.
520    connection.send(str(data))
521    return
522
523
class ProxiedHttpClient(HttpClient):
  """Performs HTTP requests through a proxy named in the environment.

  The proxy URL is read from the https_proxy or http_proxy environment
  variable, chosen by the scheme of the request. When the matching variable
  is not set, the connection is made directly as in HttpClient.
  """

  def _get_connection(self, uri, headers=None):
    """Creates the connection for a request, going through a proxy if set.

    Args:
      uri: The full URL for the request as a Uri object.
      headers: A dict of string pairs containing the HTTP headers for the
          request. For a proxied http request with proxy credentials, a
          Proxy-Authorization entry is added to this dict.

    Returns:
      An httplib connection object.

    Raises:
      ProxyError: If the proxy does not answer an https CONNECT request with
          status 200.
    """
    # Check to see if there are proxy settings required for this request.
    proxy = None
    if uri.scheme == 'https':
      proxy = os.environ.get('https_proxy')
    elif uri.scheme == 'http':
      proxy = os.environ.get('http_proxy')
    if not proxy:
      return HttpClient._get_connection(self, uri, headers=headers)
    # Now we have the URL of the appropriate proxy server.
    proxy_uri = Uri.parse_uri(proxy)
    if not proxy_uri.port:
      proxy_uri.port = '80'
    # Get a username and password for the proxy if required.
    proxy_auth = _get_proxy_auth()
    if uri.scheme == 'https':
      return self._open_https_tunnel(uri, proxy_uri, proxy_auth, headers)
    # A proxy is only ever looked up for https or http, so this is http. The
    # credentials can only be attached when there is a headers dict to carry
    # them.
    if proxy_auth and headers is not None:
      headers['Proxy-Authorization'] = proxy_auth.strip()
    return httplib.HTTPConnection(proxy_uri.host, int(proxy_uri.port))

  def _open_https_tunnel(self, uri, proxy_uri, proxy_auth, headers):
    """Issues a CONNECT to the proxy and wraps the resulting socket in SSL."""
    import socket
    if proxy_auth:
      proxy_auth = 'Proxy-authorization: %s' % proxy_auth
    # Construct the proxy connect command.
    port = uri.port
    if not port:
      port = 443
    proxy_connect = 'CONNECT %s:%s HTTP/1.0\r\n' % (uri.host, port)
    # Set the user agent to send to the proxy
    user_agent = ''
    if headers and 'User-Agent' in headers:
      user_agent = 'User-Agent: %s\r\n' % (headers['User-Agent'])
    proxy_pieces = '%s%s%s\r\n' % (proxy_connect, proxy_auth, user_agent)
    # Connect to the proxy server, very simple recv and error checking
    p_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
      p_sock.connect((proxy_uri.host, int(proxy_uri.port)))
      p_sock.sendall(proxy_pieces)
      response = ''
      # Wait for the full response.
      while response.find("\r\n\r\n") == -1:
        received = p_sock.recv(8192)
        if not received:
          # recv returned nothing, so the proxy has closed the connection
          # and the end of the response headers will never arrive.
          raise ProxyError(
              'Proxy closed the connection before completing its response.')
        response += received
      status_fields = response.split()
      if len(status_fields) < 2:
        raise ProxyError('Malformed proxy response: %r' % response)
      p_status = status_fields[1]
      if p_status != str(200):
        raise ProxyError('Error status=%s' % str(p_status))
      # Trivial setup for ssl socket.
      if ssl is not None:
        sslobj = ssl.wrap_socket(p_sock, None, None)
      else:
        sock_ssl = socket.ssl(p_sock, None, None)
        sslobj = httplib.FakeSocket(p_sock, sock_ssl)
    except Exception:
      # Do not leave the proxy socket open when the tunnel cannot be set up.
      p_sock.close()
      raise
    # Initalize httplib and replace with the proxy socket.
    connection = httplib.HTTPConnection(proxy_uri.host)
    connection.sock = sslobj
    return connection
586
587
588def _get_proxy_auth():
589  import base64
590  proxy_username = os.environ.get('proxy-username')
591  if not proxy_username:
592    proxy_username = os.environ.get('proxy_username')
593  proxy_password = os.environ.get('proxy-password')
594  if not proxy_password:
595    proxy_password = os.environ.get('proxy_password')
596  if proxy_username:
597    user_auth = base64.b64encode('%s:%s' % (proxy_username,
598                                            proxy_password))
599    return 'Basic %s\r\n' % (user_auth.strip())
600  else:
601    return ''