/src/atom/http_core.py
Python | 601 lines | 528 code | 20 blank | 53 comment | 6 complexity | 178c2ec31ca84fc8f7120136c74fb094 MD5 | raw file
#!/usr/bin/env python
#
# Copyright (C) 2009 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# This module is used for version 2 of the Google Data APIs.
# TODO: add proxy handling.


__author__ = 'j.s@google.com (Jeff Scudder)'


import os
try:
  # Python 2 module layout.
  import httplib
  import StringIO
  import urllib
  import urlparse
  _PY3 = False
  _TEXT_TYPES = (str, unicode)
except ImportError:
  # Python 3 renamed these modules. Rebind the Python 2 names so that the rest
  # of this module (and anything reaching in for these names) keeps working.
  import http.client as httplib
  import io as StringIO
  import urllib.parse as urlparse
  urllib = urlparse
  _PY3 = True
  _TEXT_TYPES = (str,)
ssl = None
try:
  import ssl
except ImportError:
  pass



class Error(Exception):
  """Base class for the exceptions raised by this module."""
  pass


class UnknownSize(Error):
  """Raised when a body part is added whose size cannot be determined."""
  pass


class ProxyError(Error):
  """Raised when the proxy server does not accept a CONNECT request."""
  pass


MIME_BOUNDARY = 'END_OF_PART'


def _encode_for_wire(data):
  """Returns data in a form which can be written to a socket.

  On Python 2 the data is returned untouched, which preserves the historical
  behavior of this module. On Python 3 sockets only accept bytes, so text is
  encoded as UTF-8 and everything else is returned as is.
  """
  if _PY3 and isinstance(data, str):
    return data.encode('utf-8')
  return data


def _body_part_size(data):
  """Returns the number of bytes data will occupy in the body, or None.

  None means the size cannot be computed here (for example a file-like
  object) and must be supplied by the caller.
  """
  if isinstance(data, str):
    # On Python 3 a str is text, so count the bytes which will be sent rather
    # than the number of characters.
    return len(_encode_for_wire(data))
  if _PY3 and isinstance(data, (bytes, bytearray)):
    return len(data)
  return None


def get_headers(http_response):
  """Retrieves all HTTP headers from an HTTP response from the server.

  This method is provided for backwards compatibility for Python2.2 and 2.3.
  The httplib.HTTPResponse object in 2.2 and 2.3 does not have a getheaders
  method so this function will use getheaders if available, but if not it
  will retrieve a few using getheader.

  Args:
    http_response: An object with a getheaders method, or failing that a
        getheader(name, default) method.

  Returns:
    Whatever getheaders returns when it is available, otherwise a list of
    (name, value) tuples for the well known headers which were present.
  """
  if hasattr(http_response, 'getheaders'):
    return http_response.getheaders()
  headers = []
  for header in (
      'location', 'content-type', 'content-length', 'age', 'allow',
      'cache-control', 'content-location', 'content-encoding', 'date',
      'etag', 'expires', 'last-modified', 'pragma', 'server',
      'set-cookie', 'transfer-encoding', 'vary', 'via', 'warning',
      'www-authenticate', 'gdata-version'):
    value = http_response.getheader(header, None)
    if value is not None:
      headers.append((header, value))
  return headers


class HttpRequest(object):
  """Contains all of the parameters for an HTTP 1.1 request.

  The HTTP headers are represented by a dictionary, and it is the
  responsibility of the user to ensure that duplicate field names are combined
  into one header value according to the rules in section 4.2 of RFC 2616.
  """
  method = None
  uri = None

  def __init__(self, uri=None, method=None, headers=None):
    """Construct an HTTP request.

    Args:
      uri: The full path or partial path as a Uri object or a string.
      method: The HTTP method for the request, examples include 'GET', 'POST',
              etc.
      headers: dict of strings The HTTP headers to include in the request.
    """
    self.headers = headers or {}
    self._body_parts = []
    if method is not None:
      self.method = method
    if isinstance(uri, _TEXT_TYPES):
      uri = Uri.parse_uri(uri)
    self.uri = uri or Uri()

  def add_body_part(self, data, mime_type, size=None):
    """Adds data to the HTTP request body.

    If more than one part is added, this is assumed to be a mime-multipart
    request. This method is designed to create MIME 1.0 requests as specified
    in RFC 1341.

    The Content-Type and Content-Length headers of the request are updated to
    describe the body after the part has been added.

    Args:
      data: str or a file-like object containing a part of the request body.
      mime_type: str The MIME type describing the data
      size: int Required if the data is a file like object. If the data is a
            string, the size is calculated so this parameter is ignored.

    Raises:
      UnknownSize: if the size was not given and could not be calculated.
    """
    calculated_size = _body_part_size(data)
    if calculated_size is not None:
      size = calculated_size
    if size is None:
      # TODO: support chunked transfer if some of the body is of unknown size.
      raise UnknownSize('Each part of the body must have a known size.')
    content_length = int(self.headers.get('Content-Length', 0))
    boundary_string = '\r\n--%s\r\n' % (MIME_BOUNDARY,)
    type_string = 'Content-Type: %s\r\n\r\n' % (mime_type,)
    if not self._body_parts:
      # If this is the first part added to the body, then this is not a
      # multipart request.
      self.headers['Content-Type'] = mime_type
      content_length = size
      self._body_parts.append(data)
    elif len(self._body_parts) == 1:
      # This is the first member in a mime-multipart request, so change the
      # _body_parts list to indicate a multipart payload. The content type of
      # the first part of the body moves from the headers into the payload.
      preamble = 'Media multipart posting'
      original_type_string = 'Content-Type: %s\r\n\r\n' % (
          self.headers['Content-Type'],)
      ending_boundary_string = '\r\n--%s--' % (MIME_BOUNDARY,)
      # Updated in place so that the list object itself is preserved.
      self._body_parts[:] = [
          preamble, boundary_string, original_type_string,
          self._body_parts[0], boundary_string, type_string, data,
          ending_boundary_string]
      content_length += (
          len(preamble) + 2 * len(boundary_string) +
          len(original_type_string) + len(type_string) + size +
          len(ending_boundary_string))
      # Change the headers to indicate this is now a mime multipart request.
      self.headers['Content-Type'] = 'multipart/related; boundary="%s"' % (
          MIME_BOUNDARY,)
      self.headers['MIME-version'] = '1.0'
    else:
      # This is already a mime multipart request, so the new part goes just
      # before the closing boundary, which is the last item in the list.
      self._body_parts.insert(-1, boundary_string)
      self._body_parts.insert(-1, type_string)
      self._body_parts.insert(-1, data)
      content_length += len(boundary_string) + len(type_string) + size
    self.headers['Content-Length'] = str(content_length)
  # I could add an "append_to_body_part" method as well.

  AddBodyPart = add_body_part

  def add_form_inputs(self, form_data,
                      mime_type='application/x-www-form-urlencoded'):
    """Form-encodes and adds data to the request body.

    Args:
      form_data: dict or sequence of two member tuples which contains the
                 form keys and values.
      mime_type: str The MIME type of the form data being sent. Defaults
                 to 'application/x-www-form-urlencoded'.
    """
    body = urllib.urlencode(form_data)
    self.add_body_part(body, mime_type)

  AddFormInputs = add_form_inputs

  def _copy(self):
    """Creates a copy of this request.

    The URI, its query dict, the headers dict and the list of body parts are
    all new objects. The body parts themselves (including any file-like
    objects) are shared with the original request.
    """
    copied_uri = Uri(self.uri.scheme, self.uri.host, self.uri.port,
                     self.uri.path, self.uri.query.copy())
    new_request = HttpRequest(uri=copied_uri, method=self.method,
                              headers=self.headers.copy())
    new_request._body_parts = self._body_parts[:]
    return new_request

  def _dump(self):
    """Converts to a printable string for debugging purposes.

    In order to preserve the request, it does not read from file-like objects
    in the body.
    """
    output = 'HTTP Request\n method: %s\n url: %s\n headers:\n' % (
        self.method, str(self.uri))
    for header, value in self.headers.items():
      output += ' %s: %s\n' % (header, value)
    output += ' body sections:\n'
    i = 0
    for part in self._body_parts:
      if isinstance(part, _TEXT_TYPES):
        output += ' %s: %s\n' % (i, part)
      else:
        output += ' %s: <file like object>\n' % i
      i += 1
    return output


def _apply_defaults(http_request):
  """Fills in the URI scheme of the request when it has not been set.

  Port 443 implies 'https', anything else defaults to 'http'.
  """
  if http_request.uri.scheme is None:
    if http_request.uri.port == 443:
      http_request.uri.scheme = 'https'
    else:
      http_request.uri.scheme = 'http'


class Uri(object):
  """A URI as used in HTTP 1.1"""
  scheme = None
  host = None
  port = None
  path = None

  def __init__(self, scheme=None, host=None, port=None, path=None, query=None):
    """Constructor for a URI.

    Args:
      scheme: str This is usually 'http' or 'https'.
      host: str The host name or IP address of the desired server.
      port: int The server's port number.
      path: str The path of the resource following the host. This begins with
            a /, example: '/calendar/feeds/default/allcalendars/full'
      query: dict of strings The URL query parameters. The keys and values are
             both escaped so this dict should contain the unescaped values.
             For example {'my key': 'val', 'second': '!!!'} will become
             '?my+key=val&second=%21%21%21' which is appended to the path.
    """
    self.query = query or {}
    if scheme is not None:
      self.scheme = scheme
    if host is not None:
      self.host = host
    if port is not None:
      self.port = port
    if path:
      self.path = path

  def _get_query_string(self):
    """Returns the escaped query parameters joined with '&'.

    A parameter whose value is None is written as a bare key with no '='.
    """
    param_pairs = []
    for key, value in self.query.items():
      quoted_key = urllib.quote_plus(str(key))
      if value is None:
        param_pairs.append(quoted_key)
      else:
        quoted_value = urllib.quote_plus(str(value))
        param_pairs.append('%s=%s' % (quoted_key, quoted_value))
    return '&'.join(param_pairs)

  def _get_relative_path(self):
    """Returns the path with the query parameters escaped and appended."""
    param_string = self._get_query_string()
    if self.path is None:
      path = '/'
    else:
      path = self.path
    if param_string:
      return '?'.join([path, param_string])
    else:
      return path

  def _to_string(self):
    """Returns the full URL, defaulting the scheme when it is not set.

    With no scheme, port 443 implies 'https' and anything else 'http'. The
    port is only included in the result when it has been set.
    """
    if self.scheme is not None:
      scheme = self.scheme
    elif self.port == 443:
      scheme = 'https'
    else:
      scheme = 'http'
    if self.port is None:
      return '%s://%s%s' % (scheme, self.host, self._get_relative_path())
    else:
      return '%s://%s:%s%s' % (scheme, self.host, str(self.port),
                               self._get_relative_path())

  def __str__(self):
    return self._to_string()

  def modify_request(self, http_request=None):
    """Sets HTTP request components based on the URI.

    Only the members of this Uri which have been set are copied, so existing
    values in the request's URI are kept where this Uri has nothing to say.

    Args:
      http_request: HttpRequest (optional) The request to modify. A new
                    request is created if this is None.

    Returns:
      The modified (or newly created) HttpRequest.
    """
    if http_request is None:
      http_request = HttpRequest()
    if http_request.uri is None:
      http_request.uri = Uri()
    # Determine the correct scheme.
    if self.scheme:
      http_request.uri.scheme = self.scheme
    if self.port:
      http_request.uri.port = self.port
    if self.host:
      http_request.uri.host = self.host
    # Set the relative uri path
    if self.path:
      http_request.uri.path = self.path
    if self.query:
      http_request.uri.query = self.query.copy()
    return http_request

  ModifyRequest = modify_request

  def parse_uri(uri_string):
    """Creates a Uri object which corresponds to the URI string.

    This method can accept partial URIs, but it will leave missing
    members of the Uri unset.

    Args:
      uri_string: str The URI to parse, for example
                  'http://www.google.com/calendar/feeds?max-results=5'

    Returns:
      A new Uri. Query parameters are unescaped; a parameter with no '='
      is stored with a value of None.
    """
    parts = urlparse.urlparse(uri_string)
    uri = Uri()
    if parts[0]:
      uri.scheme = parts[0]
    if parts[1]:
      host_parts = parts[1].split(':')
      if host_parts[0]:
        uri.host = host_parts[0]
      if len(host_parts) > 1:
        uri.port = int(host_parts[1])
    if parts[2]:
      uri.path = parts[2]
    if parts[4]:
      for pair in parts[4].split('&'):
        # Only split on the first '=' so that values which themselves contain
        # an '=' (base64 padding for example) are not truncated.
        pair_parts = pair.split('=', 1)
        if len(pair_parts) > 1:
          uri.query[urllib.unquote_plus(pair_parts[0])] = (
              urllib.unquote_plus(pair_parts[1]))
        else:
          uri.query[urllib.unquote_plus(pair_parts[0])] = None
    return uri

  parse_uri = staticmethod(parse_uri)

  ParseUri = parse_uri


parse_uri = Uri.parse_uri


ParseUri = Uri.parse_uri


class HttpResponse(object):
  """A simple stand-in for the response object produced by httplib.

  Offers the getheader, getheaders and read methods along with the status and
  reason members.
  """
  status = None
  reason = None
  _body = None

  def __init__(self, status=None, reason=None, headers=None, body=None):
    """Constructs a response.

    Args:
      status: The HTTP status code (optional).
      reason: The reason phrase which accompanies the status (optional).
      headers: dict The response headers (optional).
      body: An object with a read method, or the content of the body itself
            which will be wrapped in a file-like object (optional).
    """
    self._headers = headers or {}
    if status is not None:
      self.status = status
    if reason is not None:
      self.reason = reason
    if body is not None:
      if hasattr(body, 'read'):
        self._body = body
      elif _PY3 and isinstance(body, (bytes, bytearray)):
        # io.StringIO only accepts text on Python 3.
        self._body = StringIO.BytesIO(body)
      else:
        self._body = StringIO.StringIO(body)

  def getheader(self, name, default=None):
    """Returns the header with the given name, or default if it is not set."""
    if name in self._headers:
      return self._headers[name]
    else:
      return default

  def getheaders(self):
    """Returns the dict of headers which this response was created with."""
    return self._headers

  def read(self, amt=None):
    """Reads from the body.

    Args:
      amt: int (optional) The most to read. Everything which remains is read
           if this is not given (or is zero).

    Returns:
      The data read, or None if this response has no body.
    """
    if self._body is None:
      return None
    if not amt:
      return self._body.read()
    else:
      return self._body.read(amt)


def _dump_response(http_response):
  """Converts to a string for printing debug messages.

  Does not read the body since that may consume the content.
  """
  output = 'HttpResponse\n status: %s\n reason: %s\n headers:' % (
      http_response.status, http_response.reason)
  headers = get_headers(http_response)
  # get_headers yields a dict for HttpResponse objects from this module and a
  # list of pairs otherwise.
  if isinstance(headers, dict):
    for header, value in headers.items():
      output += ' %s: %s\n' % (header, value)
  else:
    for pair in headers:
      output += ' %s: %s\n' % (pair[0], pair[1])
  return output


class HttpClient(object):
  """Performs HTTP requests using httplib."""
  debug = None

  def request(self, http_request):
    """Sends the HttpRequest and returns the response from httplib."""
    return self._http_request(http_request.method, http_request.uri,
                              http_request.headers, http_request._body_parts)

  Request = request

  def _get_connection(self, uri, headers=None):
    """Opens a socket connection to the server to set up an HTTP request.

    Args:
      uri: The full URL for the request as a Uri object.
      headers: A dict of string pairs containing the HTTP headers for the
          request.

    Returns:
      An httplib.HTTPSConnection if the scheme of the URI is 'https',
      otherwise an httplib.HTTPConnection.
    """
    if uri.scheme == 'https':
      connection_class = httplib.HTTPSConnection
    else:
      connection_class = httplib.HTTPConnection
    if not uri.port:
      return connection_class(uri.host)
    return connection_class(uri.host, int(uri.port))

  def _http_request(self, method, uri, headers=None, body_parts=None):
    """Makes an HTTP request using httplib.

    Args:
      method: str example: 'GET', 'POST', 'PUT', 'DELETE', etc.
      uri: str or atom.http_core.Uri
      headers: dict of strings mapping to strings which will be sent as HTTP
               headers in the request.
      body_parts: list of strings, objects with a read method, or objects
                  which can be converted to strings using str. Each of these
                  will be sent in order as the body of the HTTP request.

    Returns:
      The result of calling getresponse on the connection.
    """
    if isinstance(uri, _TEXT_TYPES):
      uri = Uri.parse_uri(uri)
    if headers is None:
      # Created before the connection is requested so that any headers which
      # _get_connection adds (proxy authorization) are sent with the request.
      headers = {}

    connection = self._get_connection(uri, headers=headers)

    if self.debug:
      connection.debuglevel = 1

    # When the connection is to a different host (a proxy) the request line
    # needs to contain the complete URL.
    if connection.host != uri.host:
      connection.putrequest(method, str(uri))
    else:
      connection.putrequest(method, uri._get_relative_path())

    # Overcome a bug in Python 2.4 and 2.5
    # httplib.HTTPConnection.putrequest adding
    # HTTP request header 'Host: www.google.com:443' instead of
    # 'Host: www.google.com', and thus resulting the error message
    # 'Token invalid - AuthSub token has wrong scope' in the HTTP response.
    if (uri.scheme == 'https' and int(uri.port or 443) == 443 and
        hasattr(connection, '_buffer') and
        isinstance(connection._buffer, list)):
      header_line = 'Host: %s:443' % uri.host
      replacement_header_line = 'Host: %s' % uri.host
      try:
        connection._buffer[connection._buffer.index(header_line)] = (
            replacement_header_line)
      except ValueError:  # header_line missing from connection._buffer
        pass

    # Send the HTTP headers.
    for header_name, value in headers.items():
      connection.putheader(header_name, value)
    connection.endheaders()

    # If there is data, send it in the request. A list is built (rather than
    # relying on filter) because the result is only tested for emptiness.
    if body_parts and [part for part in body_parts if part != '']:
      for part in body_parts:
        _send_data_part(part, connection)

    # Return the HTTP Response from the server.
    return connection.getresponse()


def _send_data_part(data, connection):
  """Writes one part of the request body to the connection.

  Args:
    data: A string, an object with a read method which is sent a chunk at a
          time, or any other object which is converted using str.
    connection: An object with a send method.
  """
  if (isinstance(data, _TEXT_TYPES) or
      (_PY3 and isinstance(data, (bytes, bytearray)))):
    # I might want to just allow str, not unicode.
    connection.send(_encode_for_wire(data))
    return
  # Check to see if data is a file-like object that has a read method.
  if hasattr(data, 'read'):
    # Read the file and send it a chunk at a time.
    while 1:
      binarydata = data.read(100000)
      # Tested for emptiness instead of equality with '' so that the end of
      # a binary stream (b'') is detected as well.
      if not binarydata:
        break
      connection.send(_encode_for_wire(binarydata))
    return
  # The data object was not a file.
  # Try to convert to a string and send the data.
  connection.send(_encode_for_wire(str(data)))
  return


class ProxiedHttpClient(HttpClient):
  """An HttpClient which honors the http_proxy and https_proxy settings.

  The proxy URLs are read from the environment for each request. Proxy
  credentials come from _get_proxy_auth.
  """

  def _get_connection(self, uri, headers=None):
    """Opens a connection, going through a proxy if one is configured.

    Args:
      uri: The full URL for the request as a Uri object.
      headers: A dict of string pairs containing the HTTP headers for the
          request. A Proxy-Authorization header is added to this dict for
          plain http requests when proxy credentials are available.

    Returns:
      An httplib connection object.

    Raises:
      ProxyError: if the proxy does not answer a CONNECT request with a 200
          status.
    """
    # Check to see if there are proxy settings required for this request.
    proxy = None
    if uri.scheme == 'https':
      proxy = os.environ.get('https_proxy')
    elif uri.scheme == 'http':
      proxy = os.environ.get('http_proxy')
    if not proxy:
      return HttpClient._get_connection(self, uri, headers=headers)
    # Now we have the URL of the appropriate proxy server.
    # Get a username and password for the proxy if required.
    proxy_auth = _get_proxy_auth()
    if uri.scheme == 'https':
      import socket
      if proxy_auth:
        proxy_auth = 'Proxy-authorization: %s' % proxy_auth
      # Construct the proxy connect command.
      port = uri.port
      if not port:
        port = 443
      proxy_connect = 'CONNECT %s:%s HTTP/1.0\r\n' % (uri.host, port)
      # Set the user agent to send to the proxy
      user_agent = ''
      if headers and 'User-Agent' in headers:
        user_agent = 'User-Agent: %s\r\n' % (headers['User-Agent'])
      proxy_pieces = '%s%s%s\r\n' % (proxy_connect, proxy_auth, user_agent)
      # Find the proxy host and port.
      proxy_uri = Uri.parse_uri(proxy)
      if not proxy_uri.port:
        proxy_uri.port = '80'
      # Connect to the proxy server, very simple recv and error checking
      p_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
      try:
        p_sock.connect((proxy_uri.host, int(proxy_uri.port)))
        p_sock.sendall(_encode_for_wire(proxy_pieces))
        response = ''
        # Wait for the full response.
        while response.find("\r\n\r\n") == -1:
          chunk = p_sock.recv(8192)
          if not chunk:
            # The proxy hung up, waiting any longer would loop forever.
            raise ProxyError('The proxy closed the connection before '
                             'completing its response to CONNECT.')
          if not isinstance(chunk, str):
            # Python 3 sockets produce bytes. latin-1 cannot fail to decode.
            chunk = chunk.decode('latin-1')
          response += chunk
        status_fields = response.split()
        if len(status_fields) < 2:
          raise ProxyError('Malformed response from proxy: %r' % response)
        p_status = status_fields[1]
        if p_status != str(200):
          raise ProxyError('Error status=%s' % str(p_status))
        # Trivial setup for ssl socket.
        # NOTE(review): the two legacy code paths below do not verify the
        # certificate of the server - confirm whether that is acceptable.
        if ssl is None:
          # Python versions which predate the ssl module.
          sock_ssl = socket.ssl(p_sock, None, None)
          sslobj = httplib.FakeSocket(p_sock, sock_ssl)
        elif hasattr(ssl, 'wrap_socket'):
          sslobj = ssl.wrap_socket(p_sock, None, None)
        else:
          # ssl.wrap_socket no longer exists in recent Python versions.
          ssl_context = ssl.create_default_context()
          sslobj = ssl_context.wrap_socket(p_sock, server_hostname=uri.host)
      except Exception:
        # Do not leak the socket when the tunnel could not be established.
        p_sock.close()
        raise
      # Initalize httplib and replace with the proxy socket.
      connection = httplib.HTTPConnection(proxy_uri.host)
      connection.sock = sslobj
      return connection
    elif uri.scheme == 'http':
      proxy_uri = Uri.parse_uri(proxy)
      if not proxy_uri.port:
        proxy_uri.port = '80'
      if proxy_auth and headers is not None:
        headers['Proxy-Authorization'] = proxy_auth.strip()
      return httplib.HTTPConnection(proxy_uri.host, int(proxy_uri.port))
    return None


def _get_proxy_auth():
  """Builds the value of a basic authorization header for the proxy.

  The user name is read from the proxy-username (or proxy_username)
  environment variable and the password from proxy-password (or
  proxy_password). A missing password is treated as an empty password.

  Returns:
    'Basic <credentials>' followed by a CRLF, or an empty string if no user
    name has been configured.
  """
  import base64
  proxy_username = (os.environ.get('proxy-username') or
                    os.environ.get('proxy_username'))
  proxy_password = (os.environ.get('proxy-password') or
                    os.environ.get('proxy_password'))
  if not proxy_username:
    return ''
  credentials = '%s:%s' % (proxy_username, proxy_password or '')
  user_auth = base64.b64encode(_encode_for_wire(credentials))
  if not isinstance(user_auth, str):
    # b64encode produces bytes on Python 3.
    user_auth = user_auth.decode('ascii')
  return 'Basic %s\r\n' % (user_auth.strip())