/thirdparty/google_appengine/google/appengine/api/urlfetch_stub.py
#!/usr/bin/env python
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Stub version of the urlfetch API, based on httplib."""

_successfully_imported_fancy_urllib = False
_fancy_urllib_InvalidCertException = None
_fancy_urllib_SSLError = None
try:
  import fancy_urllib
  _successfully_imported_fancy_urllib = True
  _fancy_urllib_InvalidCertException = fancy_urllib.InvalidCertificateException
  _fancy_urllib_SSLError = fancy_urllib.SSLError
except ImportError:
  pass

import gzip
import httplib
import logging
import os
import socket
import StringIO
import sys
import urllib
import urlparse

from google.appengine.api import apiproxy_stub
from google.appengine.api import urlfetch
from google.appengine.api import urlfetch_errors
from google.appengine.api import urlfetch_service_pb
from google.appengine.runtime import apiproxy_errors

MAX_REQUEST_SIZE = 5 << 20

MAX_RESPONSE_SIZE = 2 ** 25

MAX_REDIRECTS = urlfetch.MAX_REDIRECTS

REDIRECT_STATUSES = frozenset([
    httplib.MOVED_PERMANENTLY,
    httplib.FOUND,
    httplib.SEE_OTHER,
    httplib.TEMPORARY_REDIRECT,
])

_API_CALL_DEADLINE = 5.0

_API_CALL_VALIDATE_CERTIFICATE_DEFAULT = True

_CONNECTION_SUPPORTS_TIMEOUT = sys.version_info >= (2, 6)

_UNTRUSTED_REQUEST_HEADERS = frozenset([
    'content-length',
    'host',
    'vary',
    'via',
    'x-forwarded-for',
])

_MAX_URL_LENGTH = 2048

def _CanValidateCerts():
  return (_successfully_imported_fancy_urllib and
          fancy_urllib.can_validate_certs())


def _SetupSSL(path):
  global CERT_PATH
  if os.path.exists(path):
    CERT_PATH = path
  else:
    CERT_PATH = None
    logging.warning('%s missing; without this urlfetch will not be able to '
                    'validate SSL certificates.', path)

  if not _CanValidateCerts():
    logging.warning('No ssl package found. urlfetch will not be able to '
                    'validate SSL certificates.')

_SetupSSL(os.path.normpath(os.path.join(os.path.dirname(__file__), '..', '..',
                                        '..', 'lib', 'cacerts',
                                        'urlfetch_cacerts.txt')))

def _IsAllowedPort(port):
  if port is None:
    return True
  try:
    port = int(port)
  except ValueError, e:
    return False
  if ((port >= 80 and port <= 90) or
      (port >= 440 and port <= 450) or
      port >= 1024):
    return True
  return False
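
# A minimal usage sketch (not part of the original SDK file; added for
# illustration): in a local test harness this stub is typically registered
# with apiproxy_stub_map so that urlfetch.fetch() calls are served by it:
#
#   from google.appengine.api import apiproxy_stub_map
#   from google.appengine.api import urlfetch
#
#   apiproxy_stub_map.apiproxy = apiproxy_stub_map.APIProxyStubMap()
#   apiproxy_stub_map.apiproxy.RegisterStub('urlfetch', URLFetchServiceStub())
#   result = urlfetch.fetch('http://example.com/')
#   print result.status_code, len(result.content)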

class URLFetchServiceStub(apiproxy_stub.APIProxyStub):
  """Stub version of the urlfetch API to be used with apiproxy_stub_map."""

  def __init__(self, service_name='urlfetch'):
    """Initializer.

    Args:
      service_name: Service name expected for all calls.
    """
    super(URLFetchServiceStub, self).__init__(service_name,
                                              max_request_size=MAX_REQUEST_SIZE)

  def _Dynamic_Fetch(self, request, response):
    """Trivial implementation of URLFetchService::Fetch().

    Args:
      request: the fetch to perform, a URLFetchRequest
      response: the fetch response, a URLFetchResponse
    """
    if len(request.url()) >= _MAX_URL_LENGTH:
      logging.error('URL is too long: %s...' % request.url()[:50])
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    (protocol, host, path, query, fragment) = urlparse.urlsplit(request.url())

    payload = None
    if request.method() == urlfetch_service_pb.URLFetchRequest.GET:
      method = 'GET'
    elif request.method() == urlfetch_service_pb.URLFetchRequest.POST:
      method = 'POST'
      payload = request.payload()
    elif request.method() == urlfetch_service_pb.URLFetchRequest.HEAD:
      method = 'HEAD'
    elif request.method() == urlfetch_service_pb.URLFetchRequest.PUT:
      method = 'PUT'
      payload = request.payload()
    elif request.method() == urlfetch_service_pb.URLFetchRequest.DELETE:
      method = 'DELETE'
    else:
      logging.error('Invalid method: %s', request.method())
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    if not (protocol == 'http' or protocol == 'https'):
      logging.error('Invalid protocol: %s', protocol)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    if not host:
      logging.error('Missing host.')
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    self._SanitizeHttpHeaders(_UNTRUSTED_REQUEST_HEADERS,
                              request.header_list())

    deadline = _API_CALL_DEADLINE
    if request.has_deadline():
      deadline = request.deadline()

    validate_certificate = _API_CALL_VALIDATE_CERTIFICATE_DEFAULT
    if request.has_mustvalidateservercertificate():
      validate_certificate = request.mustvalidateservercertificate()

    self._RetrieveURL(request.url(), payload, method,
                      request.header_list(), request, response,
                      follow_redirects=request.followredirects(),
                      deadline=deadline,
                      validate_certificate=validate_certificate)

  def _RetrieveURL(self, url, payload, method, headers, request, response,
                   follow_redirects=True, deadline=_API_CALL_DEADLINE,
                   validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT):
    """Retrieves a URL.

    Args:
      url: String containing the URL to access.
      payload: Request payload to send, if any; None if no payload.
        If the payload is unicode, we assume it is utf-8.
      method: HTTP method to use (e.g., 'GET').
      headers: List of additional header objects to use for the request.
      request: Request object from the original request.
      response: Response object to populate with the response data.
      follow_redirects: optional setting (defaulting to True) for whether or
        not we should transparently follow redirects (up to MAX_REDIRECTS).
      deadline: Number of seconds to wait for the urlfetch to finish.
      validate_certificate: If true, do not send the request to the server
        unless the certificate is valid, signed by a trusted CA, and the
        hostname matches the certificate.

    Raises:
      Raises an apiproxy_errors.ApplicationError exception with INVALID_URL
      in cases where:
        - The protocol of the redirected URL is bad or missing.
        - The port is not in the allowable range of ports.
      Raises an apiproxy_errors.ApplicationError exception with
      TOO_MANY_REDIRECTS when MAX_REDIRECTS is exceeded.
    """
    last_protocol = ''
    last_host = ''

    if isinstance(payload, unicode):
      payload = payload.encode('utf-8')
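
    # Follow the request through up to MAX_REDIRECTS redirects. The loop
    # exits via break once a response has been copied into the response
    # proto; if every iteration keeps redirecting, the for/else clause at the
    # bottom raises TOO_MANY_REDIRECTS instead.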
    for redirect_number in xrange(MAX_REDIRECTS + 1):
      parsed = urlparse.urlsplit(url)
      protocol, host, path, query, fragment = parsed

      port = urllib.splitport(urllib.splituser(host)[1])[1]
      if not _IsAllowedPort(port):
        logging.error(
            'urlfetch received %s ; port %s is not allowed in production!' %
            (url, port))
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

      if protocol and not host:
        logging.error('Missing host on redirect; target url is %s' % url)
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

      if not host and not protocol:
        host = last_host
        protocol = last_protocol

      adjusted_headers = {
          'User-Agent':
          'AppEngine-Google; (+http://code.google.com/appengine)',
          'Host': host,
          'Accept-Encoding': 'gzip',
      }
      if payload is not None:
        adjusted_headers['Content-Length'] = str(len(payload))
      if method == 'POST' and payload:
        adjusted_headers['Content-Type'] = 'application/x-www-form-urlencoded'

      passthrough_content_encoding = False
      for header in headers:
        if header.key().title().lower() == 'user-agent':
          adjusted_headers['User-Agent'] = (
              '%s %s' %
              (header.value(), adjusted_headers['User-Agent']))
        else:
          if header.key().lower() == 'accept-encoding':
            passthrough_content_encoding = True
          adjusted_headers[header.key().title()] = header.value()

      if payload is not None:
        escaped_payload = payload.encode('string_escape')
      else:
        escaped_payload = ''
      logging.debug('Making HTTP request: host = %r, '
                    'url = %r, payload = %.1000r, headers = %r',
                    host, url, escaped_payload, adjusted_headers)
      try:
        if protocol == 'http':
          connection_class = httplib.HTTPConnection
        elif protocol == 'https':
          if (validate_certificate and _CanValidateCerts() and
              CERT_PATH):
            connection_class = fancy_urllib.create_fancy_connection(
                ca_certs=CERT_PATH)
          else:
            connection_class = httplib.HTTPSConnection
        else:
          error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb.URLFetchServiceError.INVALID_URL, error_msg)

        if _CONNECTION_SUPPORTS_TIMEOUT:
          connection = connection_class(host, timeout=deadline)
        else:
          connection = connection_class(host)

        last_protocol = protocol
        last_host = host

        if query != '':
          full_path = path + '?' + query
        else:
          full_path = path
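
        # httplib connections on Python < 2.6 do not accept a timeout
        # argument, so the deadline is applied by temporarily overriding the
        # process-wide default socket timeout and restoring it in the
        # finally block below.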
        if not _CONNECTION_SUPPORTS_TIMEOUT:
          orig_timeout = socket.getdefaulttimeout()
        try:
          if not _CONNECTION_SUPPORTS_TIMEOUT:
            socket.setdefaulttimeout(deadline)
          connection.request(method, full_path, payload, adjusted_headers)
          http_response = connection.getresponse()
          if method == 'HEAD':
            http_response_data = ''
          else:
            http_response_data = http_response.read()
        finally:
          if not _CONNECTION_SUPPORTS_TIMEOUT:
            socket.setdefaulttimeout(orig_timeout)
          connection.close()
      except (_fancy_urllib_InvalidCertException,
              _fancy_urllib_SSLError), e:
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR,
            str(e))
      except socket.timeout, e:
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED, str(e))
      except (httplib.error, socket.error, IOError), e:
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, str(e))

      if http_response.status in REDIRECT_STATUSES and follow_redirects:
        url = http_response.getheader('Location', None)
        if url is None:
          error_msg = 'Redirecting response was missing "Location" header'
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb.URLFetchServiceError.MALFORMED_REPLY,
              error_msg)
      else:
        response.set_statuscode(http_response.status)
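
        # The request always advertises 'Accept-Encoding: gzip' above, so the
        # body is transparently gunzipped here unless the application supplied
        # its own Accept-Encoding header, in which case the compressed bytes
        # (and the content-encoding response header) are passed through as-is.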
        if (http_response.getheader('content-encoding') == 'gzip' and
            not passthrough_content_encoding):
          gzip_stream = StringIO.StringIO(http_response_data)
          gzip_file = gzip.GzipFile(fileobj=gzip_stream)
          http_response_data = gzip_file.read()

        response.set_content(http_response_data[:MAX_RESPONSE_SIZE])

        for header_key in http_response.msg.keys():
          for header_value in http_response.msg.getheaders(header_key):
            if (header_key.lower() == 'content-encoding' and
                header_value == 'gzip' and
                not passthrough_content_encoding):
              continue
            if header_key.lower() == 'content-length' and method != 'HEAD':
              header_value = str(len(response.content()))
            header_proto = response.add_header()
            header_proto.set_key(header_key)
            header_proto.set_value(header_value)

        if len(http_response_data) > MAX_RESPONSE_SIZE:
          response.set_contentwastruncated(True)

        if request.url() != url:
          response.set_finalurl(url)

        break
    else:
      error_msg = 'Too many repeated redirects'
      logging.error(error_msg)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.TOO_MANY_REDIRECTS,
          error_msg)

  def _SanitizeHttpHeaders(self, untrusted_headers, headers):
    """Cleans "unsafe" headers from the HTTP request, in place.

    Args:
      untrusted_headers: Set of untrusted header names (all lowercase).
      headers: List of Header objects. The list is modified in place.
    """
    prohibited_headers = [h.key() for h in headers
                          if h.key().lower() in untrusted_headers]
    if prohibited_headers:
      logging.warn('Stripped prohibited headers from URLFetch request: %s',
                   prohibited_headers)
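      # Delete in reverse index order so that removing an entry does not shift
      # the indices of the headers that have not been examined yet.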
      for index in reversed(xrange(len(headers))):
        if headers[index].key().lower() in untrusted_headers:
          del headers[index]