
/thirdparty/google_appengine/google/appengine/api/urlfetch_stub.py

#!/usr/bin/env python
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Stub version of the urlfetch API, based on httplib."""


_successfully_imported_fancy_urllib = False
_fancy_urllib_InvalidCertException = None
_fancy_urllib_SSLError = None
try:
  import fancy_urllib
  _successfully_imported_fancy_urllib = True
  _fancy_urllib_InvalidCertException = fancy_urllib.InvalidCertificateException
  _fancy_urllib_SSLError = fancy_urllib.SSLError
except ImportError:
  pass
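
# Note (added commentary, not in the original file): the None placeholders
# above keep the later
#   except (_fancy_urllib_InvalidCertException, _fancy_urllib_SSLError)
# clause usable even when fancy_urllib is absent; in Python 2, a None entry
# in an except tuple simply never matches an exception.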

import gzip
import httplib
import logging
import os
import socket
import StringIO
import sys
import urllib
import urlparse

from google.appengine.api import apiproxy_stub
from google.appengine.api import urlfetch
from google.appengine.api import urlfetch_errors
from google.appengine.api import urlfetch_service_pb
from google.appengine.runtime import apiproxy_errors


MAX_REQUEST_SIZE = 5 << 20  # 5 MB

MAX_RESPONSE_SIZE = 2 ** 25  # 32 MB

MAX_REDIRECTS = urlfetch.MAX_REDIRECTS

REDIRECT_STATUSES = frozenset([
    httplib.MOVED_PERMANENTLY,
    httplib.FOUND,
    httplib.SEE_OTHER,
    httplib.TEMPORARY_REDIRECT,
])

_API_CALL_DEADLINE = 5.0

_API_CALL_VALIDATE_CERTIFICATE_DEFAULT = True

_CONNECTION_SUPPORTS_TIMEOUT = sys.version_info >= (2, 6)

_UNTRUSTED_REQUEST_HEADERS = frozenset([
    'content-length',
    'host',
    'vary',
    'via',
    'x-forwarded-for',
])

_MAX_URL_LENGTH = 2048


def _CanValidateCerts():
  return (_successfully_imported_fancy_urllib and
          fancy_urllib.can_validate_certs())


def _SetupSSL(path):
  global CERT_PATH
  if os.path.exists(path):
    CERT_PATH = path
  else:
    CERT_PATH = None
    logging.warning('%s missing; without this urlfetch will not be able to '
                    'validate SSL certificates.', path)

  if not _CanValidateCerts():
    logging.warning('No ssl package found. urlfetch will not be able to '
                    'validate SSL certificates.')

_SetupSSL(os.path.normpath(os.path.join(os.path.dirname(__file__), '..', '..',
                                        '..', 'lib', 'cacerts',
                                        'urlfetch_cacerts.txt')))

def _IsAllowedPort(port):
  if port is None:
    return True
  try:
    port = int(port)
  except ValueError, e:
    return False

  if ((port >= 80 and port <= 90) or
      (port >= 440 and port <= 450) or
      port >= 1024):
    return True
  return False
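
# Illustration (added commentary, not in the original file): the stub
# mirrors production's port policy, e.g.:
#
#   _IsAllowedPort(None)    # True  -- no explicit port in the URL
#   _IsAllowedPort('80')    # True  -- 80-90 range
#   _IsAllowedPort('443')   # True  -- 440-450 range
#   _IsAllowedPort('8080')  # True  -- unprivileged ports >= 1024
#   _IsAllowedPort('22')    # False -- e.g. SSH is blocked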


class URLFetchServiceStub(apiproxy_stub.APIProxyStub):
  """Stub version of the urlfetch API to be used with apiproxy_stub_map."""

  def __init__(self, service_name='urlfetch'):
    """Initializer.

    Args:
      service_name: Service name expected for all calls.
    """
    super(URLFetchServiceStub, self).__init__(service_name,
                                              max_request_size=MAX_REQUEST_SIZE)
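
  # Minimal usage sketch (added commentary, not in the original file):
  # the dev server registers this stub roughly like so, after which
  # ordinary urlfetch.fetch() calls are served by it:
  #
  #   from google.appengine.api import apiproxy_stub_map
  #   apiproxy_stub_map.apiproxy = apiproxy_stub_map.APIProxyStubMap()
  #   apiproxy_stub_map.apiproxy.RegisterStub('urlfetch',
  #                                           URLFetchServiceStub())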

  def _Dynamic_Fetch(self, request, response):
    """Trivial implementation of URLFetchService::Fetch().

    Args:
      request: the fetch to perform, a URLFetchRequest
      response: the fetch response, a URLFetchResponse
    """
    if len(request.url()) >= _MAX_URL_LENGTH:
      logging.error('URL is too long: %s...' % request.url()[:50])
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    (protocol, host, path, query, fragment) = urlparse.urlsplit(request.url())

    payload = None
    if request.method() == urlfetch_service_pb.URLFetchRequest.GET:
      method = 'GET'
    elif request.method() == urlfetch_service_pb.URLFetchRequest.POST:
      method = 'POST'
      payload = request.payload()
    elif request.method() == urlfetch_service_pb.URLFetchRequest.HEAD:
      method = 'HEAD'
    elif request.method() == urlfetch_service_pb.URLFetchRequest.PUT:
      method = 'PUT'
      payload = request.payload()
    elif request.method() == urlfetch_service_pb.URLFetchRequest.DELETE:
      method = 'DELETE'
    else:
      logging.error('Invalid method: %s', request.method())
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    if not (protocol == 'http' or protocol == 'https'):
      logging.error('Invalid protocol: %s', protocol)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    if not host:
      logging.error('Missing host.')
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    self._SanitizeHttpHeaders(_UNTRUSTED_REQUEST_HEADERS,
                              request.header_list())
    deadline = _API_CALL_DEADLINE
    if request.has_deadline():
      deadline = request.deadline()
    validate_certificate = _API_CALL_VALIDATE_CERTIFICATE_DEFAULT
    if request.has_mustvalidateservercertificate():
      validate_certificate = request.mustvalidateservercertificate()

    self._RetrieveURL(request.url(), payload, method,
                      request.header_list(), request, response,
                      follow_redirects=request.followredirects(),
                      deadline=deadline,
                      validate_certificate=validate_certificate)
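
  # Sketch of driving this handler directly (added commentary, not in the
  # original file; the set_* accessor names are assumed from the getters
  # used above -- normally the apiproxy does all of this for you):
  #
  #   request = urlfetch_service_pb.URLFetchRequest()
  #   request.set_url('http://example.com/')
  #   request.set_method(urlfetch_service_pb.URLFetchRequest.GET)
  #   request.set_followredirects(True)
  #   response = urlfetch_service_pb.URLFetchResponse()
  #   URLFetchServiceStub()._Dynamic_Fetch(request, response)
  #   print response.statuscode(), len(response.content())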

  def _RetrieveURL(self, url, payload, method, headers, request, response,
                   follow_redirects=True, deadline=_API_CALL_DEADLINE,
                   validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT):
    """Retrieves a URL.

    Args:
      url: String containing the URL to access.
      payload: Request payload to send, if any; None if no payload.
        If the payload is unicode, we assume it is utf-8.
      method: HTTP method to use (e.g., 'GET')
      headers: List of additional header objects to use for the request.
      request: Request object from original request.
      response: Response object to populate with the response data.
      follow_redirects: optional setting (defaulting to True) for whether or
        not we should transparently follow redirects (up to MAX_REDIRECTS)
      deadline: Number of seconds to wait for the urlfetch to finish.
      validate_certificate: If true, do not send request to server unless the
        certificate is valid, signed by a trusted CA, and the hostname matches
        the certificate.

    Raises:
      Raises an apiproxy_errors.ApplicationError exception with
      INVALID_URL_ERROR in cases where:
        - The protocol of the redirected URL is bad or missing.
        - The port is not in the allowable range of ports.
      Raises an apiproxy_errors.ApplicationError exception with
      TOO_MANY_REDIRECTS when MAX_REDIRECTS is exceeded.
    """
    last_protocol = ''
    last_host = ''

    if isinstance(payload, unicode):
      payload = payload.encode('utf-8')

    for redirect_number in xrange(MAX_REDIRECTS + 1):
      parsed = urlparse.urlsplit(url)
      protocol, host, path, query, fragment = parsed

      port = urllib.splitport(urllib.splituser(host)[1])[1]
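
      # Added commentary (not in the original file): splituser/splitport
      # peel any credentials and the port off the netloc, e.g.
      #   urllib.splituser('me:pw@host:8080')  -> ('me:pw', 'host:8080')
      #   urllib.splitport('host:8080')        -> ('host', '8080')
      # so `port` here is either a string like '8080' or None.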

      if not _IsAllowedPort(port):
        logging.error(
            'urlfetch received %s ; port %s is not allowed in production!' %
            (url, port))
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

      if protocol and not host:
        logging.error('Missing host on redirect; target url is %s' % url)
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

      # A scheme-less, host-less redirect target (e.g. "Location: /foo")
      # reuses the scheme and host of the previous request.
      if not host and not protocol:
        host = last_host
        protocol = last_protocol

      adjusted_headers = {
          'User-Agent':
          'AppEngine-Google; (+http://code.google.com/appengine)',
          'Host': host,
          'Accept-Encoding': 'gzip',
      }
      if payload is not None:
        adjusted_headers['Content-Length'] = str(len(payload))
      if method == 'POST' and payload:
        adjusted_headers['Content-Type'] = 'application/x-www-form-urlencoded'

      passthrough_content_encoding = False
      for header in headers:
        # A caller-supplied User-Agent is prepended to the App Engine
        # identifier rather than replacing it.
        if header.key().title().lower() == 'user-agent':
          adjusted_headers['User-Agent'] = (
              '%s %s' %
              (header.value(), adjusted_headers['User-Agent']))
        else:
          # If the caller sets Accept-Encoding itself, skip the stub's
          # transparent gzip decoding below. .title() canonicalizes the
          # header name's casing (e.g. 'content-type' -> 'Content-Type').
          if header.key().lower() == 'accept-encoding':
            passthrough_content_encoding = True
          adjusted_headers[header.key().title()] = header.value()

      if payload is not None:
        # 'string_escape' makes binary payloads printable for the debug log.
        escaped_payload = payload.encode('string_escape')
      else:
        escaped_payload = ''
      logging.debug('Making HTTP request: host = %r, '
                    'url = %r, payload = %.1000r, headers = %r',
                    host, url, escaped_payload, adjusted_headers)
      try:
        if protocol == 'http':
          connection_class = httplib.HTTPConnection
        elif protocol == 'https':
          if (validate_certificate and _CanValidateCerts() and
              CERT_PATH):
            connection_class = fancy_urllib.create_fancy_connection(
                ca_certs=CERT_PATH)
          else:
            connection_class = httplib.HTTPSConnection
        else:
          error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb.URLFetchServiceError.INVALID_URL, error_msg)

        if _CONNECTION_SUPPORTS_TIMEOUT:
          connection = connection_class(host, timeout=deadline)
        else:
          connection = connection_class(host)

        last_protocol = protocol
        last_host = host

        if query != '':
          full_path = path + '?' + query
        else:
          full_path = path

        if not _CONNECTION_SUPPORTS_TIMEOUT:
          orig_timeout = socket.getdefaulttimeout()
        try:
          if not _CONNECTION_SUPPORTS_TIMEOUT:
            # Before Python 2.6, httplib connections took no timeout
            # argument, so fall back to the process-wide socket default.
            socket.setdefaulttimeout(deadline)
          connection.request(method, full_path, payload, adjusted_headers)
          http_response = connection.getresponse()
          if method == 'HEAD':
            http_response_data = ''
          else:
            http_response_data = http_response.read()
        finally:
          if not _CONNECTION_SUPPORTS_TIMEOUT:
            socket.setdefaulttimeout(orig_timeout)
          connection.close()
      except (_fancy_urllib_InvalidCertException,
              _fancy_urllib_SSLError), e:
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR,
            str(e))
      except socket.timeout, e:
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED, str(e))
      except (httplib.error, socket.error, IOError), e:
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, str(e))
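
      # Added commentary (not in the original file): on the caller's side
      # these ApplicationErrors are translated by the urlfetch client
      # library into the exceptions users actually catch -- e.g.
      # FETCH_ERROR typically surfaces as urlfetch_errors.DownloadError
      # and SSL_CERTIFICATE_ERROR as urlfetch_errors.SSLCertificateError
      # (assuming the standard SDK mapping).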

      if http_response.status in REDIRECT_STATUSES and follow_redirects:
        url = http_response.getheader('Location', None)
        if url is None:
          error_msg = 'Redirecting response was missing "Location" header'
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb.URLFetchServiceError.MALFORMED_REPLY,
              error_msg)
      else:
        response.set_statuscode(http_response.status)
        if (http_response.getheader('content-encoding') == 'gzip' and
            not passthrough_content_encoding):
          gzip_stream = StringIO.StringIO(http_response_data)
          gzip_file = gzip.GzipFile(fileobj=gzip_stream)
          http_response_data = gzip_file.read()
        response.set_content(http_response_data[:MAX_RESPONSE_SIZE])

        for header_key in http_response.msg.keys():
          for header_value in http_response.msg.getheaders(header_key):
            if (header_key.lower() == 'content-encoding' and
                header_value == 'gzip' and
                not passthrough_content_encoding):
              continue
            if header_key.lower() == 'content-length' and method != 'HEAD':
              header_value = str(len(response.content()))
            header_proto = response.add_header()
            header_proto.set_key(header_key)
            header_proto.set_value(header_value)

        if len(http_response_data) > MAX_RESPONSE_SIZE:
          response.set_contentwastruncated(True)

        if request.url() != url:
          response.set_finalurl(url)

        break
    else:
      # This else belongs to the for loop: it runs only when the loop was
      # never exited via break, i.e. every iteration ended in a redirect.
      error_msg = 'Too many repeated redirects'
      logging.error(error_msg)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.TOO_MANY_REDIRECTS,
          error_msg)

  def _SanitizeHttpHeaders(self, untrusted_headers, headers):
    """Cleans "unsafe" headers from the HTTP request, in place.

    Args:
      untrusted_headers: Set of untrusted header names (all lowercase).
      headers: List of Header objects. The list is modified in place.
    """
    prohibited_headers = [h.key() for h in headers
                          if h.key().lower() in untrusted_headers]
    if prohibited_headers:
      logging.warn('Stripped prohibited headers from URLFetch request: %s',
                   prohibited_headers)

      for index in reversed(xrange(len(headers))):
        if headers[index].key().lower() in untrusted_headers:
          del headers[index]
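
# Quick sanity check for the sanitizer (added commentary, not in the
# original file; FakeHeader is a hypothetical stand-in for the proto
# Header group, which only needs a key() accessor here):
#
#   class FakeHeader(object):
#     def __init__(self, key):
#       self._key = key
#     def key(self):
#       return self._key
#
#   headers = [FakeHeader('Host'), FakeHeader('X-Custom')]
#   URLFetchServiceStub()._SanitizeHttpHeaders(_UNTRUSTED_REQUEST_HEADERS,
#                                              headers)
#   # -> only the 'X-Custom' header remains; 'Host' was stripped.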