PageRenderTime 70ms CodeModel.GetById 41ms RepoModel.GetById 1ms app.codeStats 0ms

/tornado/httpclient.py

https://github.com/joetyson/tornado
Python | 306 lines | 298 code | 2 blank | 6 comment | 0 complexity | e928ff3adfeb8598012a7234989a7121 MD5 | raw file
  1. import calendar
  2. import email.utils
  3. import httplib
  4. import os
  5. import time
  6. import weakref
  7. from tornado.escape import utf8
  8. from tornado import httputil
  9. from tornado.ioloop import IOLoop
  10. from tornado.util import import_object, bytes_type
  11. class HTTPClient(object):
  12. """A blocking HTTP client.
  13. Typical usage looks like this:
  14. http_client = httpclient.HTTPClient()
  15. try:
  16. response = http_client.fetch("http://www.google.com/")
  17. print response.body
  18. except httpclient.HTTPError, e:
  19. print "Error:", e
  20. fetch() can take a string URL or an HTTPRequest instance, which offers
  21. more options, like executing POST/PUT/DELETE requests.
  22. """
  23. def __init__(self):
  24. self._io_loop = IOLoop()
  25. self._async_client = AsyncHTTPClient(self._io_loop)
  26. self._response = None
  27. def __del__(self):
  28. self._async_client.close()
  29. def fetch(self, request, **kwargs):
  30. """Executes an HTTPRequest, returning an HTTPResponse.
  31. If an error occurs during the fetch, we raise an HTTPError.
  32. """
  33. def callback(response):
  34. self._response = response
  35. self._io_loop.stop()
  36. self._async_client.fetch(request, callback, **kwargs)
  37. self._io_loop.start()
  38. response = self._response
  39. self._response = None
  40. response.rethrow()
  41. return response
  42. class AsyncHTTPClient(object):
  43. """An non-blocking HTTP client.
  44. Example usage:
  45. import ioloop
  46. def handle_request(response):
  47. if response.error:
  48. print "Error:", response.error
  49. else:
  50. print response.body
  51. ioloop.IOLoop.instance().stop()
  52. http_client = httpclient.AsyncHTTPClient()
  53. http_client.fetch("http://www.google.com/", handle_request)
  54. ioloop.IOLoop.instance().start()
  55. fetch() can take a string URL or an HTTPRequest instance, which offers
  56. more options, like executing POST/PUT/DELETE requests.
  57. The constructor for this class is magic in several respects: It actually
  58. creates an instance of an implementation-specific subclass, and instances
  59. are reused as a kind of pseudo-singleton (one per IOLoop). The keyword
  60. argument force_instance=True can be used to suppress this singleton
  61. behavior. Constructor arguments other than io_loop and force_instance
  62. are deprecated. The implementation subclass as well as arguments to
  63. its constructor can be set with the static method configure()
  64. """
  65. _async_clients = weakref.WeakKeyDictionary()
  66. _impl_class = None
  67. _impl_kwargs = None
  68. def __new__(cls, io_loop=None, max_clients=10, force_instance=False,
  69. **kwargs):
  70. io_loop = io_loop or IOLoop.instance()
  71. if io_loop in cls._async_clients and not force_instance:
  72. return cls._async_clients[io_loop]
  73. else:
  74. if cls is AsyncHTTPClient:
  75. if cls._impl_class is None:
  76. from tornado.simple_httpclient import SimpleAsyncHTTPClient
  77. AsyncHTTPClient._impl_class = SimpleAsyncHTTPClient
  78. impl = cls._impl_class
  79. else:
  80. impl = cls
  81. instance = super(AsyncHTTPClient, cls).__new__(impl)
  82. args = {}
  83. if cls._impl_kwargs:
  84. args.update(cls._impl_kwargs)
  85. args.update(kwargs)
  86. instance.initialize(io_loop, max_clients, **args)
  87. if not force_instance:
  88. cls._async_clients[io_loop] = instance
  89. return instance
  90. def close(self):
  91. """Destroys this http client, freeing any file descriptors used.
  92. Not needed in normal use, but may be helpful in unittests that
  93. create and destroy http clients. No other methods may be called
  94. on the AsyncHTTPClient after close().
  95. """
  96. if self._async_clients[self.io_loop] is self:
  97. del self._async_clients[self.io_loop]
  98. def fetch(self, request, callback, **kwargs):
  99. """Executes an HTTPRequest, calling callback with an HTTPResponse.
  100. If an error occurs during the fetch, the HTTPResponse given to the
  101. callback has a non-None error attribute that contains the exception
  102. encountered during the request. You can call response.rethrow() to
  103. throw the exception (if any) in the callback.
  104. """
  105. raise NotImplementedError()
  106. @staticmethod
  107. def configure(impl, **kwargs):
  108. """Configures the AsyncHTTPClient subclass to use.
  109. AsyncHTTPClient() actually creates an instance of a subclass.
  110. This method may be called with either a class object or the
  111. fully-qualified name of such a class (or None to use the default,
  112. SimpleAsyncHTTPClient)
  113. If additional keyword arguments are given, they will be passed
  114. to the constructor of each subclass instance created. The
  115. keyword argument max_clients determines the maximum number of
  116. simultaneous fetch() operations that can execute in parallel
  117. on each IOLoop. Additional arguments may be supported depending
  118. on the implementation class in use.
  119. """
  120. if isinstance(impl, (unicode, bytes_type)):
  121. impl = import_object(impl)
  122. if impl is not None and not issubclass(impl, AsyncHTTPClient):
  123. raise ValueError("Invalid AsyncHTTPClient implementation")
  124. AsyncHTTPClient._impl_class = impl
  125. AsyncHTTPClient._impl_kwargs = kwargs
  126. class HTTPRequest(object):
  127. def __init__(self, url, method="GET", headers=None, body=None,
  128. auth_username=None, auth_password=None,
  129. connect_timeout=20.0, request_timeout=20.0,
  130. if_modified_since=None, follow_redirects=True,
  131. max_redirects=5, user_agent=None, use_gzip=True,
  132. network_interface=None, streaming_callback=None,
  133. header_callback=None, prepare_curl_callback=None,
  134. proxy_host=None, proxy_port=None, proxy_username=None,
  135. proxy_password='', allow_nonstandard_methods=False,
  136. validate_cert=True, ca_certs=None,
  137. allow_ipv6=None):
  138. if headers is None:
  139. headers = httputil.HTTPHeaders()
  140. if if_modified_since:
  141. timestamp = calendar.timegm(if_modified_since.utctimetuple())
  142. headers["If-Modified-Since"] = email.utils.formatdate(
  143. timestamp, localtime=False, usegmt=True)
  144. # Proxy support: proxy_host and proxy_port must be set to connect via
  145. # proxy. The username and password credentials are optional.
  146. self.proxy_host = proxy_host
  147. self.proxy_port = proxy_port
  148. self.proxy_username = proxy_username
  149. self.proxy_password = proxy_password
  150. self.url = url
  151. self.method = method
  152. self.headers = headers
  153. self.body = utf8(body)
  154. self.auth_username = auth_username
  155. self.auth_password = auth_password
  156. self.connect_timeout = connect_timeout
  157. self.request_timeout = request_timeout
  158. self.follow_redirects = follow_redirects
  159. self.max_redirects = max_redirects
  160. self.user_agent = user_agent
  161. self.use_gzip = use_gzip
  162. self.network_interface = network_interface
  163. self.streaming_callback = streaming_callback
  164. self.header_callback = header_callback
  165. self.prepare_curl_callback = prepare_curl_callback
  166. self.allow_nonstandard_methods = allow_nonstandard_methods
  167. # SSL certificate validation:
  168. # validate_cert: boolean, set to False to disable validation
  169. # ca_certs: filename of CA certificates in PEM format, or
  170. # None to use defaults
  171. # Note that in the curl-based HTTP client, if any request
  172. # uses a custom ca_certs file, they all must (they don't have to
  173. # all use the same ca_certs, but it's not possible to mix requests
  174. # with ca_certs and requests that use the defaults).
  175. # SimpleAsyncHTTPClient does not have this limitation.
  176. self.validate_cert = validate_cert
  177. self.ca_certs = ca_certs
  178. # allow_ipv6 may be True, False, or None for default behavior
  179. # that varies by httpclient implementation.
  180. self.allow_ipv6 = allow_ipv6
  181. self.start_time = time.time()
  182. class HTTPResponse(object):
  183. """HTTP Response object.
  184. Attributes:
  185. * request: HTTPRequest object
  186. * code: numeric HTTP status code, e.g. 200 or 404
  187. * headers: httputil.HTTPHeaders object
  188. * buffer: cStringIO object for response body
  189. * body: respose body as string (created on demand from self.buffer)
  190. * error: Exception object, if any
  191. * request_time: seconds from request start to finish
  192. * time_info: dictionary of diagnostic timing information from the request.
  193. Available data are subject to change, but currently uses timings
  194. available from http://curl.haxx.se/libcurl/c/curl_easy_getinfo.html,
  195. plus 'queue', which is the delay (if any) introduced by waiting for
  196. a slot under AsyncHTTPClient's max_clients setting.
  197. """
  198. def __init__(self, request, code, headers={}, buffer=None,
  199. effective_url=None, error=None, request_time=None,
  200. time_info={}):
  201. self.request = request
  202. self.code = code
  203. self.headers = headers
  204. self.buffer = buffer
  205. self._body = None
  206. if effective_url is None:
  207. self.effective_url = request.url
  208. else:
  209. self.effective_url = effective_url
  210. if error is None:
  211. if self.code < 200 or self.code >= 300:
  212. self.error = HTTPError(self.code, response=self)
  213. else:
  214. self.error = None
  215. else:
  216. self.error = error
  217. self.request_time = request_time
  218. self.time_info = time_info
  219. def _get_body(self):
  220. if self.buffer is None:
  221. return None
  222. elif self._body is None:
  223. self._body = self.buffer.getvalue()
  224. return self._body
  225. body = property(_get_body)
  226. def rethrow(self):
  227. if self.error:
  228. raise self.error
  229. def __repr__(self):
  230. args = ",".join("%s=%r" % i for i in self.__dict__.iteritems())
  231. return "%s(%s)" % (self.__class__.__name__, args)
  232. class HTTPError(Exception):
  233. """Exception thrown for an unsuccessful HTTP request.
  234. Attributes:
  235. code - HTTP error integer error code, e.g. 404. Error code 599 is
  236. used when no HTTP response was received, e.g. for a timeout.
  237. response - HTTPResponse object, if any.
  238. Note that if follow_redirects is False, redirects become HTTPErrors,
  239. and you can look at error.response.headers['Location'] to see the
  240. destination of the redirect.
  241. """
  242. def __init__(self, code, message=None, response=None):
  243. self.code = code
  244. message = message or httplib.responses.get(code, "Unknown")
  245. self.response = response
  246. Exception.__init__(self, "HTTP %d: %s" % (self.code, message))
  247. def main():
  248. from tornado.options import define, options, parse_command_line
  249. define("print_headers", type=bool, default=False)
  250. define("print_body", type=bool, default=True)
  251. define("follow_redirects", type=bool, default=True)
  252. args = parse_command_line()
  253. client = HTTPClient()
  254. for arg in args:
  255. try:
  256. response = client.fetch(arg,
  257. follow_redirects=options.follow_redirects)
  258. except HTTPError, e:
  259. if e.response is not None:
  260. response = e.response
  261. else:
  262. raise
  263. if options.print_headers:
  264. print response.headers
  265. if options.print_body:
  266. print response.body
  267. if __name__ == "__main__":
  268. main()