/apiary/http/http.py

https://bitbucket.org/lindenlab/apiary/ · Python · 341 lines · 231 code · 51 blank · 59 comment · 37 complexity · 85c971c334a5f185c3d3409932d70fb6 MD5 · raw file

  1. #
  2. # $LicenseInfo:firstyear=2010&license=mit$
  3. #
  4. # Copyright (c) 2010, Linden Research, Inc.
  5. #
  6. # Permission is hereby granted, free of charge, to any person obtaining a copy
  7. # of this software and associated documentation files (the "Software"), to deal
  8. # in the Software without restriction, including without limitation the rights
  9. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. # copies of the Software, and to permit persons to whom the Software is
  11. # furnished to do so, subject to the following conditions:
  12. #
  13. # The above copyright notice and this permission notice shall be included in
  14. # all copies or substantial portions of the Software.
  15. #
  16. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22. # THE SOFTWARE.
  23. # $/LicenseInfo$
  24. #
  25. from optparse import OptionParser
  26. import re
  27. import time
  28. import socket
  29. import apiary
  30. from apiary.tools.codec import Message
  31. from apiary.tools.timer import Timer
  32. from apiary.tools.stattools import StatValue
  33. from apiary.tools.span import Span, SpanSequence, SlidingWindowSequence
  34. from apiary.tools.debug import debug, traced_method
  35. from apiary.tools.dummyfile import DummyFile
  class HTTPWorkerBee(apiary.WorkerBee):
      """Worker that replays one HTTP request per session over a raw TCP socket.

      A session is the call sequence ``start()`` -> ``event(request)`` ->
      ``end()``.  The outcome of the session is held in ``self._result`` until
      ``end()`` packages it into an encoded ``Message``.

      When the configured host is ``'dummy'`` no socket is opened; canned
      responses from ``_fake_results`` are reported instead.
      """

      # Canned responses handed out round-robin in dummy mode.  This list lives
      # on the class, so the rotation is shared by every instance.
      _fake_results = ['HTTP/1.0 200 OK\r\nSome-Header: some value\r\n\r\n',
                       'HTTP/1.0 404 Not Found\r\nAnother-Header: another value\r\n\r\n']

      # Matches an HTTP status line such as 'HTTP/1.0 200 OK'.
      _HTTPStatusPattern = re.compile(r'(HTTP/\d\.\d) (\d{3}) (.*?)$', re.MULTILINE)

      def __init__(self, options, arguments):
          """Read the target host/port from *options* and reset session state."""
          apiary.WorkerBee.__init__(self, options, arguments)
          self._host = options.http_host
          if self._host == 'dummy':
              # None means "do not connect anywhere" throughout this class.
              self._host = None
          self._port = options.http_port
          self._conn = None
          # _result is either None or a tuple starting with (valid, details):
          #   valid is a bool specifying whether the HTTP response was
          #   successfully parsed.
          #   If valid is True, details contains the HTTP status line and the
          #   tuple continues with (request_length, response_length).
          #   If valid is False, details contains a generic error message.
          self._result = None
          self._timer = None
          self._worker_hostname = socket.getfqdn()

      #@traced_method
      def start(self):
          """Begin a session: start the timer and connect to the target."""
          assert self._conn is None, 'Precondition violation: start called without ending previous session.'
          self._record_socket_errors(self._raw_start)

      def _raw_start(self):
          """Create the session timer and, unless in dummy mode, connect."""
          self._timer = Timer()
          try:
              if self._host:
                  self._conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                  self._conn.connect((self._host, self._port))
          finally:
              # Record a 'connect' time regardless of exception (and in dummy
              # mode too) since analysis code depends on its presence.
              self._timer.event('connect')

      #@traced_method
      def event(self, request):
          """Send *request* and record the outcome in ``self._result``.

          If ``start()`` already failed, the stored failure is kept and nothing
          is sent.
          """
          # Precondition:
          if self._result is not None:
              assert not self._result[0], 'Precondition violation: expected failure result; got: %r' % (self._result,)
              return  # Connection failure.
          self._record_socket_errors(self._raw_event, request)
          # Postcondition: every path through _raw_event / _record_socket_errors
          # must have produced a result.
          assert self._result is not None, 'Postcondition violation: event produced no result.'

      def _raw_event(self, request, bufsize=2**14):
          """Perform the request/response exchange (or fake it in dummy mode).

          request -- the raw request data to send.
          bufsize -- maximum number of bytes asked of each recv() call.
          """
          reqlen = len(request)
          if not self._host:
              # Dummy mode: rotate through the canned responses, reporting only
              # the status line, as the real path does.
              fake = self._fake_results.pop(0)
              self._fake_results.append(fake)
              status_line = fake.split('\r\n', 1)[0]
              self._set_result_from_http_response(status_line, reqlen, len(fake))
              return

          assert self._conn, 'Precondition violation: event called without starting a session.'
          self._timer.event('start-send')
          while request:
              # send() may accept only part of the data; keep going until done.
              written = self._conn.send(request)
              request = request[written:]

          self._timer.event('start-recv')
          inbuf = ''
          i = -1
          # Read until the end of the first line is seen or the peer closes.
          while i == -1:
              data = self._conn.recv(bufsize)
              if not data:
                  break
              inbuf += data
              i = inbuf.find('\r\n')

          if i >= 0:
              # Read until the socket closes; only the byte count is kept.
              resplen = len(inbuf)
              chunk = self._conn.recv(bufsize)
              while chunk:
                  resplen += len(chunk)
                  chunk = self._conn.recv(bufsize)
              self._set_result_from_http_response(inbuf[:i],
                                                  reqlen,
                                                  resplen)
              self._timer.event('parse-response')
          if self._result is None:
              self._timer.event('parse-response-fail')
              self._error_result('HTTP Response line not found: %r', inbuf[:256])
          self._timer.event('finish-recv')

      #@traced_method
      def end(self):
          """Finish the session and return its result as an encoded Message.

          The message body is the status line (or error text); the headers
          carry the worker hostname, the validity flag, the timer intervals
          and, for valid responses, the request and response lengths.
          """
          assert self._result is not None, 'Precondition violation: .end() precondition failed, no result.'
          if self._conn is not None:
              self._conn.close()
              self._timer.event('close')
              self._conn = None
          elif not self._host:
              # Dummy mode has no socket, but a 'close' timing is still needed.
              self._timer.event('close')
          # Otherwise a socket error already recorded 'close'.
          tdict = dict(self._timer.intervals)
          assert 'connect' in tdict and 'close' in tdict, 'Postcondition violation, missing timings: %r' % (tdict,)
          validresponse, details = self._result[:2]
          lengthinfo = {}
          if validresponse:
              reqlen, resplen = self._result[2:]
              lengthinfo['request_length'] = reqlen
              lengthinfo['response_length'] = resplen
          self._result = None
          msg = Message(details,
                        worker=self._worker_hostname,
                        valid_response=validresponse,
                        timings=self._timer.intervals,
                        **lengthinfo)
          return msg.encode_to_string()

      def _set_result_from_http_response(self, result, reqlen, resplen):
          """Store a result tuple based on whether *result* is a status line."""
          m = self._HTTPStatusPattern.match(result)
          if m is None:
              self._result = (False, 'Failed to parse HTTP Response.', reqlen, resplen)
          else:
              self._result = (True, result, reqlen, resplen)

      def _error_result(self, tmpl, *args):
          """Store a failure result whose message is ``tmpl % args``."""
          self._result = (False, tmpl % args)

      def _record_socket_errors(self, f, *args):
          """Call ``f(*args)``, turning a socket.error into a failure result."""
          try:
              return f(*args)
          except socket.error as e:
              # Release the descriptor instead of just dropping the reference.
              conn, self._conn = self._conn, None
              if conn is not None:
                  try:
                      conn.close()
                  except socket.error:
                      pass  # Best effort: the connection is already broken.
              self._timer.event('close')
              # _result may already be set, for example, if we've parsed a
              # response and we are in the middle of reading the headers/body.
              if self._result is None:
                  self._error_result('socket.error: %r', e.args)
  class HTTPQueenBee(apiary.QueenBee):
      """Queen that feeds recorded requests to workers and tallies the results.

      Requests are read as ``Message`` objects from the events file given as
      the single positional argument.  Each one becomes its own job.  Results
      are optionally dumped to a file and summarised in a periodic tally.
      """

      def __init__(self, options, arguments, updateperiod=5):
          """Set up input/dump paths and empty statistics.

          updateperiod -- minimum number of seconds between printed tallies.

          Raises SystemExit unless exactly one positional argument is given.
          """
          apiary.QueenBee.__init__(self, options, arguments)
          try:
              [self._inpath] = arguments
          except ValueError:
              raise SystemExit('Usage error: HTTPQueenBee needs an events data file.')
          dumppath = options.http_dump
          if dumppath is None:
              self._dumpfile = DummyFile()
          else:
              self._dumpfile = open(dumppath, 'wb')
          self._updateperiod = updateperiod
          self._fp = None
          self._eventgen = None
          self._jobid = 0
          self._histogram = {}  # { HTTPStatus -> absolute_frequency }
          self._timingstats = {}  # { timingtag -> StatValue }
          self._rps = StatValue()
          self._cps = StatValue()  # "concurrency-per-second"
          self._roundtrip = StatValue()
          self._slwin = SlidingWindowSequence(updateperiod+0.5)
          self._allspans = SpanSequence()
          self._tally_time = 0

      def next(self):
          """Dispatch the next recorded request as a job.

          Returns True if a job was issued, False once the input is exhausted.
          """
          if self._fp is None:
              assert self._eventgen is None, 'Invariant violation: _eventgen set, but _fp is None.'
              self._fp = open(self._inpath, 'rb')
              self._eventgen = Message.decode_many_from_file(self._fp)
          try:
              msg = next(self._eventgen).body
          except StopIteration:
              # Close the input, but keep the (closed) file object so a later
              # call does not reopen the file and replay it from the start.
              self._fp.close()
              return False
          jobid = self._next_jobid()
          self.start(jobid)
          self.event(jobid, msg)
          self.end(jobid)
          return True

      def result(self, seq, msgenc):
          """Record one encoded worker result and print a tally when one is due."""
          msg = Message.decode_from_string(msgenc)
          msg.headers['seq'] = seq
          msg.encode_to_file(self._dumpfile)
          self._update_histogram(msg)
          self._record_timing_stats(msg)
          if time.time() > self._tally_time:
              self.print_tally()

      def print_tally(self):
          """Print the response histogram, per-event timings and rate stats."""
          # sum() copes with an empty histogram; a reduce() without an initial
          # value raises TypeError when no results have arrived yet.
          totalcount = sum(self._histogram.values())
          print('')
          print(" count - frequency - message")
          print("------------ --------- ---------------------------------------")
          for k, v in sorted(self._histogram.items()):
              relfreq = 100.0 * float(v) / float(totalcount)
              print("%12d - %6.2f%% - %s" % (v, relfreq, k))
          print('')
          print(" timing event - stats")
          print("-------------- ---------------------------------------")
          for event, stat in sorted(self._timingstats.items()):
              print('%14s %s' % (event, stat.format()))
          print('')
          print("RPS, Concurrency, and Response Time")
          print("-----------------------------------")
          self._update_timing_stats()
          print('%14s %s' % ('rps', self._rps.format()))
          print('%14s %s' % ('concurrency', self._cps.format()))
          print('%14s %s' % ('roundtrip', self._roundtrip.format()))
          self._tally_time = time.time() + self._updateperiod

      def main(self):
          """Run the queen's main loop, then print a final tally and timings."""
          # time.clock was removed in Python 3.8; use process_time where it
          # exists and fall back to clock on interpreters that predate it.
          process_clock = getattr(time, 'process_time', None) or time.clock
          t = - time.time()
          c = - process_clock()
          apiary.QueenBee.main(self)
          c += process_clock()
          t += time.time()
          self.print_tally()
          print("Timing: %f process clock, %f wall clock" % (c, t))

      def _next_jobid(self):
          """Return the next job id as a string and advance the counter."""
          jobid = str(self._jobid)
          self._jobid += 1
          return jobid

      def _update_histogram(self, msg):
          """Count one occurrence of the message body in the histogram."""
          k = msg.body
          self._histogram[k] = 1 + self._histogram.get(k, 0)

      def _record_timing_stats(self, msg):
          """Record the connect-to-close span and per-event offsets of *msg*.

          Requires the 'timings' header to contain both a 'connect' and a
          'close' entry.
          """
          timings = msg.headers['timings']
          tdict = dict(timings)
          connect = tdict['connect']
          span = Span(connect, tdict['close'])
          self._slwin.insert(span)
          self._allspans.insert(span)
          for tag, t in timings:
              # Each event is sampled as its offset from the connect time.
              delta = t - connect
              self._timingstats.setdefault(tag, StatValue()).sample(delta)

      def _update_timing_stats(self):
          '''
          These stats use a simplistic second-wide bucket histogram.

          The concurrency statistic is sampled for every concurrency
          count in a given 1-second window throwing away the time
          information (even though it is recorded).

          Ex: Consider this sequence of connection spans:

          [(0.1, 0.9),
           (1.4, 1.5),
           (2.0, 3.1),
           (3.0, 3.1)]

          -then the concurrency windows would look like a sequence of
          these samples (without times):

          [[0, 1],
           [0, 1],
           [1],
           [2, 0]]
          '''
          concvec = list(self._slwin.concurrency_vector())
          for window, subseq in self._slwin.as_bins(binwidth=1.0):
              self._rps.sample(len(subseq))
              for span in subseq:
                  self._roundtrip.sample(span.magnitude)
              # Consume the concurrency entries whose first field falls
              # inside this window.
              while concvec and window.contains(concvec[0][0]):
                  _, _, q = concvec.pop(0)
                  self._cps.sample(len(q))
  class ProtocolError(Exception):
      """Exception whose message is a %-style template filled with arguments."""

      def __init__(self, tmpl, *args):
          # Format first so the exception carries one finished message string.
          message = tmpl % args
          super(ProtocolError, self).__init__(message)
  # Plugin interface: module-level names bound to this module's worker and
  # queen implementations.
  workerbee_cls = HTTPWorkerBee
  queenbee_cls = HTTPQueenBee
  def add_options(parser):
      """Register the --host, --port and --dump options on *parser*.

      The parsed values land in ``http_host``, ``http_port`` (an int) and
      ``http_dump`` respectively.
      """
      host_help = ("Connect to the target HTTP host."
                   " The value 'dummy' (which is default)"
                   " does not connect to any server and behaves"
                   " as if an HTTP 200 OK response was received"
                   " for all requests.")
      option_table = (
          ('--host', dict(default='dummy', dest='http_host', help=host_help)),
          ('--port', dict(default=80, dest='http_port', type='int',
                          help="Connect to the target HTTP port.")),
          ('--dump', dict(default=None, dest='http_dump',
                          help="Results dump file.")),
      )
      for flag, settings in option_table:
          parser.add_option(flag, **settings)
  285. parser.add_option('--dump', default=None, dest='http_dump',
  286. help="Results dump file.")