/apiary/http/http.py
Python | 341 lines | 244 code | 41 blank | 56 comment | 24 complexity | 85c971c334a5f185c3d3409932d70fb6 MD5 | raw file
#
# $LicenseInfo:firstyear=2010&license=mit$
#
# Copyright (c) 2010, Linden Research, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# $/LicenseInfo$
#

"""HTTP plugin for apiary.

Defines a worker that sends raw HTTP requests over a TCP socket and reports
the status line plus timing marks, and a queen that reads requests from an
events file, dispatches them as jobs and tallies the reported results.

NOTE: this module targets Python 2 (str-based socket I/O, time.clock).
"""

from collections import deque
from optparse import OptionParser
import re
import time
import socket

import apiary
from apiary.tools.codec import Message
from apiary.tools.timer import Timer
from apiary.tools.stattools import StatValue
from apiary.tools.span import Span, SpanSequence, SlidingWindowSequence
from apiary.tools.debug import debug, traced_method
from apiary.tools.dummyfile import DummyFile


class HTTPWorkerBee(apiary.WorkerBee):
    """Worker that performs one HTTP exchange per start/event/end session.

    When the configured host is 'dummy' no connection is made; canned
    responses from ``_fake_results`` are handed out in rotation instead.
    """

    # Canned responses used in dummy mode.  This list is a class attribute, so
    # the rotation state is shared by every instance in the same process.
    _fake_results = ['HTTP/1.0 200 OK\r\nSome-Header: some value\r\n\r\n',
                     'HTTP/1.0 404 Not Found\r\nAnother-Header: another value\r\n\r\n']

    def __init__(self, options, arguments):
        """Read the target host/port from *options* (see add_options)."""
        apiary.WorkerBee.__init__(self, options, arguments)
        self._host = options.http_host
        if self._host == 'dummy':
            self._host = None
        self._port = options.http_port
        self._conn = None
        # _result is either None or a tuple starting with (valid, details):
        # valid is a bool specifying whether the HTTP response was successfully parsed.
        # If valid is True, details contains the HTTP response.
        # If valid is False, details contains a generic error message.
        # Results built by _set_result_from_http_response carry two extra
        # items: (valid, details, request_length, response_length).
        self._result = None
        self._timer = None
        self._worker_hostname = socket.getfqdn()

    #@traced_method
    def start(self):
        """Begin a session: create a fresh timer and connect to the target."""
        assert self._conn is None, 'Precondition violation: start called without ending previous session.'
        self._record_socket_errors(self._raw_start)

    def _raw_start(self):
        """Create the session timer and, unless in dummy mode, connect."""
        self._timer = Timer()
        if not self._host:
            # Dummy mode: there is nothing to connect to, but end() and the
            # queen's timing analysis both require a 'connect' mark.
            self._timer.event('connect')
            return

        self._conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            self._conn.connect((self._host, self._port))
        finally:
            # Record a 'connect' time regardless of exception since analysis code depends on its presence:
            self._timer.event('connect')

    #@traced_method
    def event(self, request):
        """Send *request* (a raw HTTP request string) and record the outcome.

        If start() already recorded a failure, the request is skipped and the
        earlier failure is what end() will report.
        """
        # Precondition:
        if self._result is not None:
            assert not self._result[0], 'Precondition violation: expected failure result; got: %r' % (self._result,)
            return  # Connection failure.
        self._record_socket_errors(self._raw_event, request)
        # Postcondition: every path through _raw_event / _record_socket_errors
        # must have produced a result.
        assert self._result is not None, repr(self._result)

    def _raw_event(self, request, bufsize=2**14):
        """Perform the exchange and set self._result.

        request -- raw request data to send.
        bufsize -- maximum number of bytes asked of each recv() call.

        socket.error is deliberately not handled here; callers route this
        method through _record_socket_errors.
        """
        reqlen = len(request)

        if not self._host:
            # Dummy mode: rotate through the canned responses.
            fake = self._fake_results.pop(0)
            self._fake_results.append(fake)
            self._set_result_from_http_response(fake, reqlen, len(fake))
            return

        assert self._conn, 'Precondition violation: event called without starting a session.'
        self._timer.event('start-send')
        self._conn.sendall(request)

        self._timer.event('start-recv')
        inbuf = ''
        eol = -1
        chunk = self._conn.recv(bufsize)
        while chunk:
            inbuf += chunk
            eol = inbuf.find('\r\n')
            if eol != -1:
                # Stop before issuing another recv() so that no received
                # data is dropped from the response length below.
                break
            chunk = self._conn.recv(bufsize)

        if eol >= 0:
            # Read until the socket closes; only the byte count is kept:
            resplen = len(inbuf)
            chunk = self._conn.recv(bufsize)
            while chunk:
                resplen += len(chunk)
                chunk = self._conn.recv(bufsize)
            self._set_result_from_http_response(inbuf[:eol], reqlen, resplen)
            self._timer.event('parse-response')

        if self._result is None:
            # The peer closed the connection before sending a full line.
            self._timer.event('parse-response-fail')
            self._error_result('HTTP Response line not found: %r', inbuf[:256])
        self._timer.event('finish-recv')

    #@traced_method
    def end(self):
        """Close the session and return the encoded result message.

        The message body is the status line (or an error description); its
        headers carry the worker hostname, the validity flag, the timer
        intervals and, for valid responses, the request/response lengths.
        """
        assert self._result is not None, 'Precondition violation: .end() precondition failed, no result.'

        if not self._host:
            # Dummy mode never opens a socket; still record the 'close' mark
            # that the postcondition below requires.
            self._timer.event('close')
        elif self._conn is not None:
            # On socket errors _record_socket_errors has already recorded
            # 'close' and cleared _conn, so this branch is skipped then.
            self._conn.close()
            self._timer.event('close')
            self._conn = None

        tdict = dict(self._timer.intervals)
        assert 'connect' in tdict and 'close' in tdict, 'Postcondition violation, missing timings: %r' % (tdict,)

        validresponse, details = self._result[:2]

        lengthinfo = {}
        if validresponse:
            reqlen, resplen = self._result[2:]
            lengthinfo['request_length'] = reqlen
            lengthinfo['response_length'] = resplen

        self._result = None

        msg = Message(details,
                      worker=self._worker_hostname,
                      valid_response=validresponse,
                      timings=self._timer.intervals,
                      **lengthinfo)
        return msg.encode_to_string()

    def _set_result_from_http_response(self, result, reqlen, resplen):
        """Set self._result according to whether *result* has a status line."""
        m = self._HTTPStatusPattern.match(result)
        if m is None:
            self._result = (False, 'Failed to parse HTTP Response.', reqlen, resplen)
        else:
            self._result = (True, result, reqlen, resplen)

    def _error_result(self, tmpl, *args):
        """Set self._result to a failure described by ``tmpl % args``."""
        self._result = (False, tmpl % args)

    def _record_socket_errors(self, f, *args):
        """Call ``f(*args)``, converting socket.error into a failure result.

        Returns whatever *f* returns, or None when a socket error occurred.
        """
        try:
            return f(*args)
        except socket.error as e:
            self._timer.event('close')
            if self._conn is not None:
                # Release the descriptor explicitly rather than relying on
                # garbage collection of the dropped socket object.
                try:
                    self._conn.close()
                except socket.error:
                    pass  # Best effort: the connection is already broken.
                self._conn = None
            # _result may already be set, for example, if we've parsed a
            # response and we are in the middle of reading the headers/body.
            if self._result is None:
                self._error_result('socket.error: %r', e.args)

    # Anchored at the start of the text by .match(); with MULTILINE the
    # trailing '$' ends the match at the first line break.
    _HTTPStatusPattern = re.compile(r'(HTTP/\d\.\d) (\d{3}) (.*?)$', re.MULTILINE)


class HTTPQueenBee(apiary.QueenBee):
    """Queen that feeds requests from an events file to workers and tallies results."""

    def __init__(self, options, arguments, updateperiod=5):
        """Set up input, dump output and statistics.

        arguments    -- must contain exactly one item: the events data file path.
        updateperiod -- minimum number of seconds between tally printouts.

        Raises SystemExit when *arguments* does not hold exactly one path.
        """
        apiary.QueenBee.__init__(self, options, arguments)
        try:
            [self._inpath] = arguments
        except ValueError:
            raise SystemExit('Usage error: HTTPQueenBee needs an events data file.')
        dumppath = options.http_dump
        if dumppath is None:
            self._dumpfile = DummyFile()
        else:
            self._dumpfile = open(dumppath, 'wb')

        self._updateperiod = updateperiod
        self._fp = None
        self._eventgen = None
        self._jobid = 0
        self._histogram = {}  # { HTTPStatus -> absolute_frequency }
        self._timingstats = {}  # { timingtag -> StatValue }
        self._rps = StatValue()
        self._cps = StatValue()  # "concurrency-per-second"
        self._roundtrip = StatValue()
        self._slwin = SlidingWindowSequence(updateperiod+0.5)
        self._allspans = SpanSequence()
        self._tally_time = 0

    def next(self):
        """Dispatch the next request from the events file as one job.

        The input file is opened lazily on the first call.  Returns True if a
        job was dispatched, False once the events file is exhausted.
        """
        if self._fp is None:
            assert self._eventgen is None, 'Invariant violation: _fp is None, but _eventgen is set.'
            self._fp = open(self._inpath, 'rb')
            self._eventgen = Message.decode_many_from_file(self._fp)
        try:
            msg = next(self._eventgen).body
        except StopIteration:
            return False

        jobid = self._next_jobid()
        self.start(jobid)
        self.event(jobid, msg)
        self.end(jobid)
        return True

    def result(self, seq, msgenc):
        """Record one encoded worker result.

        The decoded message is tagged with *seq*, appended to the dump file
        and folded into the histogram and timing statistics.  A tally is
        printed once the update period has elapsed.
        """
        msg = Message.decode_from_string(msgenc)

        msg.headers['seq'] = seq
        msg.encode_to_file(self._dumpfile)

        self._update_histogram(msg)
        self._record_timing_stats(msg)
        if time.time() > self._tally_time:
            self.print_tally()

    def print_tally(self):
        """Print the response histogram and timing statistics to stdout."""
        # sum() copes with an empty histogram (e.g. a run with no results),
        # where reduce() without an initial value would raise TypeError.
        totalcount = sum(self._histogram.values())
        print('')
        print("       count - frequency - message")
        print("------------   ---------   ---------------------------------------")
        for k, v in sorted(self._histogram.items()):
            relfreq = 100.0 * float(v) / float(totalcount)
            print("%12d -   %6.2f%% - %s" % (v, relfreq, k))

        print('')
        print("  timing event - stats")
        print("--------------   ---------------------------------------")
        for event, stat in sorted(self._timingstats.items()):
            print('%14s   %s' % (event, stat.format()))

        print('')
        print("RPS, Concurrency, and Response Time")
        print("-----------------------------------")

        self._update_timing_stats()
        print('%14s   %s' % ('rps', self._rps.format()))
        print('%14s   %s' % ('concurrency', self._cps.format()))
        print('%14s   %s' % ('roundtrip', self._roundtrip.format()))

        self._tally_time = time.time() + self._updateperiod

    def main(self):
        """Run the queen's main loop, then print a final tally and timings."""
        # Start from the negated clock readings so that adding the end
        # readings yields the elapsed amounts.
        t = - time.time()
        c = - time.clock()
        apiary.QueenBee.main(self)
        c += time.clock()
        t += time.time()

        self.print_tally()
        print("Timing: %f process clock, %f wall clock" % (c, t))

    def _next_jobid(self):
        """Return a fresh job id: the decimal string of a running counter."""
        jobid = str(self._jobid)
        self._jobid += 1
        return jobid

    def _update_histogram(self, msg):
        """Count one more occurrence of the message body."""
        k = msg.body
        self._histogram[k] = 1 + self._histogram.get(k, 0)

    def _record_timing_stats(self, msg):
        """Record the connect..close span and per-event offsets from connect."""
        timings = msg.headers['timings']
        tdict = dict(timings)
        connect = tdict['connect']
        span = Span(connect, tdict['close'])
        self._slwin.insert(span)
        self._allspans.insert(span)

        for tag, t in timings:
            delta = t - connect
            self._timingstats.setdefault(tag, StatValue()).sample(delta)

    def _update_timing_stats(self):
        '''
        These stats use a simplistic second-wide bucket histogram.

        The concurrency statistic is sampled for every concurrency
        count in a given 1-second window throwing away the time
        information (even though it is recorded).

        Ex: Consider this sequence of connection spans:

        [(0.1, 0.9),
         (1.4, 1.5),
         (2.0, 3.1),
         (3.0, 3.1)]

        -then the concurrency windows would look like a sequence of these
         samples (without times):
        [[0, 1],
         [0, 1],
         [1],
         [2, 0]]
        '''
        # A deque gives O(1) removal from the front while the vector is
        # consumed window by window.
        concvec = deque(self._slwin.concurrency_vector())

        for window, subseq in self._slwin.as_bins(binwidth=1.0):
            self._rps.sample(len(subseq))
            for span in subseq:
                self._roundtrip.sample(span.magnitude)
            while concvec and window.contains(concvec[0][0]):
                _, _, q = concvec.popleft()
                self._cps.sample(len(q))


class ProtocolError (Exception):
    """Exception whose message is built from a %-template and its arguments."""

    def __init__(self, tmpl, *args):
        Exception.__init__(self, tmpl % args)


# Plugin interface:
queenbee_cls = HTTPQueenBee
workerbee_cls = HTTPWorkerBee


def add_options(parser):
    """Register this plugin's --host, --port and --dump options on *parser*."""
    parser.add_option('--host', default='dummy', dest='http_host',
                      help=("Connect to the target HTTP host."
                            " The value 'dummy' (which is default)"
                            " does not connect to any server and behaves"
                            " as if an HTTP 200 OK response was received"
                            " for all requests."))

    parser.add_option('--port', default=80, dest='http_port', type='int',
                      help="Connect to the target HTTP port.")

    parser.add_option('--dump', default=None, dest='http_dump',
                      help="Results dump file.")