PageRenderTime 153ms CodeModel.GetById 118ms app.highlight 29ms RepoModel.GetById 1ms app.codeStats 1ms

/apiary/http/http.py

https://bitbucket.org/lindenlab/apiary/
Python | 341 lines | 244 code | 41 blank | 56 comment | 24 complexity | 85c971c334a5f185c3d3409932d70fb6 MD5 | raw file
  1#
  2# $LicenseInfo:firstyear=2010&license=mit$
  3# 
  4# Copyright (c) 2010, Linden Research, Inc.
  5# 
  6# Permission is hereby granted, free of charge, to any person obtaining a copy
  7# of this software and associated documentation files (the "Software"), to deal
  8# in the Software without restriction, including without limitation the rights
  9# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10# copies of the Software, and to permit persons to whom the Software is
 11# furnished to do so, subject to the following conditions:
 12# 
 13# The above copyright notice and this permission notice shall be included in
 14# all copies or substantial portions of the Software.
 15# 
 16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 22# THE SOFTWARE.
 23# $/LicenseInfo$
 24#
 25
 26from optparse import OptionParser
 27import re
 28import time
 29import socket
 30
 31import apiary
 32from apiary.tools.codec import Message
 33from apiary.tools.timer import Timer
 34from apiary.tools.stattools import StatValue
 35from apiary.tools.span import Span, SpanSequence, SlidingWindowSequence
 36from apiary.tools.debug import debug, traced_method
 37from apiary.tools.dummyfile import DummyFile
 38
 39
 40class HTTPWorkerBee(apiary.WorkerBee):
 41    _fake_results = ['HTTP/1.0 200 OK\r\nSome-Header: some value\r\n\r\n',
 42                     'HTTP/1.0 404 Not Found\r\nAnother-Header: another value\r\n\r\n']
 43
 44    def __init__(self, options, arguments):
 45        apiary.WorkerBee.__init__(self, options, arguments)
 46        self._host = options.http_host
 47        if self._host == 'dummy':
 48            self._host = None
 49        self._port = options.http_port
 50        self._conn = None
 51        # _result is either None or (valid, details)
 52        # valid is a bool specifying whether the HTTP response was successfully parsed.
 53        # If valid is True, details contains the HTTP response.
 54        # If valid is False, details contains a generic error message.
 55        self._result = None
 56        self._timer = None
 57        self._worker_hostname = socket.getfqdn()
 58    
 59    #@traced_method
 60    def start(self):
 61        assert self._conn is None, 'Precondition violation: start called without ending previous session.'
 62        self._record_socket_errors(self._raw_start)
 63        
 64    def _raw_start(self):
 65        self._timer = Timer()
 66        if self._host:
 67            self._conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 68            try:
 69                self._conn.connect((self._host, self._port))
 70            finally:
 71                # Record a 'connect' time regardless of exception since analysis code depends on its presence:
 72                self._timer.event('connect')
 73        
 74    #@traced_method
 75    def event(self, request):
 76        # Precondition:
 77        if self._result is not None:
 78            assert self._result[0] == False, 'Precondition violation: expected failure result; got: %r' % (self._result,)
 79            return # Connection failure.
 80        self._record_socket_errors(self._raw_event, request)
 81        # Postcondition:
 82        assert type(self._result) is not None, `self._result`
 83                
 84    def _raw_event(self, request, bufsize=2**14):
 85        reqlen = len(request)
 86        
 87        if self._host:
 88            assert self._conn, 'Precodingion violation: event called without starting a session.'
 89            self._timer.event('start-send')
 90            while request:
 91                written = self._conn.send(request)
 92                request = request[written:]
 93            self._timer.event('start-recv')
 94            inbuf = ''
 95            bytes = self._conn.recv(bufsize)
 96            i = -1
 97            while bytes and i == -1:
 98                inbuf += bytes
 99                i = inbuf.find('\r\n')
100                bytes = self._conn.recv(bufsize)
101            if i >= 0:
102                response = inbuf[:i]
103                # Read until the socket closes:
104                resplen = len(inbuf)
105                chunk = self._conn.recv(bufsize)
106                while chunk:
107                    resplen += len(chunk)
108                    chunk = self._conn.recv(bufsize)
109                self._set_result_from_http_response(inbuf[:i],
110                                                    reqlen,
111                                                    resplen)
112                self._timer.event('parse-response')
113            if self._result is None:
114                self._timer.event('parse-response-fail')
115                self._error_result('HTTP Response line not found: %r', inbuf[:256])
116            self._timer.event('finish-recv')
117        else:
118            self._set_result_from_http_response(self._fake_results.pop(0))
119            self._fake_results.append(self._result)
120            
121    #@traced_method
122    def end(self):
123        assert self._result is not None, 'Precondition violation: .end() precondition failed, no result.'
124        
125        if self._host and self._conn is not None:
126            self._conn.close()
127            self._timer.event('close')
128            self._conn = None
129        
130        tdict = dict(self._timer.intervals)
131        assert tdict.has_key('connect') and tdict.has_key('close'), 'Postcondition violation, missing timings: %r' % (tdict,)
132
133        validresponse, details = self._result[:2]
134
135        lengthinfo = {}
136        if validresponse:
137            reqlen, resplen = self._result[2:]
138            lengthinfo['request_length'] = reqlen
139            lengthinfo['response_length'] = resplen
140
141        self._result = None
142
143        msg = Message(details,
144                      worker=self._worker_hostname,
145                      valid_response=validresponse,
146                      timings=self._timer.intervals,
147                      **lengthinfo)
148        return msg.encode_to_string()
149
150    def _set_result_from_http_response(self, result, reqlen, resplen):
151        m = self._HTTPStatusPattern.match(result)
152        if m is None:
153            self._result = (False, 'Failed to parse HTTP Response.', reqlen, resplen)
154        else:
155            self._result = (True, result, reqlen, resplen)
156            
157    def _error_result(self, tmpl, *args):
158        self._result = (False, tmpl % args)
159
160    def _record_socket_errors(self, f, *args):
161        try:
162            return f(*args)
163        except socket.error, e:
164            self._timer.event('close')
165            self._conn = None
166            # _result may already be set, for example, if we've parsed a
167            # response and we are in the middle of reading the headers/body.
168            if self._result is None:
169                self._error_result('socket.error: %r', e.args)
170
171    _HTTPStatusPattern = re.compile(r'(HTTP/\d\.\d) (\d{3}) (.*?)$', re.MULTILINE)
172
173
class HTTPQueenBee(apiary.QueenBee):
    """Queen bee that feeds recorded HTTP requests to worker bees and
    aggregates their results into status and timing statistics.
    """

    def __init__(self, options, arguments, updateperiod=5):
        """arguments must be exactly one path: the recorded-events data file.

        updateperiod is the minimum number of seconds between printed
        statistics summaries.
        """
        apiary.QueenBee.__init__(self, options, arguments)
        try:
            [self._inpath] = arguments
        except ValueError:
            raise SystemExit('Usage error: HTTPQueenBee needs an events data file.')
        dumppath = options.http_dump
        if dumppath is None:
            # No dump requested: DummyFile silently swallows all writes.
            self._dumpfile = DummyFile()
        else:
            self._dumpfile = open(dumppath, 'wb')

        self._updateperiod = updateperiod
        self._fp = None        # lazily-opened events file
        self._eventgen = None  # Message generator over _fp
        self._jobid = 0
        self._histogram = {} # { HTTPStatus -> absolute_frequency }
        self._timingstats = {} # { timingtag -> StatValue }
        self._rps = StatValue()
        self._cps = StatValue() # "concurrency-per-second"
        self._roundtrip = StatValue()
        self._slwin = SlidingWindowSequence(updateperiod+0.5)
        self._allspans = SpanSequence()
        self._tally_time = 0   # next wall-clock time to print a tally

    def next(self):
        """Dispatch the next recorded request; return False when exhausted."""
        if self._fp is None:
            assert self._eventgen is None, 'Invariant violation: _fp set, but _eventgen is None.'
            self._fp = open(self._inpath, 'rb')
            self._eventgen = Message.decode_many_from_file(self._fp)
        try:
            # next() builtin instead of the Python-2-only .next() method.
            msg = next(self._eventgen).body
        except StopIteration:
            return False

        jobid = self._next_jobid()
        self.start(jobid)
        self.event(jobid, msg)
        self.end(jobid)
        return True

    def result(self, seq, msgenc):
        """Record one worker result: dump it and fold it into the stats."""
        msg = Message.decode_from_string(msgenc)

        msg.headers['seq'] = seq
        msg.encode_to_file(self._dumpfile)

        self._update_histogram(msg)
        self._record_timing_stats(msg)
        if time.time() > self._tally_time:
            self.print_tally()

    def print_tally(self):
        """Print the histogram, per-event timing, and throughput summaries."""
        # sum() is idiomatic and, unlike reduce() over an empty sequence,
        # does not raise TypeError when no results have arrived yet.
        totalcount = sum(self._histogram.values())
        print ("")
        print ("       count - frequency - message")
        print ("------------   ---------   ---------------------------------------")
        for k, v in sorted(self._histogram.items()):
            relfreq = 100.0 * float(v) / float(totalcount)
            print ("%12d -   %6.2f%% - %s" % (v, relfreq, k))

        print ("")
        print ("  timing event - stats")
        print ("--------------   ---------------------------------------")
        for event, stat in sorted(self._timingstats.items()):
            print ('%14s   %s' % (event, stat.format()))

        print ("")
        print ("RPS, Concurrency, and Response Time")
        print ("-----------------------------------")

        self._update_timing_stats()
        print ('%14s   %s' % ('rps', self._rps.format()))
        print ('%14s   %s' % ('concurrency', self._cps.format()))
        print ('%14s   %s' % ('roundtrip', self._roundtrip.format()))

        # Schedule the next periodic tally.
        self._tally_time = time.time() + self._updateperiod

    def main(self):
        """Run the full replay, then print final stats and elapsed times."""
        t = - time.time()
        # NOTE(review): time.clock() is deprecated/removed in Python 3.3+/3.8;
        # kept for this Python-2 codebase (process CPU time).
        c = - time.clock()
        apiary.QueenBee.main(self)
        c += time.clock()
        t += time.time()

        self.print_tally()
        print ("Timing: %f process clock, %f wall clock" % (c, t))

    def _next_jobid(self):
        """Return a fresh, unique job-id string."""
        jobid = str(self._jobid)
        self._jobid += 1
        return jobid

    def _update_histogram(self, msg):
        """Count one occurrence of this message body (status line or error)."""
        k = msg.body
        self._histogram[k] = 1 + self._histogram.get(k, 0)

    def _record_timing_stats(self, msg):
        """Fold one result's timings into per-tag stats and the span windows."""
        timings = msg.headers['timings']
        tdict = dict(timings)
        connect = tdict['connect']
        span = Span(connect, tdict['close'])
        self._slwin.insert(span)
        self._allspans.insert(span)

        # Each timing event is recorded as an offset from the connect time.
        for tag, t in timings:
            delta = t - connect
            self._timingstats.setdefault(tag, StatValue()).sample(delta)

    def _update_timing_stats(self):
        '''
        These stats use a simplistic second-wide bucket histogram.

        The concurrency statistic is sampled for every concurrency
        count in a given 1-second window throwing away the time
        information (even though it is recorded).

        Ex: Consider this sequence of connection spans:

        [(0.1, 0.9),
         (1.4, 1.5),
         (2.0, 3.1),
         (3.0, 3.1)]

        -then the concurrency windows would look a sequence of these
         samples (without times):
        [[0, 1],
         [0, 1],
         [1],
         [2, 0]]
        '''
        concvec = list(self._slwin.concurrency_vector())

        for window, subseq in self._slwin.as_bins(binwidth=1.0):
            self._rps.sample(len(subseq))
            for span in subseq:
                self._roundtrip.sample(span.magnitude)
            while concvec and window.contains(concvec[0][0]):
                _, _, q = concvec.pop(0)
                self._cps.sample(len(q))
316        
317
class ProtocolError(Exception):
    """Protocol-level failure; the message is built from tmpl % args."""

    def __init__(self, tmpl, *args):
        super(ProtocolError, self).__init__(tmpl % args)
321
322
# Plugin interface:
# NOTE(review): these module-level names appear to be the hook by which the
# apiary framework discovers this plugin's queen/worker classes -- confirm
# against the plugin loader.
queenbee_cls = HTTPQueenBee
workerbee_cls = HTTPWorkerBee
326
327
def add_options(parser):
    """Register this plugin's command-line options on the given OptionParser."""
    host_help = ("Connect to the target HTTP host."
                 " The value 'dummy' (which is default)"
                 " does not connect to any server and behaves"
                 " as if an HTTP 200 OK response was received"
                 " for all requests.")
    parser.add_option('--host', dest='http_host', default='dummy',
                      help=host_help)
    parser.add_option('--port', dest='http_port', default=80, type='int',
                      help="Connect to the target HTTP port.")
    parser.add_option('--dump', dest='http_dump', default=None,
                      help="Results dump file.")
341