
/src/contrib/hedwig/scripts/analyze.py

https://github.com/ravidontharaju/zookeeper
#!/usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import with_statement
import sys, os, glob, re, collections, math, subprocess, unittest
from numpy import *
from commons import seqs, startup, strs, structs
import cairo
from pycha.bar import *
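
# Overview (descriptive comment, inferred from the parsing code below): this
# script post-processes Hedwig benchmark logs found in the current directory
# and renders bar charts (PNG, via pycha on a cairo surface) of throughput
# and latency, averaged over repetitions with numpy.
#
# Expected input file names (presumably written by the benchmark clients):
#   sub mode: sync-<sync>-count-<count>-npar-<npar>-rep-<rep>.out
#   pub mode: nrecvs-<n>-npubs-<n>-npar-<n>-rep-<n>-{pub,recv}-<node>
# The summary line in each log ("finished ... tput = ... ops/s ...") is
# parsed, and one chart per metric is written as <name>.png.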

def sub():

  #
  # Parse/aggregate.
  #

  tputs = collections.defaultdict(list)
  lats = collections.defaultdict(list)
  for fname in glob.glob('sync-*-count-*-npar-*-rep-*.out'):
    m = re.match(r'sync-(\d+)-count-(\d+)-npar-(\d+)-rep-(\d+)\.out', fname)
    sync, count, npar, rep = map(int, m.groups())
    with file(fname) as f:
      m = re.search(r'finished subs, tput = ([\d\.]+) ops/s, avg latency = (\d+)', f.readlines()[-2])
      tput, lat = map(float, m.groups())
      tputs[sync, count, npar].append(tput)
      lats[sync, count, npar].append(lat)
  for d in tputs, lats:
    for k in d:
      d[k] = array(d[k]).mean(), array(d[k]).std()
      print k, d[k]

  #
  # Plot.
  #

  for title, ylabel, fname, d in [ ('Subscription throughput over three trials', 'Subscriptions per second', 'tput', tputs),
                                   ('Subscription latency over three trials', 'Round-trip time in ms', 'lat', lats) ]:
    means = dict((k, v[0]) for k,v in d.iteritems())
    sdevs = dict((k, v[1]) for k,v in d.iteritems())
    surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, 500, 400)
    syncs, counts, npars = [ sorted(set(x[i] for x in means))
                             for i in xrange(3) ]
    print syncs, counts, npars
    dataset = [ ( '%d topics, %s' %
                  ( count, 'synchronous' if sync else 'asynchronous' ),
                  [ ( npar/10, means[sync, count, npar], sdevs[sync, count, npar] )
                    for npar in npars ] )
                for count in counts
                for sync in syncs ]
    options = {'legend.position':
                 {'top': None, 'left': None, 'bottom': 100, 'right': 20},
               'axis.x.ticks': [{'v': x, 'label': max(1,10*x)}
                                for i,(x,y,e) in enumerate(dataset[0][1])],
               'axis.x.label': 'Number of outstanding subscription requests',
               'axis.y.label': ylabel,
               'padding.left': 50,
               'title': title,
               'background.color': '#f0f0f0'}
    chart = VerticalBarChart(surface, structs.sparse_dict(options))
    chart.addDataset(dataset)
    chart.render()
    surface.write_to_png(fname + '.png')
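
# pub() analyzes the publish/receive benchmark logs. Per-node results are
# combined for each (key, rep): throughputs are summed across nodes when
# sweeping npubs and averaged otherwise, then aggregated (mean/std) over reps.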
def pub():
  def helper(do_pubs):
    def subhelper(keyname, pat, xlabel):
      #
      # Parse/aggregate.
      #
      print 'Analyzing', keyname, 'for', 'publishers' if do_pubs else 'receivers'
      print '========================'
      print
      tputs = collections.defaultdict(list)
      lats = collections.defaultdict(list)
      fnames = [ ( fname, tuple(map(int, m.groups())) )
                 for fname, m in filter( lambda m: m[1] is not None,
                                         ( ( fname, re.match(pat, fname) )
                                           for fname in os.listdir('.') ) ) ]
      tup2fname = dict( (tup, fname) for fname, tup in fnames )
      keys, reps, nodes = map(lambda xs: sorted(set(xs)),
                              zip(*(tup for fname, tup in fnames)))
      raw_table = []
      print '== raw data =='
      raw_table.append( [ keyname, 'rep', 'node', 'tput' ] + ( ['lat'] if do_pubs else [] ) + ['sum/mean tput', 'mean lat'] )
      for key in keys:
        for rep in reps:
          tmptputs = []
          tmplats = []
          for node in nodes:
            if (key, rep, node) in tup2fname:
              with file(tup2fname[key, rep, node]) as f:
                try:
                  if do_pubs:
                    m = re.search(r'finished acked pubs, tput = ([\d\.]+) ops/s, avg latency = (\d+)', f.readlines()[-2])
                    tput, lat = map(float, m.groups())
                  else:
                    m = re.search(r'finished recvs, tput = ([\d\.]+) ops/s', f.read())
                    [tput] = map(float, m.groups())
                except AttributeError:
                  print >> sys.stderr, "While processing", tup2fname[key, rep, node]
                  raise
              raw_table.append( [ key, rep, node, tput ] + ( [lat] if do_pubs else [] ) + ['',''] )
              tmptputs.append(tput)
              if do_pubs: tmplats.append(lat)
          if keyname == 'npubs': tputs[key].append(sum(tmptputs))
          else: tputs[key].append(array(tmptputs).mean())
          if do_pubs: lats[key].append(array(tmplats).mean())
          if len(nodes) > 1:
            raw_table.append( [''] * (len(raw_table[0]) - 2) + [tputs[key][-1]] + ( [lats[key][-1]] if do_pubs else [] ) )
      print strs.show_table_by_rows(raw_table)
      print
      print '== aggregated over reps =='
      agg_table = []
      agg_table.append( ( keyname, 'mean', 'sd' ) )
      for d in tputs, lats:
        for k in d:
          d[k] = array(d[k]).mean(), array(d[k]).std()
          agg_table.append( ( k, d[k][0], d[k][1] ) )
      print strs.show_table_by_rows(agg_table)
      print
      #
      # Plot.
      #

      if do_pubs:
        plots = [ ('Publishing throughput over three trials', 'Publishes per second', '%s-pub-tput' % keyname, tputs),
                  ('Publishing latency over three trials', 'Round-trip time in ms', '%s-pub-lat' % keyname, lats) ]
      else:
        plots = [ ('Receiving throughput over three trials', 'Receives per second', '%s-recv-tput' % keyname, tputs) ]
      for title, ylabel, fname, d in plots:
        means = dict((k, v[0]) for k,v in d.iteritems())
        sdevs = dict((k, v[1]) for k,v in d.iteritems())
        surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, 500, 400)
        dataset = [ ( 'main',
                      [ ( i, means[key], sdevs[key] )
                        for i, key in enumerate(keys) ] ) ]
        options = {'legend.position':
                     {'top': None, 'left': None, 'bottom': 100, 'right': 20},
                   'axis.x.ticks': [{'v': x, 'label': k}
                                    for k,(x,y,e) in zip(keys, dataset[0][1])],
                   'axis.x.label': xlabel,
                   'axis.y.label': ylabel,
                   'padding.left': 50,
                   'title': title,
                   'background.color': '#f0f0f0'}
        chart = VerticalBarChart(surface, structs.sparse_dict(options))
        chart.addDataset(dataset)
        chart.render()
        surface.write_to_png(fname + '.png')
      print
      print
      print
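    # Run the analysis for three sweeps (receivers, publishers, outstanding
    # requests). For the first two sweeps, npar is pinned to its most common
    # observed value (assuming seqs.mode from the commons library returns the
    # modal element).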
    nodetype = 'pub' if do_pubs else 'recv'
    mode_npar = seqs.mode(
        int(m.group(1)) for m in
        [re.search('npar-(\d+)', fname) for fname in os.listdir('.')]
        if m is not None )
    subhelper('nrecvs', 'nrecvs-(\d+)-npubs-1-npar-%s-rep-(\d+)-%s-(\d+)' % (mode_npar, nodetype),
              'Number of receivers')
    subhelper('npubs', 'nrecvs-1-npubs-(\d+)-npar-%s-rep-(\d+)-%s-(\d+)' % (mode_npar, nodetype),
              'Number of publishers')
    subhelper('npar', 'nrecvs-1-npubs-1-npar-(\d+)-rep-(\d+)-%s-(0)' % nodetype,
              'Number of outstanding publish requests')
  helper(True)
  helper(False)

def main(argv):
  if argv[1] == 'sub': sub()
  elif argv[1] == 'pub': pub()
  else: return unittest.main()

startup.run_main()
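
# Usage (a sketch; startup.run_main is part of the internal commons library
# and is assumed to dispatch to main(sys.argv)):
#   ./analyze.py sub   # analyze subscription benchmark logs
#   ./analyze.py pub   # analyze publish/receive benchmark logs
# Run from the directory containing the benchmark output files.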
# vim: et sw=2 ts=2