PageRenderTime 47ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 1ms

/gcycsbplot.py

https://gitlab.com/abushoeb/gc-v1
Python | 332 lines | 254 code | 63 blank | 15 comment | 44 complexity | a23622a71f7db90e43d527944297ebb3 MD5 | raw file
  1. #!/usr/bin/python
  2. import numpy
  3. import matplotlib.pyplot as pyplot
  4. import re
  5. import sys
  6. from gcplotter import *
  7. from datetime import *
  8. class AbsoluteResults(object):
  9. def __init__(self):
  10. pass
  11. def getAbsoluteValues(self):
  12. pass
  13. class TimeseriesResults(object):
  14. def __init__(self):
  15. pass
  16. def getTimestamps(self):
  17. pass
  18. def getValues(self):
  19. pass
  20. class YCSBLatencyResults(AbsoluteResults):
  21. READ = 1
  22. UPDATE = 2
  23. junk_lines = 7
  24. #values = list()
  25. def __init__(self, filename, transaction_type=1, min_bin=0, max_bin=1000):
  26. super(YCSBLatencyResults, self).__init__()
  27. self.min_bin = min_bin
  28. self.max_bin = max_bin
  29. self.values = None
  30. self.uniqueValues = None
  31. f = open(filename, 'r')
  32. ycsb_file = f.read()
  33. f.close()
  34. if transaction_type == self.READ:
  35. regex = "READ],"
  36. else:
  37. regex = "UPDATE],"
  38. line_re = re.compile(regex, re.I)
  39. lines = ycsb_file.split("\n")
  40. self.data_lines = list()
  41. for line in lines:
  42. if line_re.search(line):
  43. self.data_lines.append(line)
  44. #for line in self.data_lines:
  45. # print line
  46. for i in xrange(0, self.junk_lines + 1):
  47. self.data_lines.pop(0)
  48. def getPercentileLatency(self, percentile):
  49. bins = dict()
  50. total_ops = 0
  51. for line in self.data_lines:
  52. fields = line.split(", ")
  53. if fields[1] == ">1000":
  54. continue
  55. elif int(fields[1]) < self.min_bin or int(fields[1]) > self.max_bin:
  56. continue
  57. latency_bin = int(fields[1])
  58. latency_count = int(fields[2])
  59. bins[latency_bin] = latency_count;
  60. total_ops += latency_count
  61. index = int(math.ceil(percentile * total_ops) + 1)
  62. for b in bins.keys():
  63. if bins[b] < index:
  64. index -= bins[b]
  65. else:
  66. return b
  67. def processAbsoluteValues(self):
  68. self.values = list()
  69. self.uniqueValues = list()
  70. for line in self.data_lines:
  71. fields = line.split(", ")
  72. if fields[1] == ">1000":
  73. val = 1000
  74. elif int(fields[1]) < self.min_bin or int(fields[1]) > self.max_bin:
  75. continue
  76. else:
  77. val = int(fields[1])
  78. num = int(fields[2])
  79. for i in xrange(0, num):
  80. self.values.append(val)
  81. self.uniqueValues.append(val)
  82. def getAbsoluteValues(self):
  83. if self.values == None:
  84. self.processAbsoluteValues()
  85. return self.values
  86. def getUniqueAbsoluteValues(self):
  87. if self.uniqueValues == None:
  88. self.processAbsoluteValues()
  89. return self.uniqueValues
  90. class YCSBThroughputResults(TimeseriesResults):
  91. def __init__(self, filename):
  92. f = open(filename, 'r')
  93. ycsb_file = f.read()
  94. f.close()
  95. regex = "ops/sec;"
  96. line_re = re.compile(regex, re.I)
  97. lines = ycsb_file.split("\n")
  98. data_lines = list()
  99. for line in lines:
  100. if line_re.search(line):
  101. data_lines.append(line)
  102. self.timestamps = []
  103. self.values = []
  104. prev_timestamp = None
  105. for line in data_lines:
  106. fields = line.strip().split(" ")
  107. secs = fields[0]
  108. throughput = float(fields[4])
  109. if secs == prev_timestamp:
  110. continue
  111. self.timestamps.append(secs)
  112. self.values.append(throughput)
  113. def getTimestamps(self):
  114. return self.timestamps
  115. def getValues(self):
  116. return self.values
  117. class CDFPlot(GCPlot):
  118. # needs an AbsoluteResults object
  119. def __init__(self, results=None, label=None, title=None):
  120. self.results = list()
  121. self.labels = dict()
  122. if results != None:
  123. self.addResults(results, label=label)
  124. if title == None:
  125. self.title = "CDF"
  126. else:
  127. self.title = title
  128. def addResults(self, results, label=None):
  129. self.results.append(results)
  130. if label != None:
  131. self.labels[results] = label
  132. else:
  133. self.labels[results] = None
  134. def doPlot(self):
  135. ylim=1.02
  136. for results in self.results:
  137. print "%s:" % self.labels[results]
  138. f = open("%s.cdf" % self.labels[results], "w")
  139. try:
  140. values = results.getAbsoluteValues()
  141. uniqueValues = results.getUniqueAbsoluteValues()
  142. n = len(values)
  143. x = numpy.repeat(values, 2)
  144. #x = numpy.repeat(values, 1)
  145. y = numpy.hstack([0.0, numpy.repeat(numpy.arange(1,n) / float(n), 2), 1.0])
  146. for i in xrange(0, len(x)-1):
  147. if x[i+1] > x[i]:
  148. f.write("%d %f\n" % (x[i], y[i]))
  149. except Exception as inst:
  150. print inst
  151. f.close()
  152. pyplot.plot(x, y, label=self.labels[results], linewidth=1)
  153. pyplot.ylim([0.0, ylim])
  154. pyplot.yticks(numpy.arange(0, ylim, step=0.05))
  155. step=len(uniqueValues)/20
  156. pyplot.xticks(numpy.arange(0, max(values), step=step))#max(values)/20))
  157. pyplot.legend(bbox_to_anchor=(1.0, 1.0), loc=1, ncol=1, borderaxespad=0.0)
  158. pyplot.title(self.title)
  159. pyplot.grid(True)
  160. class HistogramPlot(GCPlot):
  161. # needs an AbsoluteResults object
  162. def __init__(self, results, label=None, title=None):
  163. self.results = results
  164. self.label = label
  165. if title == None:
  166. self.title = "Histogram"
  167. else:
  168. self.title = title
  169. def doPlot(self):
  170. pyplot.hist(self.results.getAbsoluteValues(), self.results.getUniqueAbsoluteValues())
  171. pyplot.title(self.title)
  172. class SimplePlot(GCPlot):
  173. # Needs a TimeseriesResults object
  174. def __init__(self, title=None, results=None, label=None, ylim=None):
  175. if title == None:
  176. self.title = "Simple Plot"
  177. else:
  178. self.title = title
  179. self.data = []
  180. if results != None:
  181. addResults(results, label, ylim)
  182. def addResults(self, results, label=None, ylim=None):
  183. data = (results, label, ylim)
  184. self.data.append(data)
  185. def doPlot(self):
  186. max_ylim = 0
  187. for d in self.data:
  188. results, label, ylim = d
  189. if label == None:
  190. label = "unlabeled"
  191. timestamps = results.getTimestamps()
  192. values = results.getValues()
  193. pyplot.plot(timestamps, values, label=label)
  194. if ylim != None and ylim > max_ylim:
  195. pyplot.ylim(ymax=ylim)
  196. max_ylim = ylim
  197. pyplot.legend(bbox_to_anchor=(1.0, 1.0), loc=1, ncol=1, borderaxespad=0.0)
  198. pyplot.title(self.title)
  199. pyplot.grid(True)
  200. if __name__ == "__main__":
  201. max_bin=317
  202. plotter = GCPlotter(xsize=20, ysize=10)
  203. #results_all = YCSBResults("/home/wkatsak/wonko_home/YCSB/results/latency.updateonly.updateonly-all.0", transaction_type=YCSBResults.UPDATE, max_bin=500)
  204. #results_some= YCSBResults("/home/wkatsak/wonko_home/YCSB/results/latency.updateonly.updateonly-some.0", transaction_type=YCSBResults.UPDATE, max_bin=500)
  205. #results_min = YCSBResults("/home/wkatsak/wonko_home/YCSB/results/latency.updateonly.updateonly-min.0", transaction_type=YCSBResults.UPDATE, max_bin=500)
  206. #results_all = YCSBResults("/home/wkatsak/wonko_home/YCSB/results/latency.readonly.allnodes-minconn.0", transaction_type=YCSBResults.READ)
  207. #results_some= YCSBResults("/home/wkatsak/wonko_home/YCSB/results/latency.readonly.somenodes-minconn.0", transaction_type=YCSBResults.READ)
  208. #results_min = YCSBResults("/home/wkatsak/wonko_home/YCSB/results/latency.readonly.minnodes-minconn.0", transaction_type=YCSBResults.READ)
  209. '''
  210. results_all_ONE = YCSBResults("/home/wkatsak/wonko_home/YCSB/results/latency.readonly.all-nodes.ONE.0", transaction_type=YCSBResults.READ, max_bin=max_bin)
  211. results_some_ONE = YCSBResults("/home/wkatsak/wonko_home/YCSB/results/latency.readonly.some-nodes.ONE.0", transaction_type=YCSBResults.READ, max_bin=max_bin)
  212. results_min_ONE = YCSBResults("/home/wkatsak/wonko_home/YCSB/results/latency.readonly.min-nodes.ONE.0", transaction_type=YCSBResults.READ, max_bin=max_bin)
  213. results_all_QUORUM = YCSBResults("/home/wkatsak/wonko_home/YCSB/results/latency.readonly.all-nodes.QUORUM.0", transaction_type=YCSBResults.READ, max_bin=max_bin)
  214. results_some_QUORUM = YCSBResults("/home/wkatsak/wonko_home/YCSB/results/latency.readonly.some-nodes.QUORUM.0", transaction_type=YCSBResults.READ, max_bin=max_bin)
  215. results_min_QUORUM = YCSBResults("/home/wkatsak/wonko_home/YCSB/results/latency.readonly.min-nodes.QUORUM.0", transaction_type=YCSBResults.READ, max_bin=max_bin)
  216. results_all_ALL = YCSBResults("/home/wkatsak/wonko_home/YCSB/results/latency.readonly.all-nodes.ALL.0", transaction_type=YCSBResults.READ, max_bin=max_bin)
  217. results_some_ALL = YCSBResults("/home/wkatsak/wonko_home/YCSB/results/latency.readonly.some-nodes.ALL.0", transaction_type=YCSBResults.READ, max_bin=max_bin)
  218. results_min_ALL = YCSBResults("/home/wkatsak/wonko_home/YCSB/results/latency.readonly.min-nodes.ALL.0", transaction_type=YCSBResults.READ, max_bin=max_bin)
  219. cdf = CDFPlot(title="Read Latencies")
  220. cdf2 = CDFPlot(title="Read Latencies")
  221. cdf3 = CDFPlot(title="Read Latencies")
  222. cdf.addResults(results_all_ONE, label="30 Servers (ONE)")
  223. cdf.addResults(results_some_ONE, label="20 Servers (ONE)")
  224. cdf.addResults(results_min_ONE, label="10 Servers (ONE)")
  225. cdf.addResults(results_all_QUORUM, label="30 Servers (QUORUM)")
  226. cdf.addResults(results_some_QUORUM, label="20 Servers (QUORUM)")
  227. cdf.addResults(results_min_QUORUM, label="10 Servers (QUORUM)")
  228. cdf.addResults(results_all_ALL, label="30 Servers (ALL)")
  229. cdf.addResults(results_some_ALL, label="20 Servers (ALL)")
  230. cdf.addResults(results_min_ALL, label="10 Servers (ALL)")
  231. #hist_all = HistogramPlot(results_all, title="30 Servers")
  232. #hist_some = HistogramPlot(results_some, title="20 Servers")
  233. #hist_min = HistogramPlot(results_min, title="10 Servers")
  234. plotter.addPlot(cdf)
  235. #plotter.addPlot(cdf2)
  236. #plotter.addPlot(cdf3)
  237. #plotter.addPlot(hist_all)
  238. #plotter.addPlot(hist_some)
  239. #plotter.addPlot(hist_min)
  240. plotter.doPlot()
  241. '''
  242. base = sys.argv[1]
  243. targets = [3000, 5500]
  244. latencies = [15, 21]
  245. cdf = CDFPlot()
  246. for t in targets:
  247. for l in latencies:
  248. directory = "%s-%d-%d" % (base, t, l)
  249. filename = "%s/ycsb.latency_output.crypt11" % directory
  250. results = YCSBLatencyResults(filename, transaction_type=YCSBLatencyResults.READ, max_bin=1000)
  251. cdf.addResults(results, label="%d-%d" % (t, l))
  252. plotter.addPlot(cdf)
  253. plotter.doPlot()