/gcycsbplot.py
Python | 332 lines | 254 code | 63 blank | 15 comment | 44 complexity | a23622a71f7db90e43d527944297ebb3 MD5 | raw file
- #!/usr/bin/python
- import numpy
- import matplotlib.pyplot as pyplot
- import re
- import sys
- from gcplotter import *
- from datetime import *
class AbsoluteResults(object):
    """Base interface for result sets exposing a flat list of sample values.

    The base class stores nothing; YCSBLatencyResults in this file
    overrides getAbsoluteValues() with a real implementation.
    """

    def __init__(self):
        """Create the (stateless) base object."""
        return None

    def getAbsoluteValues(self):
        """Return the sample values; the base implementation returns None."""
        return None
-
class TimeseriesResults(object):
    """Base interface for result sets exposing timestamps and matching values.

    The base class stores nothing; YCSBThroughputResults in this file
    overrides both accessors.
    """

    def __init__(self):
        """Create the (stateless) base object."""
        return None

    def getTimestamps(self):
        """Return the timestamps; the base implementation returns None."""
        return None

    def getValues(self):
        """Return the values; the base implementation returns None."""
        return None
-
class YCSBLatencyResults(AbsoluteResults):
    """Latency histogram rows extracted from a YCSB output file.

    The constructor keeps every line of the file that matches "READ]," or
    "UPDATE]," (case-insensitive), then discards the first junk_lines + 1 of
    them. Each remaining row is split on ", ": field 1 is the latency bin
    (an integer, or the literal ">1000" overflow bucket) and field 2 is the
    number of operations counted in that bin.

    NOTE(review): the unit of a bin is not visible here (presumably
    milliseconds) -- confirm against the YCSB configuration in use.
    """

    # Accepted values for the transaction_type constructor argument.
    READ = 1
    UPDATE = 2

    # The constructor drops junk_lines + 1 leading matching lines.
    # NOTE(review): presumably these are the per-operation summary rows that
    # precede the histogram -- confirm the count against real YCSB output.
    junk_lines = 7

    def __init__(self, filename, transaction_type=1, min_bin=0, max_bin=1000):
        """Read filename and keep the histogram rows of one operation type.

        Args:
            filename: path of the YCSB output file to parse.
            transaction_type: READ selects "READ]," rows; any other value
                selects "UPDATE]," rows.
            min_bin: smallest integer bin to keep (inclusive).
            max_bin: largest integer bin to keep (inclusive).

        Raises:
            IndexError: if the file has fewer than junk_lines + 1 matching
                lines (the original code failed the same way, with a bare
                "pop from empty list" message).
        """
        super(YCSBLatencyResults, self).__init__()

        self.min_bin = min_bin
        self.max_bin = max_bin

        # Filled lazily by processAbsoluteValues().
        self.values = None
        self.uniqueValues = None

        if transaction_type == self.READ:
            regex = "READ],"
        else:
            regex = "UPDATE],"
        line_re = re.compile(regex, re.I)

        # 'with' closes the file even when read() raises.
        with open(filename, 'r') as f:
            ycsb_file = f.read()

        self.data_lines = [line for line in ycsb_file.split("\n")
                           if line_re.search(line)]

        skip = self.junk_lines + 1
        if len(self.data_lines) < skip:
            raise IndexError(
                "%s: expected at least %d lines matching %r, found %d"
                % (filename, skip, regex, len(self.data_lines)))
        del self.data_lines[:skip]

    def getPercentileLatency(self, percentile):
        """Return the latency bin that contains the requested percentile.

        Args:
            percentile: fraction of operations (it is multiplied directly by
                the operation count, so 0.95 rather than 95).

        Returns:
            The integer bin in which the cumulative count first reaches the
            target rank, or None when no in-range rows exist. The ">1000"
            overflow row and bins outside [min_bin, max_bin] are ignored.
        """
        # Imported locally because the module header does not import math
        # explicitly.
        import math

        bins = dict()
        total_ops = 0
        for line in self.data_lines:
            fields = line.split(", ")
            if fields[1] == ">1000":
                continue
            latency_bin = int(fields[1])
            if latency_bin < self.min_bin or latency_bin > self.max_bin:
                continue
            latency_count = int(fields[2])
            bins[latency_bin] = latency_count
            total_ops += latency_count

        if total_ops == 0:
            return None

        # Rank formula kept from the original; clamped so percentiles close
        # to 1.0 resolve to the last populated bin instead of running past
        # the end and returning None.
        index = int(math.ceil(percentile * total_ops) + 1)
        index = min(index, total_ops)

        # Bins must be visited in ascending order for the cumulative walk to
        # be meaningful; plain dict iteration does not guarantee that.
        for b in sorted(bins):
            if bins[b] < index:
                index -= bins[b]
            else:
                return b
        return None

    def processAbsoluteValues(self):
        """Expand the histogram rows into self.values and self.uniqueValues.

        self.values receives each bin value repeated by its count;
        self.uniqueValues receives each kept bin value once, in file order.
        The ">1000" overflow row is recorded as 1000 and is not subject to
        the min_bin/max_bin filter.
        """
        values = list()
        unique_values = list()

        for line in self.data_lines:
            fields = line.split(", ")
            if fields[1] == ">1000":
                val = 1000
            else:
                val = int(fields[1])
                if val < self.min_bin or val > self.max_bin:
                    continue

            values.extend([val] * int(fields[2]))
            unique_values.append(val)

        # Publish only after every row parsed, so a malformed row cannot
        # leave half-built lists cached on the instance.
        self.values = values
        self.uniqueValues = unique_values

    def getAbsoluteValues(self):
        """Return the expanded value list, computing it on first use."""
        if self.values is None:
            self.processAbsoluteValues()

        return self.values

    def getUniqueAbsoluteValues(self):
        """Return the list of kept bin values, computing it on first use."""
        if self.uniqueValues is None:
            self.processAbsoluteValues()

        return self.uniqueValues
class YCSBThroughputResults(TimeseriesResults):
    """Throughput samples extracted from the status lines of a YCSB output file.

    Every line containing "ops/sec;" (case-insensitive) is stripped and split
    on single spaces; field 0 is kept as the timestamp and field 4 is parsed
    as the throughput value.
    """

    def __init__(self, filename):
        """Parse filename into parallel timestamp and throughput lists.

        Args:
            filename: path of the YCSB output file to parse.

        Raises:
            IndexError: if a matching line has fewer than five fields.
            ValueError: if field 4 of a matching line is not a number.
        """
        super(YCSBThroughputResults, self).__init__()

        # 'with' closes the file even when read() raises.
        with open(filename, 'r') as f:
            ycsb_file = f.read()

        line_re = re.compile("ops/sec;", re.I)
        data_lines = [line for line in ycsb_file.split("\n")
                      if line_re.search(line)]

        # NOTE(review): timestamps stay as the raw strings read from the
        # file, exactly as before; convert at the plotting site if numeric
        # x-values are required.
        self.timestamps = []
        self.values = []

        prev_timestamp = None
        for line in data_lines:
            fields = line.strip().split(" ")
            secs = fields[0]
            throughput = float(fields[4])
            if secs == prev_timestamp:
                continue
            # The original never updated prev_timestamp, so consecutive
            # lines with the same timestamp were never actually skipped.
            prev_timestamp = secs
            self.timestamps.append(secs)
            self.values.append(throughput)

    def getTimestamps(self):
        """Return the list of timestamps (strings, in file order)."""
        return self.timestamps

    def getValues(self):
        """Return the list of throughput values (floats, in file order)."""
        return self.values
class CDFPlot(GCPlot):
    """Step-style cumulative distribution plot of one or more result sets.

    Each result set must provide getAbsoluteValues() and
    getUniqueAbsoluteValues() (see YCSBLatencyResults).
    NOTE(review): the values are plotted in the order returned; a correct
    CDF presumably requires them to be sorted ascending -- confirm.
    """

    # needs an AbsoluteResults object
    def __init__(self, results=None, label=None, title=None):
        """Create the plot, optionally seeded with one result set.

        Args:
            results: optional result set to add immediately.
            label: legend label for results.
            title: plot title; defaults to "CDF".
        """
        self.results = list()
        self.labels = dict()

        if results is not None:
            self.addResults(results, label=label)

        self.title = "CDF" if title is None else title

    def addResults(self, results, label=None):
        """Register a result set (it must be hashable) with its label."""
        self.results.append(results)
        self.labels[results] = label

    def doPlot(self):
        """Draw every registered result set and write a '<label>.cdf' file each.

        The .cdf file gets one "value fraction" line at each index where the
        doubled value array increases. A result set that is empty or that
        fails while being processed is reported on stdout and skipped.
        """
        ylim = 1.02
        for results in self.results:
            label = self.labels[results]
            print("%s:" % label)

            # 'with' guarantees the file is closed on every path.
            with open("%s.cdf" % label, "w") as f:
                try:
                    values = results.getAbsoluteValues()
                    uniqueValues = results.getUniqueAbsoluteValues()
                    n = len(values)
                    if n == 0:
                        # x would be empty while y has two points, which
                        # pyplot.plot rejects.
                        print("no values to plot")
                        continue
                    # Each value appears twice so consecutive points form
                    # horizontal/vertical steps.
                    x = numpy.repeat(values, 2)
                    y = numpy.hstack([0.0, numpy.repeat(numpy.arange(1, n) / float(n), 2), 1.0])
                    # Indices i with x[i+1] > x[i], in ascending order.
                    for i in numpy.flatnonzero(x[1:] > x[:-1]):
                        f.write("%d %f\n" % (x[i], y[i]))
                except Exception as inst:
                    # Best effort, as before: report and move on. Skipping
                    # the plot avoids using undefined or stale x/y below.
                    print(inst)
                    continue

            pyplot.plot(x, y, label=label, linewidth=1)
            pyplot.ylim([0.0, ylim])
            pyplot.yticks(numpy.arange(0, ylim, step=0.05))
            # With fewer than 20 bins the integer division yields 0, and
            # numpy.arange cannot use a zero step.
            step = max(1, len(uniqueValues) // 20)
            pyplot.xticks(numpy.arange(0, max(values), step=step))

        pyplot.legend(bbox_to_anchor=(1.0, 1.0), loc=1, ncol=1, borderaxespad=0.0)
        pyplot.title(self.title)
        pyplot.grid(True)
class HistogramPlot(GCPlot):
    """Histogram of a single result set.

    The result set must provide getAbsoluteValues() and
    getUniqueAbsoluteValues() (see YCSBLatencyResults).
    """

    # needs an AbsoluteResults object
    def __init__(self, results, label=None, title=None):
        """Store the result set, its label and the title ("Histogram" if None)."""
        self.results = results
        self.label = label
        self.title = "Histogram" if title == None else title

    def doPlot(self):
        """Draw the histogram, passing the unique values as the bins argument."""
        samples = self.results.getAbsoluteValues()
        bin_edges = self.results.getUniqueAbsoluteValues()
        pyplot.hist(samples, bin_edges)
        pyplot.title(self.title)
class SimplePlot(GCPlot):
    """Line plot of one or more time series.

    Each result set must provide getTimestamps() and getValues()
    (see TimeseriesResults).
    """

    # Needs a TimeseriesResults object
    def __init__(self, title=None, results=None, label=None, ylim=None):
        """Create the plot, optionally seeded with one result set.

        Args:
            title: plot title; defaults to "Simple Plot".
            results: optional result set to add immediately.
            label: legend label for results.
            ylim: optional upper y-axis limit requested for results.
        """
        self.title = "Simple Plot" if title is None else title

        self.data = []
        if results is not None:
            # The original called a bare addResults(...), which raised
            # NameError whenever results was supplied.
            self.addResults(results, label, ylim)

    def addResults(self, results, label=None, ylim=None):
        """Queue a result set together with its label and y-limit."""
        self.data.append((results, label, ylim))

    def doPlot(self):
        """Draw every queued series.

        The upper y-limit is raised each time a series requests a larger
        one than seen so far (starting from 0).
        """
        max_ylim = 0
        for results, label, ylim in self.data:
            if label is None:
                label = "unlabeled"

            timestamps = results.getTimestamps()
            values = results.getValues()

            pyplot.plot(timestamps, values, label=label)
            if ylim is not None and ylim > max_ylim:
                pyplot.ylim(ymax=ylim)
                max_ylim = ylim

        pyplot.legend(bbox_to_anchor=(1.0, 1.0), loc=1, ncol=1, borderaxespad=0.0)
        pyplot.title(self.title)
        pyplot.grid(True)
if __name__ == "__main__":
    # Script entry point: plots read-latency CDFs for a fixed grid of runs.
    # For every (target, latency) pair it reads
    #   <base>-<target>-<latency>/ycsb.latency_output.crypt11
    # where <base> is the first command-line argument.
    if len(sys.argv) < 2:
        # Fail with a usage message instead of an IndexError on sys.argv[1].
        sys.stderr.write("usage: %s <results-directory-prefix>\n" % sys.argv[0])
        sys.exit(1)

    base = sys.argv[1]

    plotter = GCPlotter(xsize=20, ysize=10)

    targets = [3000, 5500]
    latencies = [15, 21]

    cdf = CDFPlot()

    for t in targets:
        for l in latencies:
            directory = "%s-%d-%d" % (base, t, l)
            filename = "%s/ycsb.latency_output.crypt11" % directory
            results = YCSBLatencyResults(filename, transaction_type=YCSBLatencyResults.READ, max_bin=1000)
            cdf.addResults(results, label="%d-%d" % (t, l))

    plotter.addPlot(cdf)
    plotter.doPlot()
-