PageRenderTime 33ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/mysql_watcher/io_parser

https://bitbucket.org/lindenlab/apiary/
Python | 188 lines | 135 code | 9 blank | 44 comment | 7 complexity | d8eb2625b0019e2ccce9107e00c0097a MD5 | raw file
  1. #!/usr/bin/env python
  2. #
  3. # $LicenseInfo:firstyear=2010&license=mit$
  4. #
  5. # Copyright (c) 2010, Linden Research, Inc.
  6. #
  7. # Permission is hereby granted, free of charge, to any person obtaining a copy
  8. # of this software and associated documentation files (the "Software"), to deal
  9. # in the Software without restriction, including without limitation the rights
  10. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11. # copies of the Software, and to permit persons to whom the Software is
  12. # furnished to do so, subject to the following conditions:
  13. #
  14. # The above copyright notice and this permission notice shall be included in
  15. # all copies or substantial portions of the Software.
  16. #
  17. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23. # THE SOFTWARE.
  24. # $/LicenseInfo$
  25. #
  26. #
  27. # Script that processes iostat data, and generates output compatible with the io_plot gnuplot script in this directory.
  28. #
  29. # Usage: Use this script to process raw iostat output, and then take the output and use it as source data
  30. # for the gnuplot script.
  31. #
  32. # ./io_parser < iostat_raw.txt > io.txt
  33. # gnuplot io_plot
  34. #
  35. import time
  36. import sys
  37. import re
  38. def rolling_average(rows):
  39. # Average data over a window. Not currently used.
  40. smooth_rows = []
  41. bin_size = 15
  42. offset = int(bin_size/2)
  43. for i in range(0, len(rows) - bin_size):
  44. s = 0
  45. for j in range(0, bin_size):
  46. s += rows[i+j][1]
  47. avg = s / bin_size
  48. t = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(rows[i+offset][0]))
  49. print "%s\t%f" % (t, avg)
  50. def avg_output(minute_stats):
  51. # Dumps output with a weighted average (throws out low and high sample points).
  52. # Probably should do something like a median value instead...
  53. # Get the sorted keys for all sample points.
  54. # Note that the keys are (hour, minute) tuples
  55. keys = minute_stats.keys()
  56. keys.sort()
  57. for key in keys:
  58. minute_rows = minute_stats[key]
  59. total = 0.0
  60. count = len(minute_rows)
  61. width = len(minute_rows[0])
  62. low = [100000.0]*width
  63. high = [-100000.0]*width
  64. total = [0.0]*width
  65. weighted = [0.0]*width
  66. for row in minute_rows:
  67. # Iterate throw all rows
  68. for i in range(1, width):
  69. # Find sum and min/max for all samples for that minute
  70. val = row[i]
  71. low[i] = min(low[i], val)
  72. high[i] = max(high[i], val)
  73. total[i] += row[i]
  74. for i in range(1,width):
  75. # Generate a weighted average, throwing out the low and high values.
  76. weighted[i] = (total[i] - low[i] - high[i])/(count-2)
  77. # Dump this in a formate compatible with the io_plot gnuplot script
  78. print "%d:%d" % (key[0], key[1]),
  79. for i in range(1, width):
  80. print "%f" % weighted[i],
  81. print
  82. def main():
  83. # Process iostat input from stdin
  84. first = 1
  85. line = ""
  86. # Grab the first line, it includes the date.
  87. line = sys.stdin.readline()
  88. parts = line.split()
  89. date_str = parts[3]
  90. # Skip to the first timestamp
  91. while 1:
  92. line = sys.stdin.readline()
  93. if -1 != line.find("Time:"):
  94. break
  95. rows = []
  96. time_re = re.compile("Time:\s+(\d+):(\d+):(\d+)\n")
  97. # Line should point to the first timestamp
  98. # Start our loop
  99. # Track this information so we can do day rollover (sigh)
  100. last_day = time.mktime(time.strptime(date_str, "%m/%d/%y"))
  101. last_seconds = 0
  102. # For collecting column headers
  103. headers = []
  104. while 1:
  105. # Iterate through output lines
  106. # Figure out the actual time of this data by doing date math by hand (sigh)
  107. # Pull out timestamp
  108. m = time_re.match(line)
  109. if not m:
  110. break
  111. cur_seconds = int(m.group(1))*3600+int(m.group(2))*60+int(m.group(3))
  112. if last_seconds > cur_seconds:
  113. # Must be a new day, increment the day
  114. last_day += 24*3600
  115. last_seconds = cur_seconds
  116. time_secs = last_day + last_seconds
  117. # Skip CPU lines
  118. while 1:
  119. line = sys.stdin.readline()
  120. if -1 != line.find("Device:"):
  121. if first:
  122. headers.extend(line.split())
  123. # Figure out all the column headers
  124. first = 0
  125. break
  126. #i = 0
  127. #for i in range(0, len(headers)):
  128. # print i, headers[i]
  129. # Parse out columns of data, with the first column being the true time in seconds
  130. cols = []
  131. cols.append(time_secs)
  132. while 1:
  133. line = sys.stdin.readline()
  134. if line == "\n":
  135. break
  136. parts = line.split()
  137. dev = parts[0]
  138. # Only pull out % IO, reads/sec and writes/sec for now
  139. if dev == "sdb":
  140. cols.append(float(parts[13]))
  141. cols.append(float(parts[3]))
  142. cols.append(float(parts[4]))
  143. else:
  144. pass
  145. rows.append(cols)
  146. line = sys.stdin.readline()
  147. # We've got all the data, now let's collect aggregated stats
  148. # We generate lists of samples for each minute of the day.
  149. minute_stats = {}
  150. for row in rows:
  151. # Pull out the hour and minute to use as the key for the data.
  152. lt = time.localtime(row[0])
  153. h = lt.tm_hour
  154. m = lt.tm_min
  155. key = (h,m)
  156. # Create a new entry if we don't have one, and append the sample
  157. if not key in minute_stats:
  158. minute_stats[key] = []
  159. minute_stats[key].append(row)
  160. # Output the data
  161. avg_output(minute_stats)
  162. #rolling_average(rows)
  163. main()