/mysql_watcher/io_parser
Python | 188 lines | 135 code | 9 blank | 44 comment | 7 complexity | d8eb2625b0019e2ccce9107e00c0097a MD5 | raw file
- #!/usr/bin/env python
- #
- # $LicenseInfo:firstyear=2010&license=mit$
- #
- # Copyright (c) 2010, Linden Research, Inc.
- #
- # Permission is hereby granted, free of charge, to any person obtaining a copy
- # of this software and associated documentation files (the "Software"), to deal
- # in the Software without restriction, including without limitation the rights
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- # copies of the Software, and to permit persons to whom the Software is
- # furnished to do so, subject to the following conditions:
- #
- # The above copyright notice and this permission notice shall be included in
- # all copies or substantial portions of the Software.
- #
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- # THE SOFTWARE.
- # $/LicenseInfo$
- #
- #
- # Script that processes iostat data, and generates output compatible with the io_plot gnuplot script in this directory.
- #
- # Usage: Use this script to process raw iostat output, and then take the output and use it as source data
- # for the gnuplot script.
- #
- # ./io_parser < iostat_raw.txt > io.txt
- # gnuplot io_plot
- #
- import time
- import sys
- import re
def rolling_average(rows, bin_size=15):
    """Print a centred moving average of column 1 of *rows*.

    Not currently used by ``main``; kept as an alternative to ``avg_output``.

    Args:
        rows: Sequence of indexable records.  ``row[0]`` is a timestamp in
            seconds since the epoch (it is passed to ``time.localtime``) and
            ``row[1]`` is the numeric sample that gets averaged.
        bin_size: Number of consecutive rows in each averaging window.
            Defaults to 15, the value that used to be hard-coded.

    Raises:
        ValueError: If ``bin_size`` is smaller than 1.

    Output:
        One line per window on stdout, formatted as
        ``"<YYYY-mm-dd HH:MM:SS>\\t<average>"``.  The timestamp is taken from
        the row in the middle of the window.  Nothing is printed when there
        are fewer than ``bin_size`` rows.
    """
    if bin_size < 1:
        raise ValueError("bin_size must be at least 1")

    # Index, relative to the window start, of the row whose timestamp labels
    # the averaged value.
    offset = bin_size // 2

    # "+ 1" so that the final complete window is emitted as well; without it
    # an input of exactly bin_size rows would produce no output at all.
    for start in range(len(rows) - bin_size + 1):
        window = rows[start:start + bin_size]
        # float() keeps this a true division on Python 2 even for int samples.
        avg = sum(row[1] for row in window) / float(bin_size)
        stamp = time.strftime("%Y-%m-%d %H:%M:%S",
                              time.localtime(rows[start + offset][0]))
        print("%s\t%f" % (stamp, avg))
def _trimmed_mean(samples):
    """Return the mean of *samples* with one lowest and one highest value dropped.

    With one or two samples there is nothing left after trimming, so the
    plain mean is returned instead of dividing by zero.
    """
    count = len(samples)
    total = sum(samples)
    if count > 2:
        # float() keeps this a true division on Python 2 even for int samples.
        return (total - min(samples) - max(samples)) / float(count - 2)
    return total / float(count)


def avg_output(minute_stats):
    """Print one trimmed-average line per minute bucket.

    Args:
        minute_stats: Mapping from ``(hour, minute)`` tuples to a list of
            sample rows.  Column 0 of every row is skipped (``main`` stores
            the sample time there); each remaining column is averaged
            independently across the rows of that minute.

    Output:
        For every key, in sorted order, a line ``"H:M v1 v2 ..."`` on stdout
        with the values formatted using ``%f``.  This is the layout the
        io_plot gnuplot script is fed with.

    Each value is the average of the column after discarding its single
    lowest and single highest sample.  Minutes with fewer than three samples
    fall back to a plain average.  Buckets with no rows are skipped, and if
    the rows of a bucket differ in length only the columns present in every
    row are reported.
    """
    for key in sorted(minute_stats):
        minute_rows = minute_stats[key]
        if not minute_rows:
            continue

        # Use the shortest row so a ragged bucket cannot raise IndexError.
        width = min(len(row) for row in minute_rows)

        fields = ["%d:%d" % (key[0], key[1])]
        for col in range(1, width):
            samples = [row[col] for row in minute_rows]
            fields.append("%f" % _trimmed_mean(samples))
        print(" ".join(fields))
def _skip_to(stream, marker):
    """Read lines from *stream* until one contains *marker*.

    Returns that line, or ``""`` if the input ends first (``readline``
    returns an empty string at end of input, which would otherwise make the
    caller loop forever).
    """
    while True:
        line = stream.readline()
        if not line or marker in line:
            return line


def _read_samples(stream, device):
    """Parse iostat reports from *stream* into a list of sample rows.

    Each returned row is ``[time_secs, a, b, c]`` where ``time_secs`` is the
    sample time in seconds since the epoch and ``a``, ``b``, ``c`` are fields
    13, 3 and 4 of the line for *device* (% IO, reads/sec and writes/sec).
    """
    # The first line carries the capture date as its fourth field.
    header = stream.readline().split()
    if len(header) < 4:
        raise ValueError("input does not start with an iostat header line")

    # Local midnight of the current day; bumped by 24h whenever the
    # time-of-day goes backwards, because the "Time:" lines carry no date.
    day_start = time.mktime(time.strptime(header[3], "%m/%d/%y"))
    last_seconds = 0

    time_re = re.compile(r"Time:\s+(\d+):(\d+):(\d+)\n")
    rows = []

    line = _skip_to(stream, "Time:")
    while True:
        stamp = time_re.match(line)
        if not stamp:
            # End of input, or a line that is not a timestamp: stop parsing.
            break

        hours, minutes, seconds = (int(group) for group in stamp.groups())
        cur_seconds = hours * 3600 + minutes * 60 + seconds
        if last_seconds > cur_seconds:
            # Time of day went backwards, so a new day must have started.
            day_start += 24 * 3600
        last_seconds = cur_seconds

        # Skip the CPU section; the device table starts after "Device:".
        if not _skip_to(stream, "Device:"):
            break  # Report was cut off before its device table.

        cols = [day_start + cur_seconds]
        while True:
            line = stream.readline()
            if not line.strip():
                # A blank line ends the table; "" means the input ended.
                break
            parts = line.split()
            if parts[0] == device:
                # Only pull out % IO, reads/sec and writes/sec for now.
                cols.append(float(parts[13]))
                cols.append(float(parts[3]))
                cols.append(float(parts[4]))

        if len(cols) > 1:
            # Keep only samples that actually contained the device, so that
            # every row handed to the averaging step has the same width.
            rows.append(cols)

        # The next report, if any, starts with its "Time:" line.
        line = stream.readline()

    return rows


def _group_by_minute(rows):
    """Bucket sample rows by the local (hour, minute) of their timestamp."""
    minute_stats = {}
    for row in rows:
        local = time.localtime(row[0])
        minute_stats.setdefault((local.tm_hour, local.tm_min), []).append(row)
    return minute_stats


def main(device="sdb", stream=None):
    """Process raw iostat output and print per-minute averages.

    Reads the iostat capture, extracts the samples for one block device,
    groups them by minute of the day and hands the groups to ``avg_output``,
    which prints the data consumed by the io_plot gnuplot script.

    Args:
        device: Name in the first column of the iostat device table to
            extract.  Defaults to ``"sdb"``, the value that used to be
            hard-coded.
        stream: File-like object with a ``readline`` method to read from.
            Defaults to ``sys.stdin``.

    Raises:
        ValueError: If the first line has fewer than four fields, or its
            fourth field is not a ``%m/%d/%y`` date.

    Truncated input no longer hangs or crashes: parsing stops at end of
    input and whatever complete samples were read are reported.
    """
    if stream is None:
        stream = sys.stdin

    rows = _read_samples(stream, device)
    if not rows:
        # Nothing was parsed (e.g. truncated capture), so nothing to report.
        return

    avg_output(_group_by_minute(rows))
# Run the parser only when executed as a script, so that importing this
# module does not start reading from stdin.
if __name__ == "__main__":
    main()