/mysql_watcher/io_parser
Python | 188 lines | 135 code | 9 blank | 44 comment | 14 complexity | d8eb2625b0019e2ccce9107e00c0097a MD5 | raw file
#!/usr/bin/env python3
#
# $LicenseInfo:firstyear=2010&license=mit$
#
# Copyright (c) 2010, Linden Research, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# $/LicenseInfo$
#

#
# Script that processes iostat data, and generates output compatible with the
# io_plot gnuplot script in this directory.
#
# Usage: Use this script to process raw iostat output, and then take the output
# and use it as source data for the gnuplot script.
#
#   ./io_parser [device] < iostat_raw.txt > io.txt
#   gnuplot io_plot
#

import re
import sys
import time

# Device whose I/O columns are extracted; override with the first CLI argument.
DEFAULT_DEVICE = "sdb"


def rolling_average(rows):
    """Print a rolling average of the second column over a 15-sample window.

    Each row is [epoch_seconds, value, ...]. Emits one
    "YYYY-mm-dd HH:MM:SS<TAB>average" line per window, timestamped at the
    window's midpoint sample. Not currently used.
    """
    bin_size = 15
    offset = bin_size // 2  # midpoint of the averaging window
    for i in range(len(rows) - bin_size):
        window_sum = sum(rows[i + j][1] for j in range(bin_size))
        avg = window_sum / bin_size
        t = time.strftime("%Y-%m-%d %H:%M:%S",
                          time.localtime(rows[i + offset][0]))
        print("%s\t%f" % (t, avg))


def avg_output(minute_stats):
    """Print a weighted per-minute average of each sample column.

    minute_stats maps (hour, minute) -> list of sample rows, where each row is
    [epoch_seconds, col1, col2, ...]. For each minute (in sorted order),
    prints "H:M v1 v2 ..." where each value is the mean of that column with
    the single lowest and highest samples discarded — a crude outlier filter.
    With fewer than three samples there is nothing to discard, so a plain
    mean is used instead. Output format matches the io_plot gnuplot script.
    """
    for key in sorted(minute_stats):
        minute_rows = minute_stats[key]
        count = len(minute_rows)
        width = len(minute_rows[0])
        low = [float("inf")] * width
        high = [float("-inf")] * width
        total = [0.0] * width
        weighted = [0.0] * width

        for row in minute_rows:
            # Accumulate sum and min/max of every data column for this minute.
            for i in range(1, width):
                val = row[i]
                low[i] = min(low[i], val)
                high[i] = max(high[i], val)
                total[i] += val

        for i in range(1, width):
            if count > 2:
                # Throw out the low and high samples, average the rest.
                weighted[i] = (total[i] - low[i] - high[i]) / (count - 2)
            else:
                # Too few samples to drop extremes (the original divided by
                # zero here when count == 2); fall back to the plain mean.
                # For count == 1 this equals the original's accidental result.
                weighted[i] = total[i] / count

        # Dump this in a format compatible with the io_plot gnuplot script.
        fields = ["%d:%d" % (key[0], key[1])]
        fields.extend("%f" % weighted[i] for i in range(1, width))
        print(" ".join(fields))


def main(device=DEFAULT_DEVICE):
    """Parse raw iostat output on stdin; print per-minute averages on stdout.

    device: iostat device name whose columns are collected (default "sdb").
    Only %util (column 13), reads/sec (column 3) and writes/sec (column 4)
    are extracted for now.
    """
    # The first line of iostat output includes the start date; field 3 is the
    # mm/dd/yy date string.
    line = sys.stdin.readline()
    date_str = line.split()[3]

    # Skip ahead to the first timestamp.
    while True:
        line = sys.stdin.readline()
        if not line:
            return  # truncated input: no timestamped samples at all
        if "Time:" in line:
            break

    rows = []
    time_re = re.compile(r"Time:\s+(\d+):(\d+):(\d+)")

    # iostat timestamps are time-of-day only, so do the date math by hand:
    # track seconds-since-midnight and bump the day whenever the clock
    # appears to run backwards (sigh).
    last_day = time.mktime(time.strptime(date_str, "%m/%d/%y"))
    last_seconds = 0

    headers = []  # column headers, captured from the first "Device:" line
    first = True

    while True:
        # `line` holds a candidate "Time: HH:MM:SS" stamp; stop at EOF or at
        # anything that isn't one.
        m = time_re.match(line)
        if not m:
            break
        cur_seconds = (int(m.group(1)) * 3600
                       + int(m.group(2)) * 60
                       + int(m.group(3)))
        if last_seconds > cur_seconds:
            # Time-of-day went backwards: must be a new day.
            last_day += 24 * 3600
        last_seconds = cur_seconds
        time_secs = last_day + last_seconds

        # Skip the CPU-utilization section up to the per-device header line.
        # (The original looped forever here on truncated input; stop at EOF.)
        while True:
            line = sys.stdin.readline()
            if not line or "Device:" in line:
                break
        if not line:
            break  # EOF mid-record; keep the rows collected so far
        if first:
            # Figure out all the column headers.
            headers.extend(line.split())
            first = False

        # Parse one row: true time in seconds first, then the selected
        # columns of the requested device. The device section ends at a
        # blank line.
        cols = [time_secs]
        while True:
            line = sys.stdin.readline()
            if not line or line == "\n":
                break
            parts = line.split()
            # Only pull out % IO, reads/sec and writes/sec for now.
            if parts[0] == device:
                cols.append(float(parts[13]))  # %util
                cols.append(float(parts[3]))   # reads/sec
                cols.append(float(parts[4]))   # writes/sec
        rows.append(cols)

        line = sys.stdin.readline()  # next "Time:" stamp, or "" at EOF

    # We've got all the data; bucket the samples by (hour, minute) of day.
    minute_stats = {}
    for row in rows:
        lt = time.localtime(row[0])
        minute_stats.setdefault((lt.tm_hour, lt.tm_min), []).append(row)

    # Output the aggregated data.
    avg_output(minute_stats)
    # rolling_average(rows)


if __name__ == "__main__":
    main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_DEVICE)