
/mysql_watcher/io_parser

https://bitbucket.org/lindenlab/apiary/
#!/usr/bin/env python
#
# $LicenseInfo:firstyear=2010&license=mit$
#
# Copyright (c) 2010, Linden Research, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# $/LicenseInfo$
#

#
# Script that processes iostat data and generates output compatible with the
# io_plot gnuplot script in this directory.
#
# Usage: Run this script over raw iostat output, then feed its output to the
# gnuplot script as source data:
#
#  ./io_parser < iostat_raw.txt > io.txt
#  gnuplot io_plot
#
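#
# The raw input format isn't documented here; judging from the parsing in
# main() below, it is assumed to be extended iostat output with a per-sample
# timestamp, roughly like the (purely illustrative) sketch below.  The
# hostname, date, device names, and numbers are made up; the parser only keys
# off the leading date line, the "Time:" and "Device:" markers, the blank
# line that ends each device block, and the hard-coded "sdb" device.
#
#   Linux 2.6.x (db1.example.com)   03/01/10
#
#   Time: 00:00:15
#   avg-cpu:  %user  %nice ...
#              1.20   0.00 ...
#
#   Device:  rrqm/s  wrqm/s    r/s    w/s  ...  %util
#   sda        0.00    0.10   1.20   0.40  ...   0.50
#   sdb        0.00   12.40  85.20  43.10  ...  37.50
#
#   Time: 00:00:30
#   ...
#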
import time
import sys
import re

def rolling_average(rows):
    # Average data over a window.  Not currently used.
    smooth_rows = []
    bin_size = 15
    offset = int(bin_size/2)
    for i in range(0, len(rows) - bin_size):
        s = 0
        for j in range(0, bin_size):
            s += rows[i+j][1]
        avg = s / bin_size
        t = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(rows[i+offset][0]))
        print "%s\t%f" % (t, avg)

def avg_output(minute_stats):
    # Dumps output as a trimmed average (throws out the low and high sample points).
    # Probably should do something like a median value instead; see the
    # median_output sketch below.

    # Get the sorted keys for all sample points.
    # Note that the keys are (hour, minute) tuples.
    keys = minute_stats.keys()
    keys.sort()

    for key in keys:
        minute_rows = minute_stats[key]
        count = len(minute_rows)
        width = len(minute_rows[0])
        low = [100000.0]*width
        high = [-100000.0]*width
        total = [0.0]*width
        weighted = [0.0]*width

        for row in minute_rows:
            # Iterate through all rows
            for i in range(1, width):
                # Find sum and min/max for all samples for that minute
                val = row[i]
                low[i] = min(low[i], val)
                high[i] = max(high[i], val)
                total[i] += row[i]
        for i in range(1, width):
            # Generate the trimmed average, throwing out the low and high values.
            weighted[i] = (total[i] - low[i] - high[i])/(count-2)

        # Dump this in a format compatible with the io_plot gnuplot script
        print "%d:%d" % (key[0], key[1]),
        for i in range(1, width):
            print "%f" % weighted[i],
        print

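def median_output(minute_stats):
    # Hedged sketch, not part of the original script: the comment in
    # avg_output() above suggests using a median instead of the trimmed
    # average, and this is one way that could look.  It keeps the same
    # (hour, minute) keys and the same "H:M col1 col2 ..." line format,
    # but nothing below calls it.
    keys = minute_stats.keys()
    keys.sort()
    for key in keys:
        minute_rows = minute_stats[key]
        width = len(minute_rows[0])
        medians = [0.0]*width
        for i in range(1, width):
            # Median of the i-th column across all samples for this minute
            vals = sorted(row[i] for row in minute_rows)
            mid = len(vals)/2
            if len(vals) % 2:
                medians[i] = vals[mid]
            else:
                medians[i] = (vals[mid-1] + vals[mid])/2.0
        print "%d:%d" % (key[0], key[1]),
        for i in range(1, width):
            print "%f" % medians[i],
        print
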
def main():
    # Process iostat input from stdin
    first = 1
    line = ""
    # Grab the first line; it includes the date.
    line = sys.stdin.readline()
    parts = line.split()
    date_str = parts[3]

    # Skip to the first timestamp
    while 1:
        line = sys.stdin.readline()
        if -1 != line.find("Time:"):
            break

    rows = []
    time_re = re.compile(r"Time:\s+(\d+):(\d+):(\d+)\n")

    # Line should point to the first timestamp.
    # Start our loop.

    # Track this information so we can do day rollover (sigh)
    last_day = time.mktime(time.strptime(date_str, "%m/%d/%y"))
    last_seconds = 0

    # For collecting column headers
    headers = []

    while 1:
        # Iterate through output lines

        # Figure out the actual time of this data by doing date math by hand (sigh)
        # Pull out timestamp
        m = time_re.match(line)
        if not m:
            break
        cur_seconds = int(m.group(1))*3600 + int(m.group(2))*60 + int(m.group(3))
        if last_seconds > cur_seconds:
            # Must be a new day, increment the day
            last_day += 24*3600
        last_seconds = cur_seconds
        time_secs = last_day + last_seconds
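        # Worked example of the rollover (illustrative numbers, not from any
        # real trace): if the previous sample was at 23:59:45, last_seconds is
        # 86385; a following sample at 00:00:00 gives cur_seconds == 0, so
        # last_seconds > cur_seconds, last_day advances by 86400, and
        # time_secs lands on the next calendar day as intended.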

        # Skip CPU lines
        while 1:
            line = sys.stdin.readline()
            if -1 != line.find("Device:"):
                if first:
                    # Figure out all the column headers
                    headers.extend(line.split())
                    first = 0
                break

        #i = 0
        #for i in range(0, len(headers)):
        #    print i, headers[i]

        # Parse out columns of data, with the first column being the true time in seconds
        cols = []
        cols.append(time_secs)
        while 1:
            line = sys.stdin.readline()
            if line == "\n":
                break
            parts = line.split()
            dev = parts[0]

            # Only pull out % IO, reads/sec and writes/sec for now
            if dev == "sdb":
                cols.append(float(parts[13]))
                cols.append(float(parts[3]))
                cols.append(float(parts[4]))
            else:
                pass

        rows.append(cols)
        line = sys.stdin.readline()

    # We've got all the data, now let's collect aggregated stats.
    # We generate lists of samples for each minute of the day.
    minute_stats = {}
    for row in rows:
        # Pull out the hour and minute to use as the key for the data.
        lt = time.localtime(row[0])
        h = lt.tm_hour
        m = lt.tm_min
        key = (h, m)

        # Create a new entry if we don't have one, and append the sample
        if key not in minute_stats:
            minute_stats[key] = []
        minute_stats[key].append(row)

    # Output the data
    avg_output(minute_stats)
    #rolling_average(rows)

main()
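
# For reference, the aggregated output written to io.txt (see the usage
# comment at the top) comes out as one line per minute of the day, in the
# order the columns are appended in main():
#
#   hour:minute  <parts[13]>  <reads/sec>  <writes/sec>
#
# e.g. "0:5 37.500000 85.200000 43.100000" (values illustrative only).  What
# parts[13] actually is depends on the iostat version and flags used to
# produce the input.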