io_parser - This script processes iostat data from stdin, e…

/mysql_watcher/io_parser

https://bitbucket.org/lindenlab/apiary/ · Python · 188 lines · 96 code · 25 blank · 67 comment · 21 complexity · d8eb2625b0019e2ccce9107e00c0097a MD5 · raw file


#!/usr/bin/env python
#
# $LicenseInfo:firstyear=2010&license=mit$
# 
# Copyright (c) 2010, Linden Research, Inc.
# 
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# 
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# $/LicenseInfo$
#

#
# Script that processes iostat data, and generates output compatible with the io_plot gnuplot script in this directory.
#
# Usage: Use this script to process raw iostat output, and then take the output and use it as source data
# for the gnuplot script.
#
#  ./io_parser < iostat_raw.txt > io.txt 
#  gnuplot io_plot
#

import time
import sys
import re

def rolling_average(rows, bin_size=15):
    """Print a rolling average of column 1 of ``rows`` to stdout.

    Not currently called by main(); kept as an alternative to avg_output().

    rows     -- sequence of rows where row[0] is a time in seconds since the
                epoch and row[1] is the numeric sample to smooth.
    bin_size -- number of consecutive rows averaged per output line
                (defaults to 15).

    One line is written per full window, in the form
    "YYYY-MM-DD HH:MM:SS<TAB>average", where the timestamp is the local time
    of the row in the middle of the window.  Nothing is written when there
    are fewer than ``bin_size`` rows.

    Raises ValueError if ``bin_size`` is less than 1.
    """
    if bin_size < 1:
        raise ValueError("bin_size must be at least 1")

    offset = bin_size // 2
    # "+ 1" so the last full window (starting at len(rows) - bin_size) is
    # included as well.
    for start in range(len(rows) - bin_size + 1):
        window = rows[start:start + bin_size]
        # Divide by a float so integer samples are not floor-divided on
        # Python 2.
        avg = sum(row[1] for row in window) / float(bin_size)
        stamp = time.strftime("%Y-%m-%d %H:%M:%S",
                              time.localtime(rows[start + offset][0]))
        # sys.stdout.write behaves the same on Python 2 and Python 3.
        sys.stdout.write("%s\t%f\n" % (stamp, avg))

def avg_output(minute_stats):
    """Print one line of per-minute averages for each key of ``minute_stats``.

    minute_stats -- dict mapping (hour, minute) tuples to a list of sample
                    rows.  Column 0 of every row is skipped (main() stores
                    the sample time there); the remaining columns are the
                    numeric values to average.  All rows of a given minute
                    are expected to be as wide as the first one.

    For every minute, in sorted key order, a line "H:M v1 v2 ..." is written
    to stdout, in a format compatible with the io_plot gnuplot script.  Each
    value is the average of that column with the single lowest and single
    highest sample thrown out.  When a minute has only one or two samples
    there is nothing left after trimming, so the plain mean is used instead.
    Probably should do something like a median value instead...
    """
    # Note that the keys are (hour, minute) tuples, so sorting them gives
    # chronological order within a day.
    for key in sorted(minute_stats):
        minute_rows = minute_stats[key]
        count = len(minute_rows)
        if count == 0:
            # No samples for this minute, nothing to report.
            continue
        width = len(minute_rows[0])

        fields = ["%d:%d" % (key[0], key[1])]
        for col in range(1, width):
            # Gather all samples of this column for that minute.
            samples = [row[col] for row in minute_rows]
            total = sum(samples)
            if count > 2:
                # Throw out the low and high values before averaging.
                value = (total - min(samples) - max(samples)) / float(count - 2)
            else:
                # Too few samples to trim; avoids dividing by zero.
                value = total / float(count)
            fields.append("%f" % value)

        # Dump this in a format compatible with the io_plot gnuplot script.
        sys.stdout.write(" ".join(fields) + "\n")

def _read_samples(stream, device):
    # Parse raw iostat text from stream into a list of
    # [time_secs, %IO, reads/sec, writes/sec] rows for the given device.

    # Grab the first line, it includes the date as its fourth field.
    banner = stream.readline().split()
    if len(banner) < 4:
        sys.exit("io_parser: expected the iostat banner line (with the date "
                 "as its fourth field) at the start of the input")
    date_str = banner[3]

    # Skip to the first timestamp.  readline() returns "" at EOF, so stop
    # there instead of looping forever on input without any "Time:" line.
    line = stream.readline()
    while line and "Time:" not in line:
        line = stream.readline()

    time_re = re.compile(r"Time:\s+(\d+):(\d+):(\d+)\n")

    # Track this information so we can do day rollover (sigh)
    last_day = time.mktime(time.strptime(date_str, "%m/%d/%y"))
    last_seconds = 0

    rows = []
    while True:
        # line should hold a timestamp here; anything else (including the
        # empty string at EOF) ends the input.
        m = time_re.match(line)
        if not m:
            break

        # Figure out the actual time of this data by doing date math by hand
        hours, minutes, seconds = [int(g) for g in m.groups()]
        cur_seconds = hours * 3600 + minutes * 60 + seconds
        if last_seconds > cur_seconds:
            # Must be a new day, increment the day
            last_day += 24 * 3600
        last_seconds = cur_seconds
        time_secs = last_day + last_seconds

        # Skip CPU lines, up to and including the device table header.
        line = stream.readline()
        while line and "Device:" not in line:
            line = stream.readline()
        if not line:
            # Input was cut off before the device table; drop this sample.
            break

        # Parse out columns of data, with the first column being the true
        # time in seconds.  The table ends at a blank line (or at EOF).
        cols = [time_secs]
        line = stream.readline()
        while line.strip():
            parts = line.split()
            # Only pull out % IO, reads/sec and writes/sec for now
            if parts[0] == device:
                cols.append(float(parts[13]))
                cols.append(float(parts[3]))
                cols.append(float(parts[4]))
            line = stream.readline()

        # Keep only samples that actually had a line for the device, so
        # every row has the same width.
        if len(cols) > 1:
            rows.append(cols)
        line = stream.readline()

    return rows


def _group_by_minute(rows):
    # Bucket rows by the local (hour, minute) of their timestamp in column 0.
    minute_stats = {}
    for row in rows:
        lt = time.localtime(row[0])
        minute_stats.setdefault((lt.tm_hour, lt.tm_min), []).append(row)
    return minute_stats


def main(device="sdb"):
    """Process iostat input from stdin and print per-minute averages.

    device -- name of the block device whose statistics are extracted from
              each device table (defaults to "sdb").

    The first input line must carry the date as its fourth whitespace
    separated field, in %m/%d/%y form.  Each sample is expected to start
    with a "Time: HH:MM:SS" line, followed (after the CPU lines) by a table
    whose header contains "Device:" and which ends at a blank line.  For the
    selected device, fields 13, 3 and 4 of its table line are collected as
    % IO, reads/sec and writes/sec.

    Samples are grouped by (hour, minute) of their local time and handed to
    avg_output(), which writes the result to stdout.  Truncated input is
    processed up to the point where it ends.  Exits with an error message if
    the first line has fewer than four fields.
    """
    rows = _read_samples(sys.stdin, device)

    # We've got all the data, now let's collect aggregated stats
    # We generate lists of samples for each minute of the day.
    minute_stats = _group_by_minute(rows)

    # Output the data
    avg_output(minute_stats)

# Only run when executed as a script, so importing this file (e.g. to reuse
# avg_output) does not start reading from stdin.
if __name__ == "__main__":
    main()

Summary ✨

This script processes iostat data from stdin, extracting the sample time and, for a single device, the I/O utilization percentage and the reads and writes per second. It then aggregates this data by minute of the day, calculating for each column an average that discards the lowest and highest samples. The output is formatted to be compatible with a gnuplot script, so that it can be used to plot iostat data.

Tech Fingerprint

Alerts (3)

'def' Ensure functions have docstrings for documentation
41 54 91