
/mysql_watcher/table_info

https://bitbucket.org/lindenlab/apiary/
#!/usr/bin/env python
#
# $LicenseInfo:firstyear=2010&license=mit$
#
# Copyright (c) 2010, Linden Research, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# $/LicenseInfo$
#

 27"""
 28Generate info about queries based on the tables that are involved
 29"""
import os.path
if os.path.exists("../setup-path.py"):
    execfile("../setup-path.py")

import copy
import sys
from llbase import llsd
from dblibs.dbutil import *
from dblibs.dbbrowser import *
from dblibs.dbmonitor import *

# Placeholder credentials; replace with real database credentials before use.
USER = "foo"
PASSWORD = "bar"

def dump_table_summary(table_info):
    # Dump a summary suitable for use in excel summarizing table usage
    # Sort tables by total time used
    sorted_keys = sort_table_by_field(table_info, 'total_time')
    sorted_keys.reverse()

    out_str = ""
    # Print header
    out_str += "Table\tNumQueries\tTotalTime\tQPS\tsecs/query\tUniqueQueries\tJoins\n"
    for table in sorted_keys:
        join_str = " ".join(table_info[table]['joins'])
        out_str += "%s\t%d\t%.2f\t%.2f\t%.3f\t%d\t%s\n" % (table,
                                                           table_info[table]['num_queries'],
                                                           table_info[table]['total_time'],
                                                           table_info[table]['qps'],
                                                           table_info[table]['avg_time'],
                                                           len(table_info[table]['queries']),
                                                           join_str)
    return out_str


def dump_query_summary(table_info):
    # Dump detailed query information, sorted by joins
    # Sort tables by total time used
    sorted_keys = sort_table_by_field(table_info, 'total_time')
    sorted_keys.reverse()

    out_str = ""
    # Print header
    out_str += "Tables\tNumQueries\tTotalTime\tQPS\tsecs/query\tUniqueQueries\tJoins\n"
    for table in sorted_keys:
        # Joins for this table, formatted the same way as in dump_table_summary
        join_str = " ".join(table_info[table]['joins'])
        # Iterate through query stats for each table
        query_list = table_info[table]['queries']
        for query in query_list:
            out_str += "%s\t%d\t%.2f\t%.2f\t%.3f\t%d\t%s\n" % (table,
                                                               query['num_queries'],
                                                               query['total_time'],
                                                               table_info[table]['qps'],
                                                               table_info[table]['avg_time'],
                                                               len(table_info[table]['queries']),
                                                               join_str)
    return out_str

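# Note: dump_query_summary is not invoked from __main__ below; it is kept for
# ad-hoc use alongside dump_table_summary.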
def dump_graphviz(table_info):
    # Create a graphviz file with the relationships between tables
    # Create a list of all joins
    # Uniquely identify joins
    join_map = {}
    for table in table_info:
        for join_table in table_info[table]['joins']:
            pair = ['"' + table + '"',
                    '"' + join_table + '"']
            pair.sort()
            join_str = " -- ".join(pair)
            join_map[join_str] = 1
    joins = join_map.keys()
    joins.sort()

    # Edge list in undirected DOT syntax, one "A" -- "B" pair per line
    out_str = ""
    for join in joins:
        out_str += join + "\n"
    return out_str


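# A minimal sketch, assuming a "table_joins.dot" output name (not in the
# original), of turning the dump_graphviz edge list into a renderable DOT file:
#
#   f = open("table_joins.dot", "w")
#   f.write("graph table_joins {\n" + dump_graphviz(table_info) + "}\n")
#   f.close()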
def dump_table_detail(table):
    out_str = ""
    out_str += "%s: %d queries, %.2f seconds, %.2f QPS, %.2f secs/query\n" % (table['name'],
                                                                              table['num_queries'],
                                                                              table['total_time'],
                                                                              table['qps'],
                                                                              table['avg_time'])
    join_str = " ".join(table['joins'])
    out_str += "Joins: " + join_str + "\n"
    out_str += '----------------------------------------\n'
    # Sort the queries by total time, descending
    queries = table['queries']
    queries.sort(lambda x, y: cmp(x['total_time'], y['total_time']))
    queries.reverse()
    for query in queries:
        # 'elapsed' is a module-level global computed in __main__ below
        out_str += "%0.2f\t%d\t%0.2f\t%s\n" % (query['total_time'], query['num_queries'], query['num_queries']/elapsed, query['query_clean'])
    return out_str


def load_db_info(fn):
    # Read in the database information from the LLSD document
    f = open(fn, "r")
    in_str = f.read()
    f.close()
    dbs = llsd.LLSD.parse(in_str)
    return dbs

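# Expected shape of the parsed db info, inferred from the lookups below:
#   {db_name: {'tables': {key: {'Name': table_name, ...}, ...}, ...}, ...}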
def prepend_database(dbs, in_tn):
    # Prepend the database name if it's not in the string.

    # Already has a database name, return
    if in_tn.find(".") != -1:
        return in_tn

    # Search for the table name inside all the dbs
    for db in dbs.keys():
        for table in dbs[db]['tables'].values():
            if table['Name'] == in_tn:
                return db+"."+table['Name']
    #print "Unknown table!"
    return in_tn

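# Example (hypothetical names): prepend_database(dbs, 'inventory') returns
# 'agni.inventory' if some db 'agni' defines a table named 'inventory', and
# returns the input unchanged when no database defines that table.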
def find_db_from_query(dbs, query):
    tables = get_query_tables(query)
    for table in tables:
        # Already has a database name, skip - we can't figure out the database from this table
        if table.find(".") != -1:
            continue

        # Search for the table name inside all the dbs
        for db in dbs.keys():
            for db_table in dbs[db]['tables'].values():
                if db_table['Name'] == table:
                    return db
    #print "Failed, tables:", tables
    return None

def generate_explains(db_host, user, password):
    # Relies on the module-level query_info and dbs loaded in __main__ below.
    # Open up a database connection
    dbm = DBMonitor(db_host, user, password)

    explain_results = {}
    # Let's try explaining a bunch of these queries
    for query in query_info:
        db = find_db_from_query(dbs, query['query'])
        #print db
        #print query['query_clean']
        exp = dbm.explain(db, query['query'])
        if not exp:
            continue

        # Map keys must be strings in order to be valid LLSD
        qc = query['query_clean']
        if not qc:
            qc = ''
        explain_results[qc] = exp

    # Dump the explains info into a file
    f = open("query_explain.llsd", "w")
    f.write(str(llsd.LLSD(explain_results)))
    f.close()

def load_explains(fn):
    # Fall back to an empty map if the file is missing or unparseable
    try:
        f = open(fn, 'r')
        in_str = f.read()
        f.close()
        explains = llsd.LLSD.parse(in_str)
    except Exception:
        explains = {}
    return explains

def sort_table_by_field(d, field):
    "Returns the keys of dictionary d sorted by the value in the field"
    # Decorate-sort-undecorate: pair each field value with its key, sort by
    # value, then strip the values back off
    items = d.items()
    backitems = [[v[1][field], v[0]] for v in items]
    backitems.sort()
    return [backitems[i][1] for i in range(0, len(backitems))]
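# Example: sort_table_by_field({'a': {'total_time': 2.0}, 'b': {'total_time': 1.0}},
#                              'total_time') returns ['b', 'a'] (ascending order).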

def total_sort(a, b):
    # cmp-style comparator for list.sort(): sort by total time, descending,
    # so the positive/negative returns are deliberately inverted
    if a['total_time'] < b['total_time']:
        return 1
    elif a['total_time'] > b['total_time']:
        return -1
    return 0

if __name__ == "__main__":
    path = sys.argv[1]

    # Read in LLSD query information
    in_file = open(path)
    in_string = in_file.read()
    in_file.close()
    in_llsd = llsd.LLSD.parse(in_string)
    session_metadata = in_llsd[0]
    query_info = in_llsd[1]

    description = session_metadata['description']
    # Wall-clock span of the capture, used for QPS calculations throughout
    elapsed = session_metadata['last_time'] - session_metadata['start_time']

    # Read in database information
    dbs = load_db_info("db_info_mysql.agni.lindenlab.com.llsd")

    # Disabled explain-analysis pass; flip "if 0" to "if 1" to enable it
    if 0:
        do_explain = 0
        if do_explain:
            generate_explains('slave.mysql.agni.lindenlab.com', USER, PASSWORD)

        # Load explain information from the database
        explains = load_explains("./query_explain.llsd")

        extras = {}
        # Iterate through the explains rows, and figure out what sucks
        for exp in explains.items():
            for raw_exp in exp[1]['raw_explain']:
                extra = raw_exp['Extra']
                if not extra in extras:
                    extras[extra] = {}
                    extras[extra]['count'] = 0
                    extras[extra]['queries'] = {}
                extras[extra]['count'] += 1
                extras[extra]['queries'][exp[0]] = exp

        # Find every query that's a temporary or filesort
        file_temp_queries = {}
        for extra in extras.items():
            if (extra[0].find('file') == -1) and (extra[0].find('temp') == -1):
                continue
            #print extra[0], extra[1]['count']
            #print '-----------------------------'
            for exp in extra[1]['queries'].items():
                query_clean = exp[0]
                # Look up the query in our query stats
                if not query_clean in file_temp_queries:
                    file_temp_queries[query_clean] = extra[0]
                count = 0
                total_time = 0.0
                for query in query_info:
                    if query['query_clean'] == query_clean:
                        count += query['num_queries']
                        total_time += query['total_time']
                avg_time = total_time / count
                #print "%d %.2f %.4f %s" % (count, total_time, avg_time, query_clean)
            #print
            #print

        # Sort file/tmp queries by total execution time
        bad_q_list = []
        for query in query_info:
            qc = query['query_clean']
            if not qc in file_temp_queries:
                # Skip ones that aren't filesorts or temp tables
                continue
            query['Extra'] = file_temp_queries[qc]
            bad_q_list.append(query)

        # Sort the bad q list by total_time
        #print "Total time\tCount\tQPS\tExtra\tHost\tQuery_clean"
        bad_q_list.sort(total_sort)
        for query in bad_q_list:
            print "%.2f\t%d\t%.2f\t%s\t%s\t%s\t" % (query['total_time'],
                                                    query['num_queries'],
                                                    query['num_queries']/elapsed,
                                                    query['Extra'],
                                                    query['host_clean'],
                                                    query['query_clean'])

    join_table_info = {}
    table_info = {}
    no_table_queries = []
    for query in query_info:
        if not query['tables']:
            no_table_queries.append(query['query_clean'])

        # Get all tables, and clean them up to have databases prepended
        dirty_tables = get_query_tables(query['query_clean'])
        tables = []
        join_tables = []
        for table in dirty_tables:
            tables.append(prepend_database(dbs, table))
        if len(tables):
            print "Tables:", tables
            tables.sort()
            join_tables.append(','.join(tables))
        else:
            join_tables.append('None')
        #query['tables'] = copy.deepcopy(tables)
        query['tables'] = copy.deepcopy(join_tables)

        # Iterate through tables associated with the query, and aggregate statistical data
        # and append query to list
        for table in query['tables']:
            if not table in table_info:
                table_info[table] = {}
                table_info[table]['name'] = table
                table_info[table]['num_queries'] = 0
                table_info[table]['total_time'] = 0.0
                table_info[table]['joins'] = {}
                table_info[table]['queries'] = []
            table_info[table]['queries'].append(query)
            table_info[table]['num_queries'] += query['num_queries']
            table_info[table]['total_time'] += query['total_time']
        # Keep count of joins on a per-table basis
        for table1 in query['tables']:
            for table2 in query['tables']:
                if table2 != table1:
                    if not table2 in table_info[table1]['joins']:
                        table_info[table1]['joins'][table2] = 0
                    table_info[table1]['joins'][table2] += 1

    # Generate stats for the tables
    for table in table_info.keys():
        table_info[table]['qps'] = table_info[table]['num_queries']/elapsed
        table_info[table]['avg_time'] = table_info[table]['total_time']/table_info[table]['num_queries']

    f = open("./table_stats.txt", "w")
    f.write(dump_table_summary(table_info))
    f.close()

    # DOT edge list for the join graph (see the sketch after dump_graphviz)
    dump_graphviz(table_info)

    # Sort tables by total time used
    sorted_keys = sort_table_by_field(table_info, 'total_time')
    sorted_keys.reverse()

    out_str = ""
    for table in sorted_keys:
        out_str += dump_table_detail(table_info[table])
        out_str += "\n"
    out_str += "No table\n"
    out_str += '----------------------------------------\n'
    for query in no_table_queries:
        out_str += query + '\n'
    f = open("./table_details.txt", "w")
    f.write(out_str)
    f.close()