PageRenderTime 43ms CodeModel.GetById 10ms RepoModel.GetById 1ms app.codeStats 0ms

/mysql_watcher/table_info

https://bitbucket.org/lindenlab/apiary/
Python | 370 lines | 334 code | 6 blank | 30 comment | 1 complexity | e156b0071e1c127a0b97d2d67e7d96ca MD5 | raw file
  1. #!/usr/bin/env python
  2. #
  3. # $LicenseInfo:firstyear=2010&license=mit$
  4. #
  5. # Copyright (c) 2010, Linden Research, Inc.
  6. #
  7. # Permission is hereby granted, free of charge, to any person obtaining a copy
  8. # of this software and associated documentation files (the "Software"), to deal
  9. # in the Software without restriction, including without limitation the rights
  10. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11. # copies of the Software, and to permit persons to whom the Software is
  12. # furnished to do so, subject to the following conditions:
  13. #
  14. # The above copyright notice and this permission notice shall be included in
  15. # all copies or substantial portions of the Software.
  16. #
  17. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23. # THE SOFTWARE.
  24. # $/LicenseInfo$
  25. #
  26. """
  27. Generate info about queries based on the tables that are involved
  28. """
import os.path
# Pull in the local sys.path setup when running from a source checkout.
if os.path.exists("../setup-path.py"):
    execfile("../setup-path.py")  # Python 2 only; no py3 equivalent here
import getopt, sys
import copy
import curses
import curses.textpad
import curses.wrapper
import string
import sys
from llbase import llsd
from dblibs.dbutil import *
from dblibs.dbbrowser import *
from dblibs.dbmonitor import *

# Database credentials (placeholders — replace before running EXPLAINs).
USER = "foo"
PASSWORD = "bar"
  45. def dump_table_summary(table_info):
  46. # Dump a summary suitable for use in excel summarizing table usage
  47. # Sort tables by total time used
  48. sorted_keys = sort_table_by_field(table_info, 'total_time')
  49. sorted_keys.reverse()
  50. out_str = ""
  51. # Print header
  52. out_str += "Table\tNumQueries\tTotalTime\tQPS\tsecs/query\tUniqueQueries\tJoins\n"
  53. for table in sorted_keys:
  54. join_str = " ".join(table_info[table]['joins'])
  55. out_str += "%s\t%d\t%.2f\t%.2f\t%.3f\t%d\t%s\n" % (table,
  56. table_info[table]['num_queries'],
  57. table_info[table]['total_time'],
  58. table_info[table]['qps'],
  59. table_info[table]['avg_time'],
  60. len(table_info[table]['queries']),
  61. join_str)
  62. return out_str
  63. def dump_query_summary(table_info):
  64. # Dump detailed query information, sorted by joins
  65. # Sort tables by total time used
  66. sorted_keys = sort_table_by_field(table_info, 'total_time')
  67. sorted_keys.reverse()
  68. out_str = ""
  69. # Print header
  70. out_str += "Tables\tNumQueries\tTotalTime\tQPS\tsecs/query\tUniqueQueries\tJoins\n"
  71. for table in sorted_keys:
  72. # Iterate through query stats for each table
  73. query_list = table_info[table]['queries']
  74. for query in query_list:
  75. out_str += "%s\t%d\t%.2f\t%.2f\t%.3f\t%d\t%s\n" % (table,
  76. query['num_queries'],
  77. query['total_time'],
  78. table_info[table]['qps'],
  79. table_info[table]['avg_time'],
  80. len(table_info[table]['queries']),
  81. join_str)
  82. return out_str
  83. def dump_graphviz(table_info):
  84. # Create a graphviz file with the relationships between tables
  85. # Create a list of all joins
  86. # Uniquely identify joins
  87. join_map = {}
  88. for table in table_info:
  89. for join_table in table_info[table]['joins']:
  90. pair = ['"' + table + '"',
  91. '"' + join_table + '"']
  92. pair.sort()
  93. join_str = " -- ".join(pair)
  94. join_map[join_str] = 1
  95. joins = join_map.keys()
  96. joins.sort()
  97. out_str = ""
  98. for join in joins:
  99. out_str += join + "\n"
  100. def dump_table_detail(table):
  101. out_str = ""
  102. out_str += "%s: %d queries, %.2f seconds, %.2f QPS, %.2f secs/query\n" % (table['name'],
  103. table['num_queries'],
  104. table['total_time'],
  105. table['qps'],
  106. table['avg_time'])
  107. join_str = " ".join(table['joins'])
  108. out_str += "Joins: " + join_str + "\n"
  109. out_str += '----------------------------------------\n'
  110. # Sort the queries by total time
  111. queries = table['queries']
  112. queries.sort(lambda x, y: cmp(x['total_time'], y['total_time']))
  113. queries.reverse()
  114. for query in queries:
  115. out_str += "%0.2f\t%d\t%0.2f\t%s\n" % (query['total_time'], query['num_queries'], query['num_queries']/elapsed, query['query_clean'])
  116. return out_str
  117. def load_db_info(fn):
  118. # Read in the database information from the LLSD document
  119. f = open(fn, "r")
  120. in_str = f.read()
  121. f.close()
  122. dbs = llsd.LLSD.parse(in_str)
  123. return dbs
  124. def prepend_database(dbs, in_tn):
  125. # prepend the database name if it's not in the string.
  126. # Already has a database name, return
  127. if in_tn.find(".") != -1:
  128. return in_tn
  129. # Search for the table name inside all the dbs
  130. for db in dbs.keys():
  131. for table in dbs[db]['tables'].values():
  132. if table['Name'] == in_tn:
  133. return db+"."+table['Name']
  134. #print "Unknown table!"
  135. return in_tn
  136. def find_db_from_query(dbs, query):
  137. tables = get_query_tables(query)
  138. for table in tables:
  139. # Already has a database name, skip - we can't figure out the database from this table
  140. if table.find(".") != -1:
  141. continue
  142. # Search for the table name inside all the dbs
  143. for db in dbs.keys():
  144. for db_table in dbs[db]['tables'].values():
  145. if db_table['Name'] == table:
  146. return db
  147. #print "Failed, tables:", tables
  148. return None
  149. def generate_explains(db_host, user, password):
  150. # Open up a database connection
  151. dbm = DBMonitor(db_host, user, password)
  152. explain_results = {}
  153. # Let's try explaining a bunch of these queries
  154. for query in query_info:
  155. db = find_db_from_query(dbs, query['query'])
  156. #print db
  157. #print query['query_clean']
  158. exp = dbm.explain(db, query['query'])
  159. if not exp:
  160. continue
  161. # Map keys must be strings in order to be valid LLSD
  162. qc = query['query_clean']
  163. if not qc:
  164. qc = ''
  165. explain_results[qc] = exp
  166. # Dump the explains info into a file
  167. f = open("query_explain.llsd", "w")
  168. f.write(str(llsd.LLSD(explain_results)))
  169. f.close()
  170. def load_explains(fn):
  171. try:
  172. f = open(fn, 'r')
  173. in_str = f.read()
  174. f.close()
  175. explains = llsd.LLSD.parse(in_str)
  176. except:
  177. explains = {}
  178. return explains
  179. def sort_table_by_field(d, field):
  180. "Returns the keys of dictionary d sorted by the value in the field"
  181. items=d.items()
  182. backitems=[ [v[1][field],v[0]] for v in items]
  183. backitems.sort()
  184. return [ backitems[i][1] for i in range(0,len(backitems))]
  185. def total_sort(a, b):
  186. # Sort by total time, descending order
  187. if a['total_time'] < b['total_time']:
  188. return 1
  189. elif a['total_time'] > b['total_time']:
  190. return -11
  191. return 0
if __name__ == "__main__":
    # Usage: table_info.py <query-stats.llsd>
    # Path to the LLSD query-stats capture produced by the query watcher.
    path = sys.argv[1]
    # Read in LLSD query information
    in_file = open(path)
    in_string = in_file.read()
    in_file.close()
    in_llsd = llsd.LLSD.parse(in_string)
    # The capture is a two-element document: [session metadata, per-query stats].
    session_metadata = in_llsd[0]
    query_info = in_llsd[1]
    description = session_metadata['description']
    # Wall-clock span of the capture; denominator for every QPS figure below.
    elapsed = session_metadata['last_time'] - session_metadata['start_time']
    # Read in database information
    dbs = load_db_info("db_info_mysql.agni.lindenlab.com.llsd")
    # Dead code: EXPLAIN generation is doubly disabled ("if 0" plus the
    # do_explain flag).  Kept for manual re-enabling.
    if 0:
        do_explain = 0
        if do_explain:
            generate_explains('slave.mysql.agni.lindenlab.com', USER, PASSWORD)
    # Load explain information from the database
    explains = load_explains("./query_explain.llsd")
    extras = {}
    # Iterate through the explains rows, and figure out what sucks:
    # bucket queries by the EXPLAIN "Extra" column text.
    for exp in explains.items():
        for raw_exp in exp[1]['raw_explain']:
            extra = raw_exp['Extra']
            if not extra in extras:
                extras[extra] = {}
                extras[extra]['count'] = 0
                extras[extra]['queries'] = {}
            extras[extra]['count'] += 1
            extras[extra]['queries'][exp[0]] = exp
    # Find every query that's a temporary or filesort
    file_temp_queries = {}
    for extra in extras.items():
        # Only "Using filesort" / "Using temporary"-style buckets matter.
        if (extra[0].find('file') == -1) and (extra[0].find('temp') == -1):
            continue
        #print extra[0], extra[1]['count']
        #print '-----------------------------'
        for exp in extra[1]['queries'].items():
            query_clean = exp[0]
            # Look up the query in our query stats
            if not query_clean in file_temp_queries:
                file_temp_queries[query_clean] = extra[0]
            # Aggregate stats for this query (only used by the commented
            # print below).
            count = 0
            total_time = 0.0
            for query in query_info:
                if query['query_clean'] == query_clean:
                    count += query['num_queries']
                    total_time += query['total_time']
            avg_time = total_time / count
            #print "%d %.2f %.4f %s" % (count, total_time, avg_time, query_clean)
    #print
    #print
    # Sort file/tmp queries by total execution time
    bad_q_list = []
    for query in query_info:
        qc = query['query_clean']
        if not qc in file_temp_queries:
            # Skip ones that aren't filesorts or temp tables
            continue
        # NOTE: annotates the shared query dict in place.
        query['Extra'] = file_temp_queries[qc]
        bad_q_list.append(query)
    # Sort the bad q list by total_time
    #print "Total time\tCount\tQPS\tExtra\tHost\tQuery_clean"
    bad_q_list.sort(total_sort)
    for query in bad_q_list:
        print "%.2f\t%d\t%.2f\t%s\t%s\t%s\t" % (query['total_time'],
                                                query['num_queries'],
                                                query['num_queries']/elapsed,
                                                query['Extra'],
                                                query['host_clean'],
                                                query['query_clean'])
    join_table_info = {}
    table_info = {}
    no_table_queries = []
    # Aggregate per-"table set" statistics: every table combination joined
    # in one query becomes a single comma-separated key.
    for query in query_info:
        if not query['tables']:
            no_table_queries.append(query['query_clean'])
        # Get all tables, and clean them up to have databases prepended
        dirty_tables = get_query_tables(query['query_clean'])
        tables = []
        join_tables = []
        for table in dirty_tables:
            tables.append(prepend_database(dbs,table))
        if len(tables):
            print "Tables:", tables
            # Sorted so the same join set always produces the same key.
            tables.sort()
            join_tables.append(','.join(tables))
        else:
            join_tables.append('None')
        #query['tables'] = copy.deepcopy(tables)
        query['tables'] = copy.deepcopy(join_tables)
        # Iterate through tables associated with the query, and aggregate statistical data
        # and append query to list
        for table in query['tables']:
            if not table in table_info:
                table_info[table] = {}
                table_info[table]['name'] = table
                table_info[table]['num_queries'] = 0
                table_info[table]['total_time'] = 0.0
                table_info[table]['joins'] = {}
                table_info[table]['queries'] = []
            table_info[table]['queries'].append(query)
            table_info[table]['num_queries'] += query['num_queries']
            table_info[table]['total_time'] += query['total_time']
        # Keep count of joins on a per-table basis
        for table1 in query['tables']:
            for table2 in query['tables']:
                if table2 != table1:
                    if not table2 in table_info[table1]['joins']:
                        table_info[table1]['joins'][table2] = 0
                    table_info[table1]['joins'][table2] += 1
    # Generate stats for the tables
    for table in table_info.keys():
        table_info[table]['qps'] = table_info[table]['num_queries']/elapsed
        table_info[table]['avg_time'] = table_info[table]['total_time']/table_info[table]['num_queries']
    # Write spreadsheet-ready summary.
    f = open("./table_stats.txt","w")
    f.write(dump_table_summary(table_info))
    f.close()
    # NOTE(review): dump_graphviz's return value is discarded here — the
    # graph text is never written anywhere.
    dump_graphviz(table_info)
    # Sort tables by total time used
    sorted_keys = sort_table_by_field(table_info, 'total_time')
    sorted_keys.reverse()
    out_str = ""
    for table in sorted_keys:
        out_str += dump_table_detail(table_info[table])
        out_str += "\n"
    # Trailing section for queries whose tables could not be determined.
    out_str += "No table\n"
    out_str += '----------------------------------------\n'
    for query in no_table_queries:
        out_str += query + '\n'
    f = open("./table_details.txt", "w")
    f.write(out_str)
    f.close()