
/bin/apiary_mysql_logger.py

https://bitbucket.org/lindenlab/apiary/
#!/usr/bin/python
#
# $LicenseInfo:firstyear=2010&license=mit$
#
# Copyright (c) 2010, Linden Research, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# $/LicenseInfo$
#
"""
Log all queries hitting a particular mysql database
"""
try:
    import psyco
    psyco.full()
except:
    pass

import array
import curses
import curses.wrapper
import getopt
import math
import os.path
import re
import socket
import struct
import sys
import time
LOG_ROTATION_INTERVAL = 3600
MAX_LOGS = 36
MIN_BIN = -15
MAX_BIN = 10

ip_table = {}
host_type_cache = {}

sim_re = re.compile(".*sim\d+.*")
web_re = re.compile("int\.web\d+.*")
iweb_re = re.compile("int\.iweb\d+.*")
webds_re = re.compile(".*web-ds\d+.*")
login_re = re.compile(".*login\d+.*")
data_re = re.compile(".*data\..*")
xmlrpc_re = re.compile("(?:int\.omgiwanna.*)|(?:int\.pony.*)")
ip_re = re.compile("\d+\.\d+\.\d+\.\d+")
ll_re = re.compile("(.*)\.lindenlab\.com")

#
# Utility stuff for query cleaner
#
hex_wildcard = r"[0-9a-fA-F]"
word = hex_wildcard + r"{4,4}-"
long_word = hex_wildcard + r"{8,8}-"
very_long_word = hex_wildcard + r"{12,12}"
UUID_REGEX_STRING = long_word + word + word + word + very_long_word

uuid_re = re.compile("[\"\']" + UUID_REGEX_STRING + "[\"\']")
hex_re = re.compile("[\"\'][\da-f]+[\"\']")
num_re = re.compile("[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?")
# Quoted string re from: http://blog.stevenlevithan.com/archives/match-quoted-string
string_re = re.compile(r'([\"\'])(?:(?=(\\?))\2.)*?\1')
values_re = re.compile('VALUES\s+\(.*\)', re.IGNORECASE)
in_re = re.compile('IN\s+\(.*\)', re.IGNORECASE)
prepare_re = re.compile('PREPARE.*', re.IGNORECASE)
deallocate_re = re.compile('DEALLOCATE\s+PREPARE.*', re.IGNORECASE)
execute_re = re.compile('EXECUTE.*', re.IGNORECASE)
mdb_re = re.compile('MDB2_STATEMENT\S+')
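# For example (UUID literal is illustrative), uuid_re matches only a *quoted*
# UUID such as "'6076e0f6-6807-4f2d-8a3f-2b4b9e3c9c1e'"; unquoted UUIDs are
# left alone by this pattern.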


def llquery_from_llsd(query_llsd):
    # Hack, fill in arbitrary data for info that isn't serialized
    query = LLQuery(None, None, query_llsd['query'], 0.0)
    query.mData['host_clean'] = query_llsd['host_clean']
    query.mData['query_clean'] = query_llsd['query_clean']
    # Hack, keeps correctOutliers from trashing the data
    #query.mNumQueries = query_llsd['num_queries']
    #query.mTotalTime = query_llsd['total_time']
    try:
        query.mNumQueriesCorrected = query_llsd['num_queries_corrected']
        query.mTotalTimeCorrected = query_llsd['total_time_corrected']
    except:
        # Hack for old output which didn't generate this data
        query.mNumQueriesCorrected = query_llsd['num_queries']
        query.mTotalTimeCorrected = query_llsd['total_time']
    return query


# MySQL protocol sniffer, using tcpdump, ncap packet parsing and mysql internals
# http://forge.mysql.com/wiki/MySQL_Internals_ClientServer_Protocol
class LLQueryStream:
    "Process a raw tcpdump stream (in raw libpcap format)"
    def __init__(self, in_file):
        self.mInFile = in_file
        self.mStartTime = time.time()
        #
        # A map of all outstanding "connections", and what they're doing.
        # This is necessary in order to get script timing and other information.
        #
        self.mConnStatus = {}
        #
        # Parse/skip past the libpcap global header
        #
        # guint32 magic_number;  /* magic number */
        # guint16 version_major; /* major version number */
        # guint16 version_minor; /* minor version number */
        # gint32  thiszone;      /* GMT to local correction */
        # guint32 sigfigs;       /* accuracy of timestamps */
        # guint32 snaplen;       /* max length of captured packets, in octets */
        # guint32 network;       /* data link type */
        format = 'IHHiIII'
        size = struct.calcsize(format)
        header_bin = self.mInFile.read(size)
        res = struct.unpack(format, header_bin)
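        # Note: res[0] is the pcap magic number (0xa1b2c3d4 when the capture's
        # byte order matches this reader's); the header is skipped, not validated.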

    def closeConnection(self, ip_port):
        if ip_port in self.mConnStatus:
            del self.mConnStatus[ip_port]

    def getNextEvent(self):
        # Get the next event out of the packet stream
        td_format = 'IIII'
        ip_format = '!BBHHHBBHII'
        tcp_format = '!HHIIBBHHH'
        while 1:
            #
            # Parse out an individual packet from the tcpdump stream
            #
            # Pull a record (packet) off of the wire.
            # Packet header:
            # guint32 ts_sec;   /* timestamp seconds */
            # guint32 ts_usec;  /* timestamp microseconds */
            # guint32 incl_len; /* number of octets of packet saved in file */
            # guint32 orig_len; /* actual length of packet */
            ph_bin = self.mInFile.read(16)
            res = struct.unpack(td_format, ph_bin)
            ts_sec = res[0]
            ts_usec = res[1]
            pkt_time = ts_sec + (ts_usec / 1000000.0)
            incl_len = res[2]
            orig_len = res[3]
            # Packet data (incl_len bytes)
            raw_data = self.mInFile.read(incl_len)
            # Skip the MAC header - don't bother parsing it, we don't care (14 bytes)
            mac_offset = 14
            # Parse out the IP header (min 20 bytes)
            # 4 bits  - version
            # 4 bits  - header length in 32 bit words
            # 1 byte  - type of service
            # 2 bytes - total length
            # 2 bytes - fragment identification
            # 3 bits  - flags
            # 13 bits - fragment offset
            # 1 byte  - TTL
            # 1 byte  - protocol (should be 6 for TCP)
            # 2 bytes - header checksum
            # 4 bytes - source IP
            # 4 bytes - dest IP
            ip_header = struct.unpack(ip_format, raw_data[mac_offset:mac_offset + 20])
            # Assume all packets are TCP
            #if ip_header[6] != 6:
            #    print "Not TCP!"
            #    continue
            src_ip_bin = ip_header[8]
            src_ip = lookup_ip_string(src_ip_bin)
            dst_ip_bin = ip_header[9]
            dst_ip = lookup_ip_string(dst_ip_bin)
            ip_size = (ip_header[0] & 0x0f) * 4
            # Parse out the TCP packet header
            # 2 bytes - src_port
            # 2 bytes - dst_port
            # 4 bytes - sequence number
            # 4 bytes - ack number
            # 4 bits  - data offset (size of header in 32 bit words)
            # 6 bits  - reserved
            # 6 bits  - control bits
            # 2 bytes - window
            # 2 bytes - checksum
            # 2 bytes - urgent pointer
            tcp_offset = mac_offset + ip_size
            tcp_header = struct.unpack(tcp_format, raw_data[tcp_offset:tcp_offset + 20])
            tcp_size = ((tcp_header[4] & 0xf0) >> 4) * 4
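            # e.g. tcp_header[4] == 0x80 -> data offset of 8 words -> 32-byte
            # TCP header (20 fixed + 12 of options)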
            src_port = tcp_header[0]
            dst_port = tcp_header[1]
            # MySQL packet header:
            # 3 bytes   - packet length
            # 1 byte    - packet number
            # 1 byte    - command
            # <n bytes> - args
            pkt_offset = tcp_offset + tcp_size
            if len(raw_data) == pkt_offset:
                continue
            # Clearly not a mysql packet if it's less than 5 bytes of data
            if len(raw_data) - pkt_offset < 5:
                continue
            src_ip_port = "%s:%d" % (src_ip, src_port)
            dst_ip_port = "%s:%d" % (dst_ip, dst_port)
            if src_port == 3306:
                #
                # We are processing traffic from mysql server -> client.
                # This is primarily used to time how long it takes for us
                # to start receiving data at the client from the server.
                #
                mysql_arr = array.array('B', raw_data[pkt_offset])
                result_type = ord(raw_data[pkt_offset])
                # Track the connection if we don't know about it yet.
                if not dst_ip_port in self.mConnStatus:
                    self.mConnStatus[dst_ip_port] = LLConnStatus(dst_ip_port, pkt_time)
                conn = self.mConnStatus[dst_ip_port]
                # Update the status of this connection, including query times on
                # connections
                if conn.updateResponse(pkt_time, result_type):
                    # Event: Initial query response
                    return "QueryResponse", conn.mLastQuery
                continue
            if dst_port == 3306:
                #
                # Processing a packet from the client to the server
                #
                # Pull out packet length from the header
                mysql_arr = array.array('B', raw_data[pkt_offset:pkt_offset + 5])
                pkt_len = mysql_arr[0] + (long(mysql_arr[1]) << 8) + (long(mysql_arr[2]) << 16)
                pkt_number = mysql_arr[3]
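                # The MySQL wire length is 3 bytes, little-endian: e.g. a header
                # beginning 01 00 00 gives pkt_len == 1 (just the command byte)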
                # Find the connection associated with this packet
                if not src_ip_port in self.mConnStatus:
                    self.mConnStatus[src_ip_port] = LLConnStatus(src_ip_port, pkt_time)
                conn = self.mConnStatus[src_ip_port]
                #if conn.mLastMysqlPacketNumber != (pkt_number - 1):
                #    print "Prev:", conn.mLastMysqlPacketNumber, "Cur:", pkt_number
                conn.mLastMysqlPacketNumber = pkt_number
                cmd = mysql_arr[4]
                # If it's not a command, update the connection state
                if cmd > 0x1c:
                    # Unfortunately, we can't trivially tell the difference between
                    # various non-command packets.
                    # Assume that these are all AuthResponses for now.
                    conn.updateNonCommand(pkt_time, raw_data[pkt_offset:])
                    if "QuerySent" == conn.mCurState:
                        return ("QueryStart", conn.mLastQuery)
                    continue
                query = None
                if cmd == 1:
                    # Event: Quitting a connection
                    conn.quit(src_ip, src_port, pkt_time)
                    # This connection is closing, get rid of it
                    self.closeConnection(src_ip_port)
                    return ("Quit", conn.mLastQuery)
                elif cmd == 3:
                    # Event: Starting a query
                    conn.queryStart(src_ip, src_port, pkt_time, raw_data, pkt_len, pkt_offset + 5)
                    # Only return a QueryStart if we have the whole query
                    if "QuerySent" == conn.mCurState:
                        return ("QueryStart", conn.mLastQuery)
                else:
                    pass


class LLQuery:
    fromLLSDStats = staticmethod(llquery_from_llsd)

    def __init__(self, host, port, query, start_time):
        # Store information which will be serialized for metadata in a map
        self.mData = {}
        self.mData['host'] = host
        self.mData['port'] = port
        self.mData['query'] = query
        # Metadata
        self.mData['host_clean'] = None
        self.mData['query_clean'] = None
        self.mData['tables'] = []
        # Stats information
        self.mNumQueries = 0
        self.mTotalTime = 0.0
        self.mOutQueries = 0
        self.mTotalTimeCorrected = 0.0
        self.mNumQueriesCorrected = 0
        self.mBins = {}  # Bins for histogram
        # This stuff doesn't usually get serialized
        self.mQueryLen = len(query)
        self.mStartTime = start_time
        self.mResponseTime = start_time

    def __hash__(self):
        return (self.mData['host_clean'] + ":" + self.mData['query_clean']).__hash__()

    def __eq__(self, other):
        # Note, this matches on the clean query, which isn't strictly correct
        if ((self.mData['query_clean'] == other.mData['query_clean']) and
            (self.mData['host_clean'] == other.mData['host_clean'])):
            return True
        return False

    def getKey(self):
        # The string key is just the clean host and query, concatenated
        return self.mData['host_clean'] + ":" + self.mData['query_clean']

    def clean(self):
        "Generate the clean query so it can be used for statistics"
        if not self.mData['host_clean']:
            self.mData['host_clean'] = host_type(self.mData['host'])
            self.mData['query_clean'] = clean_query(self.mData['query'], 0)

    def getAvgTimeCorrected(self):
        return self.mTotalTimeCorrected / self.mNumQueriesCorrected

    def queryStart(self):
        "When collecting query stats, use this when the query is received"
        self.mNumQueries += 1
        self.mOutQueries += 1

    def queryResponse(self, elapsed):
        "When collecting stats, use this when the response is received"
        self.mTotalTime += elapsed
        self.mOutQueries -= 1
        bin = MIN_BIN
        if elapsed:
            bin = int(math.log(elapsed, 2))
            bin = max(MIN_BIN, bin)
            bin = min(MAX_BIN, bin)
        if bin not in self.mBins:
            self.mBins[bin] = LLQueryStatBin(bin)
        self.mBins[bin].accumulate(elapsed)
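        # e.g. elapsed of ~0.25s -> int(log2(0.25)) == -2 -> bin -2; each bin
        # collects queries within a power-of-two latency band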

    def correctOutliers(self):
        "Find outlier bins and calculate corrected results"
        # Outlier bins have 3 orders of magnitude fewer queries than the total count
        if not self.mNumQueries:
            # FIXME: This is a hack because we don't save this information in the query count dump
            return
        min_queries = self.mNumQueries / 1000
        self.mTotalTimeCorrected = 0.0
        self.mNumQueriesCorrected = 0
        for i in self.mBins.keys():
            if self.mBins[i].mNumQueries < min_queries:
                # Outlier, flag as such.
                self.mBins[i].mOutlier = True
            else:
                self.mTotalTimeCorrected += self.mBins[i].mTotalTime
                self.mNumQueriesCorrected += self.mBins[i].mNumQueries
        if self.mNumQueriesCorrected == 0:
            # HACK: Deal with divide by zero
            self.mNumQueriesCorrected = 1

    sReadRE = re.compile("(SELECT.*)|(USE.*)", re.IGNORECASE)
    sSelectWhereRE = re.compile("\(?\s*?SELECT.+?FROM\s+\(?(.*?)\)?\s+WHERE.*", re.IGNORECASE)
    sSelectRE = re.compile("\(?\s*?SELECT.+?FROM\s+(.+)(?:\s+LIMIT.*|.*)", re.IGNORECASE)
    sUpdateRE = re.compile("UPDATE\s+(.+?)\s+SET.*", re.IGNORECASE)
    sReplaceRE = re.compile("REPLACE INTO\s+(.+?)(?:\s*\(|\s+SET).*", re.IGNORECASE)
    sInsertRE = re.compile("INSERT.+?INTO\s+(.+?)(?:\s*\(|\s+SET).*", re.IGNORECASE)
    sDeleteRE = re.compile("DELETE.+?FROM\s+(.+?)\s+WHERE.*", re.IGNORECASE)

    def analyze(self):
        "Does some query analysis on the query"
        if 'type' in self.mData:
            # Already analyzed
            return
        query = self.mData['query_clean']
        if LLQuery.sReadRE.match(query):
            self.mData['type'] = 'read'
        else:
            self.mData['type'] = 'write'
        self.mData['tables'] = get_query_tables(query)

    def dumpLine(self, elapsed, query_len=0):
        bin_str = ''
        for i in range(MIN_BIN, MAX_BIN + 1):
            if i in self.mBins:
                if self.mBins[i].mOutlier:
                    bin_str += '*'
                else:
                    bin_str += str(int(math.log10(self.mBins[i].mNumQueries)))
            else:
                bin_str += '.'
        if not query_len:
            query_len = 4096
        num_queries = self.mNumQueriesCorrected
        if not num_queries:
            num_queries = 1
        return ("%s\t%5d\t%6.2f\t%6.2f\t%1.4f\t%s\t" % (bin_str, num_queries,
                num_queries / elapsed, self.mTotalTimeCorrected,
                self.mTotalTimeCorrected / num_queries, self.mData['host_clean'])) \
                + self.mData['query_clean'][0:query_len]

    def as_map(self):
        "Make an LLSD map version of data that can be used for merging"
        self.analyze()
        self.mData['num_queries'] = self.mNumQueries
        self.mData['total_time'] = self.mTotalTime
        self.mData['num_queries_corrected'] = self.mNumQueriesCorrected
        self.mData['total_time_corrected'] = self.mTotalTimeCorrected
        return self.mData
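# A minimal usage sketch (values illustrative; assumes LLQueryStatBin, which
# queryResponse references, is available from the rest of this file):
#   q = LLQuery("10.0.0.5", 12345, "SELECT name FROM agents WHERE age > 21", 0.0)
#   q.clean()
#   q.queryStart()
#   q.queryResponse(0.012)
#   q.correctOutliers()
#   print q.dumpLine(60.0)    # one tab-separated summary line for a 60s window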


class LLConnStatus:
    "Keeps track of the status of a connection talking to mysql"
    def __init__(self, ip_port, start_time):
        self.mLastMysqlPacketNumber = 0
        self.mNumPackets = 0
        self.mIPPort = ip_port
        self.mStartTime = start_time
        self.mLastUpdate = start_time
        self.mCurState = ""
        self.mLastQuery = None
        self.mNumQueries = 0

    def quit(self, src_ip, src_port, pkt_time):
        query = LLQuery(src_ip, src_port, "Quit", pkt_time)
        query.clean()
        self.mLastUpdate = pkt_time
        self.mLastQuery = query
        self.mNumPackets += 1

    def queryStart(self, src_ip, src_port, pkt_time, raw, pkt_len, offset):
        query_len = pkt_len - 1
        query = LLQuery(src_ip, src_port, raw[offset:offset + (pkt_len - 1)], pkt_time)
        self.mLastUpdate = pkt_time
        # Packet length includes the command, offset into raw doesn't
        if query_len > (len(raw) - offset):
            query.mQueryLen = query_len
            self.mCurState = "SendingQuery"
        else:
            self.mCurState = "QuerySent"
            query.clean()
        self.mNumQueries += 1
        self.mLastQuery = query
        self.mNumPackets += 1

    def queryStartProcessed(self, src_ip, src_port, pkt_time, query_str):
        query = LLQuery(src_ip, src_port, query_str, pkt_time)
        query.clean()
        self.mLastUpdate = pkt_time
        self.mCurState = "QuerySent"
        self.mNumQueries += 1
        self.mLastQuery = query
        self.mNumPackets += 1

    def updateNonCommand(self, pkt_time, raw):
        # Clean up an existing query if you get a non-command.
        self.mNumPackets += 1
        self.mLastUpdate = pkt_time
        if self.mLastQuery:
            if self.mCurState == "SendingQuery":
                # We're continuing a query.
                # We won't generate a new clean version, because it'll $!@# up all the sorting.
                self.mLastQuery.mData['query'] += raw
                if len(self.mLastQuery.mData['query']) == self.mLastQuery.mQueryLen:
                    self.mCurState = "QuerySent"
                    self.mLastQuery.clean()
                return
            else:
                #
                # A non-command that's continuing a query. Not sure why this is happening,
                # but clear the last query to avoid generating inadvertent long query results.
                #
                self.mLastQuery = None
        # Default to setting state to "NonCommand"
        self.mCurState = "NonCommand"

    def updateResponse(self, pkt_time, result_type):
        # If we've got a query running, accumulate the elapsed time
        start_query_response = False
        if self.mCurState == "QuerySent":
            lq = self.mLastQuery
            if lq:
                if lq.mStartTime == 0.0:
                    lq.mStartTime = pkt_time
                lq.mResponseTime = pkt_time
                start_query_response = True
        self.mLastUpdate = pkt_time
        if result_type == 0:
            self.mCurState = "Result:RecvOK"
        elif result_type == 0xff:
            self.mCurState = "Result:Error"
        elif result_type == 0xfe:
            self.mCurState = "Result:EOF"
        elif result_type == 0x01:
            self.mCurState = "Result:Header"
        else:
            self.mCurState = "Result:Data"
        return start_query_response

    def dump(self):
        if self.mLastQuery:
            print "%s: NumQ: %d State:%s\n\tLast: %s" % (self.mIPPort, self.mNumQueries, self.mCurState,
                                                         self.mLastQuery.mData['query_clean'][0:40])
        else:
            print "%s: NumQ: %d State:%s\n\tLast: None" % (self.mIPPort, self.mNumQueries, self.mCurState)


def clean_query(query, num_words):
    "Generalizes a query by removing all unique information"
    # Strip newlines
    query = query.replace("\n", " ")
    # Screw it, if it's a prepared statement or an execute, generalize the statement name
    if prepare_re.match(query):
        query = mdb_re.sub('*statement*', query)
        return query
    if execute_re.match(query):
        query = mdb_re.sub('*statement*', query)
    if deallocate_re.match(query):
        query = "DEALLOCATE PREPARE"
        return query
    # Replace all "unique" information - strings, uuids, numbers
    query = uuid_re.sub("*uuid*", query)
    query = hex_re.sub("*hex*", query)
    try:
        query = string_re.sub("*string*", query)
    except:
        pass
    query = num_re.sub("*num*", query)
    # Get rid of all "VALUES ()" data.
    query = values_re.sub("VALUES (*values*)", query)
    # Get rid of all "IN ()" data.
    query = in_re.sub("IN (*values*)", query)
    # After we do the cleanup, get rid of extra whitespace
    words = query.split(None)
    query = " ".join(words)
    return query
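# For example (query is illustrative), clean_query turns
#   SELECT * FROM agents WHERE agent_id = '550e8400-e29b-41d4-a716-446655440000' AND age > 21
# into
#   SELECT * FROM agents WHERE agent_id = *uuid* AND age > *num*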


def host_type(host):
    "Returns the genericized linden host type from an IP address or hostname"
    if host in host_type_cache:
        return host_type_cache[host]
    named_host = host
    if ip_re.match(host):
        # Look up the hostname
        try:
            named_host = socket.gethostbyaddr(host)[0]
        except:
            pass
    # Figure out the generic host type
    host_type = named_host
    if sim_re.match(named_host):
        host_type = "sim"
    elif login_re.match(named_host):
        host_type = "login"
    elif web_re.match(named_host):
        host_type = "web"
    elif iweb_re.match(named_host):
        host_type = "iweb"
    elif webds_re.match(named_host):
        host_type = "web-ds"
    elif data_re.match(named_host):
        host_type = "data"
    elif xmlrpc_re.match(named_host):
        host_type = "xmlrpc"
    m = ll_re.match(host_type)
    if m:
        host_type = m.group(1)
    host_type_cache[host] = host_type
    return host_type
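# e.g. (hostnames illustrative) "sim1234.agni.lindenlab.com" -> "sim", while an
# unrecognized "db3.lindenlab.com" just has the domain stripped -> "db3"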


def start_dump(host, port):
    # Start up tcpdump on the sql server, pushing the capture back to us via netcat
    interface = "eth0"
    SRC_DUMP_CMD = "ssh root@%s '/usr/sbin/tcpdump -n -s 0 -w - -i %s dst port 3306 or src port 3306 | nc %s %d'" \
                   % (host, interface, socket.getfqdn(), port)
    os.popen2(SRC_DUMP_CMD, "r")


def lookup_ip_string(ip_bin):
    if not ip_bin in ip_table:
        ip_table[ip_bin] = "%d.%d.%d.%d" % ((ip_bin & 0xff000000L) >> 24,
                                            (ip_bin & 0x00ff0000L) >> 16,
                                            (ip_bin & 0x0000ff00L) >> 8,
                                            ip_bin & 0x000000ffL)
    return ip_table[ip_bin]
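# e.g. lookup_ip_string(0x7f000001) -> "127.0.0.1"; results are memoized in ip_table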


def remote_mysql_stream(host):
    # Create a server socket, then have tcpdump dump stuff to it.
    serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    bound = False
    port = 9999
    while not bound:
        try:
            serversocket.bind((socket.gethostname(), port))
            bound = True
        except:
            print port, " already bound, trying again"
            port += 1
    print "Bound port %d" % port
    serversocket.listen(1)
    # Fork off the dumper, start the server on the main connection
    pid = os.fork()
    if not pid:
        # Child process which gets data from the database
        time.sleep(1.0)
        print "Starting dump!"
        start_dump(host, port)
        print "Exiting dump!"
        sys.exit(0)
    print "Starting server"
    (clientsocket, address) = serversocket.accept()
    print "Accepted connection", address
    # Start listening to the data stream
    return clientsocket.makefile("rb")


def rotate_logs(log_path, query_log_file):
    # Fork to do the actual rotation/compression
    print "Rotating query logs"
    if query_log_file:
        query_log_file.close()
    need_gzip = False
    if os.path.exists(log_path + "/query.log"):
        os.rename(log_path + "/query.log", log_path + "/query.log.tmp")
        need_gzip = True
    query_log_file = open("%s/query.log" % log_path, "w")
    pid = os.fork()
    if pid:
        return query_log_file
    # Child process actually does the log rotation
    # Delete the oldest
    log_filename = log_path + "/query.log.%d.gz" % (MAX_LOGS)
    if os.path.exists(log_filename):
        os.remove(log_filename)
    for i in range(0, MAX_LOGS):
        # Count down from the max and rename
        n = MAX_LOGS - i
        log_filename = log_path + "/query.log.%d.gz" % n
        if os.path.exists(log_filename):
            os.rename(log_path + ("/query.log.%d.gz" % n), log_path + ("/query.log.%d.gz" % (n + 1)))
    if need_gzip:
        # Compress the "first" log (query.log.tmp)
        os.rename(log_path + "/query.log.tmp", log_path + "/query.log.1")
        os.system('gzip -f %s' % (log_path + "/query.log.1"))
    print "Done rotating logs!"
    sys.exit(0)
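# Net effect of a rotation pass: the live ./<host>/query.log becomes
# query.log.1.gz, existing archives shift toward query.log.36.gz (MAX_LOGS),
# and the oldest archive is deleted.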


def watch_host(query_stream, host):
    "Watches query traffic for a particular host. Returns the overall query counts when exited by breaking"
    # Make the output path
    output_path = "./%s" % host
    os.system("mkdir -p %s" % output_path)
    query_log_file = rotate_logs(output_path, None)
    last_log_time = time.time()
    done = False
    count = 0
    try:
        while not done:
            (event_type, query) = query_stream.getNextEvent()
            # Use the start time to determine which hour bin to put the query into
            start_time = query.mStartTime
            start_hour = time.localtime(start_time)[3]
            if event_type == "QueryStart":
                query_log_file.write("%f\t%s:%d\t%s\tQueryStart\n" % (query.mStartTime, query.mData['host'], query.mData['port'], query.mData['host_clean']))
                query_log_file.write("%s\n" % (query.mData['query']))
                query_log_file.write("**************************************\n")
                count += 1
            elif event_type == "QueryResponse":
                query_log_file.write("%f\t%s:%d\t%s\tQueryResponse\n" % (query.mResponseTime, query.mData['host'], query.mData['port'], query.mData['host_clean']))
                query_log_file.write("%s\n" % (query.mData['query']))
                query_log_file.write("**************************************\n")
            elif event_type == "Quit":
                # Quit is an "instantaneous" query, both start and response
                query_log_file.write("%f\t%s:%d\t%s\tQuit\n" % (query.mStartTime, query.mData['host'], query.mData['port'], query.mData['host_clean']))
                query_log_file.write("%s\n" % (query.mData['query']))
                query_log_file.write("**************************************\n")
                continue
            if not (count % 1000):
                try:
                    os.waitpid(-1, os.WNOHANG)
                except OSError:
                    pass
            if (time.time() - last_log_time) > LOG_ROTATION_INTERVAL:
                last_log_time = time.time()
                query_log_file = rotate_logs(output_path, query_log_file)
    except KeyboardInterrupt:
        pass
    query_log_file.close()


if __name__ == "__main__":
    opts, args = getopt.getopt(sys.argv[1:], "", ["host="])
    host = None
    for o, a in opts:
        if o == "--host":
            host = a
    if not host:
        print "Specify a host using --host="
        sys.exit(1)
    # Start up the stream from the target host and create a file
    # that we can hand to LLQueryStream
    query_stream_file = remote_mysql_stream(host)
    query_stream = LLQueryStream(query_stream_file)
    watch_host(query_stream, host)