PageRenderTime 86ms CodeModel.GetById 30ms RepoModel.GetById 1ms app.codeStats 0ms

/pup/pup.py

https://bitbucket.org/d1rk/dd-agent
Python | 262 lines | 198 code | 35 blank | 29 comment | 22 complexity | de9d4ada7099cb829906eba2e1dbacb1 MD5 | raw file
  1. #!/usr/bin/python
  2. """
  3. Pup.py
  4. Datadog
  5. www.datadoghq.com
  6. ---
  7. Make sense of your IT Data
  8. (C) Datadog, Inc. 2012 all rights reserved
  9. """
  10. # set up logging before importing any other components
  11. from config import initialize_logging; initialize_logging('pup')
  12. import os; os.umask(022)
  13. # stdlib
  14. from collections import defaultdict
  15. import sys
  16. import optparse
  17. import os
  18. import re
  19. import time
  20. import logging
  21. import zlib
  22. # 3p
  23. import tornado
  24. from tornado import ioloop
  25. from tornado import web
  26. from tornado import websocket
  27. # project
  28. from config import get_config
  29. from util import json
  30. log = logging.getLogger('pup')
  31. AGENT_TRANSLATION = {
  32. 'cpuUser' : 'CPU user (%)',
  33. 'cpuSystem' : 'CPU system (%)',
  34. 'cpuWait' : 'CPU iowait (%)',
  35. 'cpuIdle' : 'CPU idle (%)',
  36. 'cpuStolen' : 'CPU stolen (%)',
  37. 'memPhysUsed' : 'Memory used',
  38. 'memPhysFree' : 'Memory free',
  39. 'memPhysTotal': 'system.mem.total',
  40. 'memCached' : 'system.mem.cached',
  41. 'memBuffers' : 'system.mem.buffered',
  42. 'memShared' : 'system.mem.shared',
  43. 'memPhysUsable': 'system.mem.usable',
  44. 'memSwapUsed' : 'Used Swap',
  45. 'memSwapFree' : 'Available Swap',
  46. 'memSwapTotal': 'system.swap.total',
  47. 'loadAvrg' : 'Load Averages 1',
  48. 'loadAvrg1' : 'Load Averages 1',
  49. 'loadAvrg5' : 'Load Averages 5',
  50. 'loadAvrg15' : 'Load Averages 15',
  51. 'nginxConnections' : 'nginx.net.connections',
  52. 'nginxReqPerSec' : 'nginx.net.request_per_s',
  53. 'nginxReading' : 'nginx.net.reading',
  54. 'nginxWriting' : 'nginx.net.writing',
  55. 'nginxWaiting' : 'nginx.net.waiting',
  56. 'mysqlConnections' : 'mysql.net.connections',
  57. 'mysqlCreatedTmpDiskTables' : 'mysql.performance.created_tmp_disk_tables',
  58. 'mysqlMaxUsedConnections' : 'mysql.net.max_connections',
  59. 'mysqlQueries' : 'mysql.performance.queries',
  60. 'mysqlQuestions' : 'mysql.performance.questions',
  61. 'mysqlOpenFiles' : 'mysql.performance.open_files',
  62. 'mysqlSlowQueries' : 'mysql.performance.slow_queries',
  63. 'mysqlTableLocksWaited' : 'mysql.performance.table_locks_waited',
  64. 'mysqlInnodbDataReads' : 'mysql.innodb.data_reads',
  65. 'mysqlInnodbDataWrites' : 'mysql.innodb.data_writes',
  66. 'mysqlInnodbOsLogFsyncs' : 'mysql.innodb.os_log_fsyncs',
  67. 'mysqlThreadsConnected' : 'mysql.performance.threads_connected',
  68. 'mysqlKernelTime' : 'mysql.performance.kernel_time',
  69. 'mysqlUserTime' : 'mysql.performance.user_time',
  70. 'mysqlSecondsBehindMaster' : 'mysql.replication.seconds_behind_master',
  71. 'apacheReqPerSec' : 'apache.net.request_per_s',
  72. 'apacheConnections' : 'apache.net.connections',
  73. 'apacheIdleWorkers' : 'apache.performance.idle_workers',
  74. 'apacheBusyWorkers' : 'apache.performance.busy_workers',
  75. 'apacheCPULoad' : 'apache.performance.cpu_load',
  76. 'apacheUptime' : 'apache.performance.uptime',
  77. 'apacheTotalBytes' : 'apache.net.bytes',
  78. 'apacheTotalAccesses' : 'apache.net.hits',
  79. 'apacheBytesPerSec' : 'apache.net.bytes_per_s',
  80. }
  81. # Comes along with the histogram series. Only min/avg/max are plotted.
  82. HISTOGRAM_IGNORE = [
  83. "count",
  84. "50percentile",
  85. "75percentile",
  86. "85percentile",
  87. "95percentile",
  88. "99percentile"
  89. ]
  90. # Ignored namespaces for agent and other Datadog software
  91. AGENT_IGNORE = [
  92. 'dd',
  93. 'app',
  94. 'events'
  95. ]
  96. # Check if using old version of Python. Pup's usage of defaultdict requires 2.5 or later,
  97. # and tornado only supports 2.5 or later. The agent supports 2.6 onwards it seems.
  98. if int(sys.version_info[1]) <= 5:
  99. sys.stderr.write("Pup requires python 2.6 or later.\n")
  100. sys.exit(2)
  101. metrics = defaultdict(lambda : defaultdict(list))
  102. listeners = {}
  103. port = 17125
  104. def is_number(n):
  105. try:
  106. float(n)
  107. return True
  108. except:
  109. return False
  110. def is_histogram(s):
  111. split = s['metric'].rsplit('.')
  112. if len(split) > 1:
  113. if split[-1] not in HISTOGRAM_IGNORE:
  114. return True
  115. return False
  116. def flush(message):
  117. for listener in listeners:
  118. listener.write_message(message)
  119. def send_metrics():
  120. if metrics == {}:
  121. flush(dict({"Waiting":1}))
  122. else: flush(metrics)
  123. metrics.clear()
  124. def update(series):
  125. """ Updates statsd metrics from POST to /api/v1/series """
  126. for s in series:
  127. tags = s['tags']
  128. split_metric_name = s['metric'].split(".")
  129. if is_histogram(s):
  130. # split everything
  131. namespace = split_metric_name[0]
  132. if namespace in AGENT_IGNORE:
  133. continue
  134. metric_name = ".".join(split_metric_name[0:-1])
  135. stack_name = split_metric_name[-1]
  136. values = s['points']
  137. metrics[metric_name]['points'].append({ "stackName" : stack_name, "values" : values })
  138. metrics[metric_name]['type'] = "histogram"
  139. metrics[metric_name]['tags'] = tags
  140. metrics[metric_name]['freq'] = 15
  141. else:
  142. if split_metric_name[-1] in HISTOGRAM_IGNORE:
  143. continue
  144. metric_name = s['metric']
  145. points = s['points']
  146. metrics[metric_name] = {"points" : points, "type" : "line", "tags" : tags, "freq" : 15}
  147. def agent_update(payload):
  148. """ Updates system metrics from POST to /intake """
  149. for p in payload:
  150. timestamp = payload['collection_timestamp']
  151. if (is_number(payload[p])) and p not in ['collection_timestamp', 'networkTraffic']:
  152. metric = AGENT_TRANSLATION.get(p, p)
  153. metrics[metric] = {"points" : [[timestamp, float(payload[p])]], "type" : "gauge", "freq" : 20}
  154. class MainHandler(tornado.web.RequestHandler):
  155. def get(self):
  156. self.render("pup.html",
  157. title="Pup",
  158. port=port)
  159. class PostHandler(tornado.web.RequestHandler):
  160. def post(self):
  161. try:
  162. body = json.loads(self.request.body)
  163. series = body['series']
  164. except:
  165. #log.exception("Error parsing the POST request body")
  166. return
  167. update(series)
  168. class AgentPostHandler(tornado.web.RequestHandler):
  169. def post(self):
  170. try:
  171. payload = json.loads(zlib.decompress(self.request.body))
  172. except:
  173. #log.exception("Error parsing the agent's POST request body")
  174. return
  175. agent_update(payload)
  176. class PupSocket(websocket.WebSocketHandler):
  177. def open(self):
  178. metrics = {}
  179. listeners[self] = self
  180. def on_message(self):
  181. pass
  182. def on_close(self):
  183. del listeners[self]
  184. settings = {
  185. "static_path": os.path.join(os.path.dirname(__file__), "static"),
  186. "cookie_secret": "61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=",
  187. "xsrf_cookies": True,
  188. }
  189. application = tornado.web.Application([
  190. (r"/", MainHandler),
  191. (r"/(.*\..*$)", tornado.web.StaticFileHandler,
  192. dict(path=settings['static_path'])),
  193. (r"/pupsocket", PupSocket),
  194. (r"/api/v1/series?", PostHandler),
  195. (r"/intake", AgentPostHandler),
  196. ])
  197. def run_pup(config):
  198. """ Run the pup server. """
  199. global port
  200. port = config.get('pup_port', 17125)
  201. if config.get('non_local_traffic', False) is True:
  202. application.listen(port)
  203. else:
  204. # localhost in lieu of 127.0.0.1 allows for ipv6
  205. application.listen(port, address="localhost")
  206. interval_ms = 2000
  207. io_loop = ioloop.IOLoop.instance()
  208. scheduler = ioloop.PeriodicCallback(send_metrics, interval_ms, io_loop=io_loop)
  209. scheduler.start()
  210. io_loop.start()
  211. def main():
  212. """ Parses arguments and starts Pup server """
  213. c = get_config(parse_args=False)
  214. is_enabled = c['use_pup']
  215. if is_enabled:
  216. log.info("Starting pup")
  217. run_pup(c)
  218. else:
  219. log.info("Pup is disabled. Exiting")
  220. # We're exiting purposefully, so exit with zero (supervisor's expected
  221. # code). HACK: Sleep a little bit so supervisor thinks we've started cleanly
  222. # and thus can exit cleanly.
  223. time.sleep(4)
  224. sys.exit(0)
  225. if __name__ == "__main__":
  226. main()