PageRenderTime 40ms CodeModel.GetById 27ms RepoModel.GetById 1ms app.codeStats 0ms

/agent/apps/health/td_monitor.py

https://github.com/jsonkey/neo
Python | 125 lines | 78 code | 19 blank | 28 comment | 22 complexity | 816f86da87669d9640504df6487bf412 MD5 | raw file
  1. #!/usr/bin/python -u
  2. #-*- coding: UTF-8 -*-
  3. #vim:sw=4:tw=4:ts=4:ai:expandtab
  4. ##==========================================================================
  5. ##
  6. ## Copyright (c) Tudou Inc. All Rights Reserved.
  7. ##
  8. ##--------------------------------------------------------------------------
  9. ##
  10. ## File: $Workfile: td_zombie.py$
  11. ## $Revision: 1$
  12. ##
  13. ## Last Update: $2009-3-25 19:57$
  14. ##
  15. ##--------------------------------------------------------------------------
  16. ##
  17. import os
  18. import re
  19. import sys
  20. import time
  21. import commands
  22. import traceback
  23. from signal import SIGTERM
  24. #
  25. curpath=os.path.normpath(os.path.join(os.getcwd(),os.path.dirname(__file__)))
  26. def trace_back():
  27. return traceback.print_exc()
  28. class Monitor:
  29. """monitor Process.
  30. """
  31. def __init__(self, timeout=6*60*60):
  32. self.timeout =6*60*60
  33. self.men =60
  34. self.mzombieName=("mencoder", "ffmpeg", "mplayer", "lame", "mp4creator", "yamdi", "codewav", "codevideo")
  35. self.kill_pid =[]
  36. def time2sec(self, mytime) :
  37. #time must be 03:34:33.2 format
  38. m = mytime.split(':')
  39. if m[0].find("-") >= 0:
  40. tmp=m[0].split("-")
  41. m[0]=int(tmp[0])*24*60*60 + int(tmp[1])*60*60
  42. else:
  43. m[0]=int(m[0])*60*60
  44. return int((m[0]) + int(m[1]) * 60 + float(m[2]))
  45. def kill(self, pid):
  46. # Try killing the daemon process
  47. try:
  48. os.system("kill -9 %s"%pid)
  49. except OSError, err:
  50. print str(err)
  51. return False
  52. def filter_need(self, program=None):
  53. if not program:
  54. return False
  55. if re.search(r"[a-zA-Z]+", program).group().lower() in self.mzombieName:
  56. return True
  57. else:
  58. return False
  59. def filter_timeout(self, pid=None, timestr=None):
  60. if not timestr:
  61. return
  62. t = self.time2sec(timestr)
  63. if t >= self.timeout:
  64. return pid
  65. def filter_men(self, pid=None, men=None):
  66. if not men:
  67. return
  68. if float(men) >= self.men:
  69. return pid
  70. def filter_zombie(self, pid=None, ppid=None):
  71. if not ppid:
  72. return
  73. if int(ppid) == 1:
  74. return pid
  75. def get_zombie(self):
  76. #28236 28746 mps python 00:00:00 0.3
  77. #28746 28756 mps python 00:00:00 0.2
  78. #18572 29604 mps ps 00:00:00 0.0
  79. process_list = commands.getoutput("ps -eo ppid,pid,user,comm,time,%mem | grep -v grep").split("\n")
  80. process_list.pop(0)
  81. for i in process_list:
  82. i = i.strip().split()
  83. ppid, pid, user, program, timestr, men = i[0], i[1], i[2], i[3], i[4], i[5]
  84. if not self.filter_need(program):
  85. continue
  86. print "i :", i
  87. # ok, need to filter.
  88. if self.filter_men(pid, men) or self.filter_timeout(pid, timestr) or self.filter_zombie(pid, ppid):
  89. self.kill_pid.append(pid)
  90. print self.kill_pid
  91. return True
  92. #checks the return value of an API function printing error information on
  93. #failure. usage checksc(funcname,sc)
  94. def checkcode(self, func, code):
  95. if code:
  96. print "%s ok: code=%s"%(func, code)
  97. else:
  98. print "%s fail: code=%s" % (func, code)
  99. raise Exception(func + code)
  100. def run(self):
  101. self.get_zombie()
  102. [self.kill(e) for e in self.kill_pid]
  103. if __name__ == '__main__' :
  104. #monitor()
  105. monitor=Monitor()
  106. monitor.run()