/lib/galaxy/util/heartbeat.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 182 lines · 150 code · 14 blank · 18 comment · 19 complexity · d7b9758c1e6462156214b555c70f240d MD5 · raw file

  1. # Attempt to load threadframe module, and only define Heartbeat class
  2. # if available
  3. try:
  4. import pkg_resources
  5. pkg_resources.require( "threadframe" )
  6. except:
  7. import sys
  8. print >> sys.stderr, "No threadframe module, Heartbeat not available"
  9. Heartbeat = None
  10. else:
  11. import threading
  12. import threadframe
  13. import time
  14. import traceback
  15. import os
  16. import sys
  17. def get_current_thread_object_dict():
  18. """
  19. Get a dictionary of all 'Thread' objects created via the threading
  20. module keyed by thread_id. Note that not all interpreter threads
  21. have a thread objects, only the main thread and any created via the
  22. 'threading' module. Threads created via the low level 'thread' module
  23. will not be in the returned dictionary.
  24. HACK: This mucks with the internals of the threading module since that
  25. module does not expose any way to match 'Thread' objects with
  26. intepreter thread identifiers (though it should).
  27. """
  28. rval = dict()
  29. # Acquire the lock and then union the contents of 'active' and 'limbo'
  30. # threads into the return value.
  31. threading._active_limbo_lock.acquire()
  32. rval.update( threading._active )
  33. rval.update( threading._limbo )
  34. threading._active_limbo_lock.release()
  35. return rval
  36. class Heartbeat( threading.Thread ):
  37. """
  38. Thread that periodically dumps the state of all threads to a file using
  39. the `threadframe` extension
  40. """
  41. def __init__( self, name="Heartbeat Thread", period=20, fname="heartbeat.log" ):
  42. threading.Thread.__init__( self, name=name )
  43. self.should_stop = False
  44. self.period = period
  45. self.fname = fname
  46. self.file = None
  47. self.fname_nonsleeping = fname + ".nonsleeping"
  48. self.file_nonsleeping = None
  49. self.nonsleeping_heartbeats = { }
  50. # Save process id
  51. self.pid = os.getpid()
  52. # Event to wait on when sleeping, allows us to interrupt for shutdown
  53. self.wait_event = threading.Event()
  54. def run( self ):
  55. self.file = open( self.fname, "a" )
  56. print >> self.file, "Heartbeat for pid %d thread started at %s" % ( self.pid, time.asctime() )
  57. print >> self.file
  58. self.file_nonsleeping = open ( self.fname_nonsleeping, "a" )
  59. print >> self.file_nonsleeping, "Non-Sleeping-threads for pid %d thread started at %s" % ( self.pid, time.asctime() )
  60. print >> self.file_nonsleeping
  61. try:
  62. while not self.should_stop:
  63. # Print separator with timestamp
  64. print >> self.file, "Traceback dump for all threads at %s:" % time.asctime()
  65. print >> self.file
  66. # Print the thread states
  67. threads = get_current_thread_object_dict()
  68. for thread_id, frame in threadframe.dict().iteritems():
  69. if thread_id in threads:
  70. object = repr( threads[thread_id] )
  71. else:
  72. object = "<No Thread object>"
  73. print >> self.file, "Thread %s, %s:" % ( thread_id, object )
  74. print >> self.file
  75. traceback.print_stack( frame, file=self.file )
  76. print >> self.file
  77. print >> self.file, "End dump"
  78. print >> self.file
  79. self.file.flush()
  80. self.print_nonsleeping(threads)
  81. # Sleep for a bit
  82. self.wait_event.wait( self.period )
  83. finally:
  84. print >> self.file, "Heartbeat for pid %d thread stopped at %s" % ( self.pid, time.asctime() )
  85. print >> self.file
  86. # Cleanup
  87. self.file.close()
  88. self.file_nonsleeping.close()
  89. def shutdown( self ):
  90. self.should_stop = True
  91. self.wait_event.set()
  92. self.join()
  93. def thread_is_sleeping ( self, last_stack_frame ):
  94. """
  95. Returns True if the given stack-frame represents a known
  96. sleeper function (at least in python 2.5)
  97. """
  98. _filename = last_stack_frame[0]
  99. _line = last_stack_frame[1]
  100. _funcname = last_stack_frame[2]
  101. _text = last_stack_frame[3]
  102. ### Ugly hack to tell if a thread is supposedly sleeping or not
  103. ### These are the most common sleeping functions I've found.
  104. ### Is there a better way? (python interpreter internals?)
  105. ### Tested only with python 2.5
  106. if _funcname=="wait" and _text=="waiter.acquire()":
  107. return True
  108. if _funcname=="wait" and _text=="_sleep(delay)":
  109. return True
  110. if _funcname=="accept" and _text[-14:]=="_sock.accept()":
  111. return True
  112. #Ugly hack: always skip the heartbeat thread
  113. #TODO: get the current thread-id in python
  114. # skip heartbeat thread by thread-id, not by filename
  115. if _filename.find("/lib/galaxy/util/heartbeat.py")!=-1:
  116. return True
  117. ## By default, assume the thread is not sleeping
  118. return False
  119. def get_interesting_stack_frame ( self, stack_frames ):
  120. """
  121. Scans a given backtrace stack frames, returns a single
  122. quadraple of [filename, line, function-name, text] of
  123. the single, deepest, most interesting frame.
  124. Interesting being::
  125. inside the galaxy source code ("/lib/galaxy"),
  126. prefreably not an egg.
  127. """
  128. for _filename, _line, _funcname, _text in reversed(stack_frames):
  129. idx = _filename.find("/lib/galaxy/")
  130. if idx!=-1:
  131. relative_filename = _filename[idx:]
  132. return ( relative_filename, _line, _funcname, _text )
  133. # no "/lib/galaxy" code found, return the innermost frame
  134. return stack_frames[-1]
  135. def print_nonsleeping( self, threads_object_dict ):
  136. print >> self.file_nonsleeping, "Non-Sleeping threads at %s:" % time.asctime()
  137. print >> self.file_nonsleeping
  138. all_threads_are_sleeping = True
  139. threads = get_current_thread_object_dict()
  140. for thread_id, frame in threadframe.dict().iteritems():
  141. if thread_id in threads:
  142. object = repr( threads[thread_id] )
  143. else:
  144. object = "<No Thread object>"
  145. tb = traceback.extract_stack(frame)
  146. if self.thread_is_sleeping(tb[-1]):
  147. if thread_id in self.nonsleeping_heartbeats:
  148. del self.nonsleeping_heartbeats[thread_id]
  149. continue
  150. # Count non-sleeping thread heartbeats
  151. if thread_id in self.nonsleeping_heartbeats:
  152. self.nonsleeping_heartbeats[thread_id] += 1
  153. else:
  154. self.nonsleeping_heartbeats[thread_id]=1
  155. good_frame = self.get_interesting_stack_frame(tb)
  156. print >> self.file_nonsleeping, "Thread %s\t%s\tnon-sleeping for %d heartbeat(s)\n File %s:%d\n Function \"%s\"\n %s" % \
  157. ( thread_id, object, self.nonsleeping_heartbeats[thread_id], good_frame[0], good_frame[1], good_frame[2], good_frame[3] )
  158. all_threads_are_sleeping = False
  159. if all_threads_are_sleeping:
  160. print >> self.file_nonsleeping, "All threads are sleeping."
  161. print >> self.file_nonsleeping
  162. self.file_nonsleeping.flush()