PageRenderTime 1901ms CodeModel.GetById 30ms RepoModel.GetById 1ms app.codeStats 0ms

/socorro/processor/breakpad_transform_rules.py

https://github.com/lauraxt/socorro
Python | 306 lines | 275 code | 14 blank | 17 comment | 4 complexity | 99d9ca055b4e7e33f48866645e0fffd5 MD5 | raw file
  1. import re
  2. import os
  3. import subprocess
  4. import threading
  5. import ujson
  6. from contextlib import contextmanager, closing
  7. from collections import Mapping
  8. from configman import Namespace
  9. from socorro.lib.util import DotDict
  10. from socorro.lib.transform_rules import Rule
  11. #------------------------------------------------------------------------------
  12. def _create_symbol_path_str(input_str):
  13. symbols_sans_commas = input_str.replace(',', ' ')
  14. quoted_symbols_list = ['"%s"' % x.strip()
  15. for x in symbols_sans_commas.split()]
  16. return ' '.join(quoted_symbols_list)
  17. #==============================================================================
  18. class BreakpadStackwalkerRule(Rule):
  19. required_config = Namespace()
  20. required_config.add_option(
  21. 'dump_field',
  22. doc='the default name of a dump',
  23. default='upload_file_minidump',
  24. )
  25. required_config.add_option(
  26. 'stackwalk_command_line',
  27. doc='the template for the command to invoke stackwalker',
  28. default=(
  29. 'timeout -s KILL 30 $minidump_stackwalk_pathname '
  30. '--raw-json $rawfilePathname $dumpfilePathname '
  31. '$processor_symbols_pathname_list 2>/dev/null'
  32. ),
  33. )
  34. required_config.add_option(
  35. 'minidump_stackwalk_pathname',
  36. doc='the full pathname to the external program stackwalker '
  37. '(quote path with embedded spaces)',
  38. default='/data/socorro/stackwalk/bin/stackwalker',
  39. )
  40. required_config.add_option(
  41. 'symbol_cache_path',
  42. doc='the path where the symbol cache is found (quote path with '
  43. 'embedded spaces)',
  44. default='/mnt/socorro/symbols',
  45. )
  46. required_config.add_option(
  47. 'processor_symbols_pathname_list',
  48. doc='comma or space separated list of symbol files for '
  49. 'minidump_stackwalk (quote paths with embedded spaces)',
  50. default='/mnt/socorro/symbols/symbols_ffx,'
  51. '/mnt/socorro/symbols/symbols_sea,'
  52. '/mnt/socorro/symbols/symbols_tbrd,'
  53. '/mnt/socorro/symbols/symbols_sbrd,'
  54. '/mnt/socorro/symbols/symbols_os',
  55. from_string_converter=_create_symbol_path_str
  56. )
  57. required_config.add_option(
  58. 'temporary_file_system_storage_path',
  59. doc='a path where temporary files may be written',
  60. default='/tmp',
  61. )
  62. #--------------------------------------------------------------------------
  63. def __init__(self, config):
  64. super(BreakpadStackwalkerRule, self).__init__(config)
  65. # the code in this section originally hales from 2008 ExternalProcessor
  66. # class. It defines the template subsitution syntax used to spcecify
  67. # the shell command used to invoke the minidump stackwalker program.
  68. # The syntax was was requested to be of a Perl/shell style rather than
  69. # the original Pythonic syntax. This code takes that foreign syntax
  70. # and converts it to a Pythonic syntax for later use.
  71. strip_parens_re = re.compile(r'\$(\()(\w+)(\))')
  72. convert_to_python_substitution_format_re = re.compile(r'\$(\w+)')
  73. # Canonical form of $(param) is $param. Convert any that are needed
  74. tmp = strip_parens_re.sub(
  75. r'$\2',
  76. config.stackwalk_command_line
  77. )
  78. # Convert canonical $dumpfilePathname and $rawfilePathname
  79. tmp = tmp.replace('$dumpfilePathname', 'DUMPFILEPATHNAME')
  80. tmp = tmp.replace('$rawfilePathname', 'RAWFILEPATHNAME')
  81. # finally, convert any remaining $param to pythonic %(param)s
  82. tmp = convert_to_python_substitution_format_re.sub(r'%(\1)s', tmp)
  83. self.mdsw_command_line = tmp % config
  84. #--------------------------------------------------------------------------
  85. def version(self):
  86. return '1.0'
  87. #--------------------------------------------------------------------------
  88. @contextmanager
  89. def _temp_raw_crash_json_file(self, raw_crash, crash_id):
  90. file_pathname = os.path.join(
  91. self.config.temporary_file_system_storage_path,
  92. "%s.%s.TEMPORARY.json" % (
  93. crash_id,
  94. threading.currentThread().getName()
  95. )
  96. )
  97. with open(file_pathname, "w") as f:
  98. ujson.dump(dict(raw_crash), f)
  99. try:
  100. yield file_pathname
  101. finally:
  102. os.unlink(file_pathname)
  103. #--------------------------------------------------------------------------
  104. @contextmanager
  105. def _temp_file_context(self, raw_dump_path):
  106. """this contextmanager implements conditionally deleting a pathname
  107. at the end of a context if the pathname indicates that it is a temp
  108. file by having the word 'TEMPORARY' embedded in it."""
  109. try:
  110. yield raw_dump_path
  111. finally:
  112. if 'TEMPORARY' in raw_dump_path:
  113. try:
  114. os.unlink(raw_dump_path)
  115. except OSError:
  116. self.config.logger.warning(
  117. 'unable to delete %s. manual deletion is required.',
  118. raw_dump_path,
  119. exc_info=True
  120. )
  121. #--------------------------------------------------------------------------
  122. def _invoke_minidump_stackwalk(
  123. self,
  124. dump_name,
  125. dump_pathname,
  126. raw_crash_pathname,
  127. processor_notes
  128. ):
  129. """ This function invokes breakpad_stackdump as an external process
  130. capturing and returning the text output of stdout. This version
  131. represses the stderr output.
  132. input parameters:
  133. dump_pathname: the complete pathname of the dumpfile to be
  134. analyzed
  135. """
  136. with self._temp_file_context(dump_pathname):
  137. command_line = self.mdsw_command_line.replace(
  138. "DUMPFILEPATHNAME",
  139. dump_pathname
  140. ).replace(
  141. "RAWFILEPATHNAME",
  142. raw_crash_pathname
  143. )
  144. if self.config.chatty:
  145. self.config.logger.debug(
  146. "BreakpadStackwalkerRule: %s",
  147. command_line
  148. )
  149. subprocess_handle = subprocess.Popen(
  150. command_line,
  151. shell=True,
  152. stdout=subprocess.PIPE
  153. )
  154. with closing(subprocess_handle.stdout):
  155. try:
  156. stackwalker_output = ujson.load(subprocess_handle.stdout)
  157. except Exception, x:
  158. processor_notes.append(
  159. "MDSW output failed in json: %s" % x
  160. )
  161. stackwalker_output = {}
  162. return_code = subprocess_handle.wait()
  163. if not isinstance(stackwalker_output, Mapping):
  164. processor_notes.append(
  165. "MDSW produced unexpected output: %s..." %
  166. str(stackwalker_output)[:10]
  167. )
  168. stackwalker_output = {}
  169. stackwalker_data = DotDict()
  170. stackwalker_data.json_dump = stackwalker_output
  171. stackwalker_data.mdsw_return_code = return_code
  172. stackwalker_data.mdsw_status_string = stackwalker_output.get(
  173. 'status',
  174. 'unknown error'
  175. )
  176. stackwalker_data.success = stackwalker_data.mdsw_status_string == 'OK'
  177. if return_code == 124:
  178. processor_notes.append(
  179. "MDSW terminated with SIGKILL due to timeout"
  180. )
  181. elif return_code != 0 or not stackwalker_data.success:
  182. processor_notes.append(
  183. "MDSW failed on '%s': %s" % (
  184. dump_name,
  185. stackwalker_data.mdsw_status_string
  186. )
  187. )
  188. return stackwalker_data
  189. #--------------------------------------------------------------------------
  190. def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
  191. if 'additional_minidumps' not in processed_crash:
  192. processed_crash.additional_minidumps = []
  193. with self._temp_raw_crash_json_file(
  194. raw_crash,
  195. raw_crash.uuid
  196. ) as raw_crash_pathname:
  197. for dump_name, dump_pathname in raw_dumps.iteritems():
  198. if processor_meta.quit_check:
  199. processor_meta.quit_check()
  200. # this rule is only interested in dumps targeted for the
  201. # minidump stackwalker external program. As of the writing
  202. # of this code, there is one other dump type. The only way
  203. # to differentiate these dump types is by the name of the
  204. # dump. All minidumps targeted for the stackwalker will have
  205. # a name with a prefix specified in configuration:
  206. if not dump_name.startswith(self.config.dump_field):
  207. # dumps not intended for the stackwalker are ignored
  208. continue
  209. if self.config.chatty:
  210. self.config.logger.debug(
  211. "BreakpadStackwalkerRule: %s, %s",
  212. dump_name,
  213. dump_pathname
  214. )
  215. stackwalker_data = self._invoke_minidump_stackwalk(
  216. dump_name,
  217. dump_pathname,
  218. raw_crash_pathname,
  219. processor_meta.processor_notes
  220. )
  221. if dump_name == self.config.dump_field:
  222. processed_crash.update(stackwalker_data)
  223. else:
  224. processed_crash.additional_minidumps.append(dump_name)
  225. processed_crash[dump_name] = stackwalker_data
  226. return True
  227. #==============================================================================
  228. class CrashingThreadRule(Rule):
  229. #--------------------------------------------------------------------------
  230. def version(self):
  231. return '1.0'
  232. #--------------------------------------------------------------------------
  233. def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
  234. try:
  235. processed_crash.crashedThread = (
  236. processed_crash['json_dump']['crash_info']['crashing_thread']
  237. )
  238. except KeyError:
  239. processed_crash.crashedThread = None
  240. processor_meta.processor_notes.append(
  241. 'MDSW did not identify the crashing thread'
  242. )
  243. try:
  244. processed_crash.truncated = (
  245. processed_crash['json_dump']
  246. ['crashing_thread']['frames_truncated']
  247. )
  248. except KeyError:
  249. processed_crash.truncated = False
  250. try:
  251. processed_crash.address = (
  252. processed_crash['json_dump']
  253. ['crash_info']['address']
  254. )
  255. except KeyError:
  256. processed_crash.address = None
  257. try:
  258. processed_crash.reason = (
  259. processed_crash['json_dump']
  260. ['crash_info']['type']
  261. )
  262. except KeyError:
  263. processed_crash.reason = None
  264. return True