PageRenderTime 138ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/scripts/cros_list_buildbot_crashes.py

https://gitlab.com/github-cloud-corporation/chromite
Python | 261 lines | 233 code | 13 blank | 15 comment | 7 complexity | 675a1683a7892b8422a582cb1ee24e64 MD5 | raw file
  1. #!/usr/bin/python
  2. # Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
  3. # Use of this source code is governed by a BSD-style license that can be
  4. # found in the LICENSE file.
  5. """Script for listing top buildbot crashes."""
  6. from __future__ import print_function
  7. import collections
  8. import contextlib
  9. import datetime
  10. import multiprocessing
  11. import logging
  12. import optparse
  13. import os
  14. import re
  15. import sys
  16. from chromite.buildbot import cbuildbot_config
  17. from chromite.buildbot import constants
  18. from chromite.buildbot import manifest_version
  19. from chromite.lib import cros_build_lib
  20. from chromite.lib import parallel
  21. def ConvertGoogleStorageURLToHttpURL(url):
  22. return url.replace('gs://', 'http://sandbox.google.com/storage/')
  23. class CrashTriager(object):
  24. CRASH_PATTERN = re.compile(r'/([^/.]*)\.(\d+)[^/]*\.dmp\.txt$')
  25. STACK_TRACE_PATTERN = re.compile(r'Thread 0 ((?:[^\n]+\n)*)')
  26. FUNCTION_PATTERN = re.compile(r'\S+!\S+')
  27. def __init__(self, start_date, chrome_branch, all_programs, list_all, jobs):
  28. self.start_date = start_date
  29. self.chrome_branch = chrome_branch
  30. self.crash_triage_queue = multiprocessing.Queue()
  31. self.stack_trace_queue = multiprocessing.Queue()
  32. self.stack_traces = collections.defaultdict(list)
  33. self.all_programs = all_programs
  34. self.list_all = list_all
  35. self.jobs = jobs
  36. def Run(self):
  37. """Run the crash triager, printing the most common stack traces."""
  38. with self._PrintStackTracesInBackground():
  39. with self._DownloadCrashesInBackground():
  40. with self._ProcessCrashListInBackground():
  41. pass
  42. def _GetGSPath(self, bot_id, build_config):
  43. """Get the Google Storage path where crashes are stored for a given bot.
  44. Args:
  45. bot_id: Gather crashes from this bot id.
  46. build_config: Configuration options for this bot.
  47. """
  48. if build_config['gs_path'] == cbuildbot_config.GS_PATH_DEFAULT:
  49. gsutil_archive = 'gs://chromeos-image-archive/' + bot_id
  50. else:
  51. gsutil_archive = build_config['gs_path']
  52. return gsutil_archive
  53. def _ListCrashesForBot(self, bot_id, build_config):
  54. """List all crashes for the specified bot.
  55. Example output line: [
  56. 'gs://chromeos-image-archive/amd64-generic-full/R18-1414.0.0-a1-b537/' +
  57. 'chrome.20111207.181520.2533.dmp.txt'
  58. ]
  59. Args:
  60. bot_id: Gather crashes from this bot id.
  61. build_config: Configuration options for this bot.
  62. """
  63. chrome_branch = self.chrome_branch
  64. gsutil_archive = self._GetGSPath(bot_id, build_config)
  65. pattern = '%s/R%s-**.dmp.txt' % (gsutil_archive, chrome_branch)
  66. out = cros_build_lib.RunCommand(['gsutil', 'ls', pattern],
  67. error_code_ok=True,
  68. redirect_stdout=True,
  69. redirect_stderr=True,
  70. print_cmd=False)
  71. if out.returncode == 0:
  72. return out.output.split('\n')
  73. return []
  74. def _ProcessCrashListForBot(self, bot_id, build_config):
  75. """Process crashes for a given bot.
  76. Args:
  77. bot_id: Gather crashes from this bot id.
  78. build_config: Configuration options for this bot.
  79. """
  80. for line in self._ListCrashesForBot(bot_id, build_config):
  81. m = self.CRASH_PATTERN.search(line)
  82. if m is None: continue
  83. program, crash_date = m.groups()
  84. if self.all_programs or program == 'chrome':
  85. crash_date_obj = datetime.datetime.strptime(crash_date, '%Y%m%d')
  86. if self.start_date <= crash_date_obj:
  87. self.crash_triage_queue.put((program, crash_date, line))
  88. @contextlib.contextmanager
  89. def _ProcessCrashListInBackground(self):
  90. """Create a worker process for processing crash lists."""
  91. with parallel.BackgroundTaskRunner(self._ProcessCrashListForBot,
  92. processes=self.jobs) as queue:
  93. for bot_id, build_config in cbuildbot_config.config.iteritems():
  94. if build_config['vm_tests']:
  95. queue.put((bot_id, build_config))
  96. yield
  97. def _GetStackTrace(self, crash_report_url):
  98. """Retrieve a stack trace using gsutil cat.
  99. Args:
  100. crash_report_url: The URL where the crash is stored.
  101. """
  102. out = cros_build_lib.RunCommand(['gsutil', 'cat', crash_report_url],
  103. error_code_ok=True,
  104. redirect_stdout=True,
  105. redirect_stderr=True,
  106. print_cmd=False)
  107. return out
  108. def _DownloadStackTrace(self, program, crash_date, url):
  109. """Download a crash report, queuing up the stack trace info.
  110. Args:
  111. program: The program that crashed.
  112. crash_date: The date of the crash.
  113. url: The URL where the crash is stored.
  114. """
  115. out = self._GetStackTrace(url)
  116. if out.returncode == 0:
  117. self.stack_trace_queue.put((program, crash_date, url, out.output))
  118. @contextlib.contextmanager
  119. def _DownloadCrashesInBackground(self):
  120. """Create a worker process for downloading stack traces."""
  121. with parallel.BackgroundTaskRunner(self._DownloadStackTrace,
  122. queue=self.crash_triage_queue,
  123. processes=self.jobs):
  124. yield
  125. def _ProcessStackTrace(self, program, date, url, output):
  126. """Process a stack trace that has been downloaded.
  127. Args:
  128. program: The program that crashed.
  129. date: The date of the crash.
  130. url: The URL where the crash is stored.
  131. output: The content of the stack trace.
  132. """
  133. signature = 'uncategorized'
  134. m = self.STACK_TRACE_PATTERN.search(output)
  135. functions = []
  136. if m:
  137. trace = m.group(1)
  138. functions = self.FUNCTION_PATTERN.findall(trace)
  139. last_function = None
  140. for f in functions:
  141. if not f.startswith('libc-'):
  142. signature = f
  143. if last_function:
  144. signature += '[%s]' % last_function
  145. break
  146. last_function = f.partition('!')[2]
  147. else:
  148. if functions:
  149. signature = functions[0]
  150. stack_len = len(functions)
  151. self.stack_traces[(program, signature)].append((date, stack_len, url))
  152. def _PrintStackTraces(self):
  153. """Print all stack traces."""
  154. # Print header.
  155. if self.list_all:
  156. print('Crash count, program, function, date, URL')
  157. else:
  158. print('Crash count, program, function, first crash, last crash, URL')
  159. # Print details about stack traces.
  160. stack_traces = sorted(self.stack_traces.iteritems(),
  161. key=lambda x: len(x[1]), reverse=True)
  162. for (program, signature), crashes in stack_traces:
  163. if self.list_all:
  164. for crash in sorted(crashes, reverse=True):
  165. crash_url = ConvertGoogleStorageURLToHttpURL(crash[2])
  166. output = (str(len(crashes)), program, signature, crash[0], crash_url)
  167. print(*output, sep=', ')
  168. else:
  169. first_date = min(x[0] for x in crashes)
  170. last_date = max(x[0] for x in crashes)
  171. crash_url = ConvertGoogleStorageURLToHttpURL(max(crashes)[2])
  172. output = (str(len(crashes)), program, signature, first_date, last_date,
  173. crash_url)
  174. print(*output, sep=', ')
  175. @contextlib.contextmanager
  176. def _PrintStackTracesInBackground(self):
  177. with parallel.BackgroundTaskRunner(self._ProcessStackTrace,
  178. queue=self.stack_trace_queue,
  179. processes=1,
  180. onexit=self._PrintStackTraces):
  181. yield
  182. def _GetChromeBranch():
  183. """Get the current Chrome branch."""
  184. version_file = os.path.join(constants.SOURCE_ROOT, constants.VERSION_FILE)
  185. version_info = manifest_version.VersionInfo(version_file=version_file)
  186. return version_info.chrome_branch
  187. def _CreateParser():
  188. """Generate and return the parser with all the options."""
  189. # Parse options
  190. usage = 'usage: %prog [options]'
  191. parser = optparse.OptionParser(usage=usage)
  192. # Main options
  193. parser.add_option('', '--days', dest='days', default=7, type='int',
  194. help=('Number of days to look at for crash info.'))
  195. parser.add_option('', '--chrome_branch', dest='chrome_branch',
  196. default=_GetChromeBranch(),
  197. help=('Chrome branch to look at for crash info.'))
  198. parser.add_option('', '--all_programs', action='store_true',
  199. dest='all_programs', default=False,
  200. help=('Show crashes in programs other than Chrome.'))
  201. parser.add_option('', '--list', action='store_true', dest='list_all',
  202. default=False,
  203. help=('List all stack traces found (not just one).'))
  204. parser.add_option('', '--jobs', dest='jobs', default=32, type='int',
  205. help=('Number of processes to run in parallel.'))
  206. return parser
  207. def main(argv):
  208. # Setup boto config for gsutil.
  209. boto_config = os.path.abspath(os.path.join(constants.SOURCE_ROOT,
  210. 'src/private-overlays/chromeos-overlay/googlestorage_account.boto'))
  211. if os.path.isfile(boto_config):
  212. os.environ['BOTO_CONFIG'] = boto_config
  213. else:
  214. print('Cannot find %s' % boto_config, file=sys.stderr)
  215. print('This function requires a private checkout.', file=sys.stderr)
  216. print('See http://goto/chromeos-building', file=sys.stderr)
  217. sys.exit(1)
  218. logging.disable(level=logging.INFO)
  219. parser = _CreateParser()
  220. (options, _) = parser.parse_args(argv)
  221. since = datetime.datetime.today() - datetime.timedelta(days=options.days)
  222. triager = CrashTriager(since, options.chrome_branch, options.all_programs,
  223. options.list_all, options.jobs)
  224. triager.Run()