
/third_party/wpt_tools/wpt/tools/wptrunner/wptrunner/testrunner.py

https://github.com/chromium/chromium
Possible License(s): MPL-2.0-no-copyleft-exception, Apache-2.0, BSD-3-Clause
# mypy: allow-untyped-defs

import threading
import traceback
from queue import Empty
from collections import namedtuple

from mozlog import structuredlog, capture

from . import mpcontext

# Special value used as a sentinel in various commands
Stop = object()
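# Handlers in the TestRunner command loop return Stop to request shutdown;
# it is compared by identity (``rv is Stop``), so no ordinary return value
# can collide with it.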


def release_mozlog_lock():
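    # If this process was forked while another thread held mozlog's global
    # lock, the child inherits the lock in its locked state; releasing it
    # here keeps logging in the new process from deadlocking.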
    try:
        from mozlog.structuredlog import StructuredLogger
        try:
            StructuredLogger._lock.release()
        except threading.ThreadError:
            pass
    except ImportError:
        pass


class LogMessageHandler:
    def __init__(self, send_message):
        self.send_message = send_message

    def __call__(self, data):
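        # Forward each structured-log message to the parent TestRunnerManager
        # as a "log" message on the result queue.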
        self.send_message("log", data)


class TestRunner:
    """Class implementing the main loop for running tests.

    This class delegates the job of actually running a test to the executor
    that is passed in.

    :param logger: Structured logger
    :param command_queue: subprocess.Queue used to send commands to the
                          process
    :param result_queue: subprocess.Queue used to send results to the
                         parent TestRunnerManager process
    :param executor: TestExecutor object that will actually run a test.
    """
    def __init__(self, logger, command_queue, result_queue, executor, recording):
        self.command_queue = command_queue
        self.result_queue = result_queue

        self.executor = executor
        self.name = mpcontext.get_context().current_process().name
        self.logger = logger
        self.recording = recording

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.teardown()

    def setup(self):
        self.logger.debug("Executor setup")
        try:
            self.executor.setup(self)
        except Exception:
            # The caller is responsible for logging the exception if required
            self.send_message("init_failed")
        else:
            self.send_message("init_succeeded")
        self.logger.debug("Executor setup done")

    def teardown(self):
        self.executor.teardown()
        self.send_message("runner_teardown")
        self.result_queue = None
        self.command_queue = None
        self.browser = None

    def run(self):
        """Main loop accepting commands over the pipe and triggering
        the associated methods"""
        try:
            self.setup()
        except Exception:
            self.logger.warning("An error occurred during executor setup:\n%s" %
                                traceback.format_exc())
            raise
        commands = {"run_test": self.run_test,
                    "reset": self.reset,
                    "stop": self.stop,
                    "wait": self.wait}
        while True:
            command, args = self.command_queue.get()
            try:
                rv = commands[command](*args)
            except Exception:
                self.send_message("error",
                                  "Error running command %s with arguments %r:\n%s" %
                                  (command, args, traceback.format_exc()))
            else:
                if rv is Stop:
                    break

    def stop(self):
        return Stop

    def reset(self):
        self.executor.reset()

    def run_test(self, test):
        try:
            return self.executor.run_test(test)
        except Exception:
            self.logger.error(traceback.format_exc())
            raise

    def wait(self):
        rerun = self.executor.wait()
        self.send_message("wait_finished", rerun)

    def send_message(self, command, *args):
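        # All traffic between manager and runner is (command, args) tuples:
        # the manager sends "run_test", "reset", "stop" and "wait" on the
        # command queue; the runner replies on the result queue with
        # "init_succeeded"/"init_failed", "wait_finished", "error" and
        # "runner_teardown" (plus "log" messages from the log handler).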
        self.result_queue.put((command, args))


def start_runner(runner_command_queue, runner_result_queue,
                 executor_cls, executor_kwargs,
                 executor_browser_cls, executor_browser_kwargs,
                 capture_stdio, stop_flag, recording):
    """Launch a TestRunner in a new process"""

    def send_message(command, *args):
        runner_result_queue.put((command, args))

    def handle_error(e):
        logger.critical(traceback.format_exc())
        stop_flag.set()

    # Ensure that when we start this in a new process we have the global lock
    # in the logging module unlocked
    release_mozlog_lock()

    proc_name = mpcontext.get_context().current_process().name

    logger = structuredlog.StructuredLogger(proc_name)
    logger.add_handler(LogMessageHandler(send_message))

    with capture.CaptureIO(logger, capture_stdio):
        try:
            browser = executor_browser_cls(**executor_browser_kwargs)
            executor = executor_cls(logger, browser, **executor_kwargs)
            with TestRunner(logger, runner_command_queue, runner_result_queue,
                            executor, recording) as runner:
                try:
                    runner.run()
                except KeyboardInterrupt:
                    stop_flag.set()
                except Exception as e:
                    handle_error(e)
        except Exception as e:
            handle_error(e)


class BrowserManager:
    def __init__(self, logger, browser, command_queue, no_timeout=False):
        self.logger = logger
        self.browser = browser
        self.no_timeout = no_timeout
        self.browser_settings = None
        self.last_test = None
        self.started = False

        self.init_timer = None
        self.command_queue = command_queue

    def update_settings(self, test):
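        # A restart is required when the settings for the next test differ
        # from the current ones, or when moving on to a test that is expected
        # to CRASH, so that it runs in a fresh browser instance.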
        browser_settings = self.browser.settings(test)
        restart_required = ((self.browser_settings is not None and
                             self.browser_settings != browser_settings) or
                            (self.last_test != test and test.expected() == "CRASH"))
        self.browser_settings = browser_settings
        self.last_test = test
        return restart_required
    def init(self, group_metadata):
        """Launch the browser that is being tested,
        and the TestRunner process that will run the tests."""
        # It seems that this lock is helpful to prevent some race that otherwise
        # sometimes stops the spawned processes initialising correctly, and
        # leaves this thread hung
        if self.init_timer is not None:
            self.init_timer.cancel()

        self.logger.debug("Init called, starting browser and runner")

        if not self.no_timeout:
            self.init_timer = threading.Timer(self.browser.init_timeout,
                                              self.init_timeout)
        try:
            if self.init_timer is not None:
                self.init_timer.start()
            self.logger.debug("Starting browser with settings %r" % self.browser_settings)
            self.browser.start(group_metadata=group_metadata, **self.browser_settings)
            self.browser_pid = self.browser.pid
        except Exception:
            self.logger.warning("Failure during init %s" % traceback.format_exc())
            if self.init_timer is not None:
                self.init_timer.cancel()
            self.logger.error(traceback.format_exc())
            succeeded = False
        else:
            succeeded = True
            self.started = True

        return succeeded

    def send_message(self, command, *args):
        self.command_queue.put((command, args))

    def init_timeout(self):
        # This is called from a separate thread, so we send a message to the
        # main loop so we get back onto the manager thread
        self.logger.debug("init_failed called from timer")
        self.send_message("init_failed")

    def after_init(self):
        """Callback when we have started the browser, started the remote
        control connection, and we are ready to start testing."""
        if self.init_timer is not None:
            self.init_timer.cancel()

    def stop(self, force=False):
        self.browser.stop(force=force)
        self.started = False

    def cleanup(self):
        if self.init_timer is not None:
            self.init_timer.cancel()

    def check_crash(self, test_id):
        return self.browser.check_crash(process=self.browser_pid, test=test_id)

    def is_alive(self):
        return self.browser.is_alive()


class _RunnerManagerState:
    before_init = namedtuple("before_init", [])
    initializing = namedtuple("initializing",
                              ["test", "test_group", "group_metadata", "failure_count"])
    running = namedtuple("running", ["test", "test_group", "group_metadata"])
    restarting = namedtuple("restarting", ["test", "test_group", "group_metadata", "force_stop"])
    error = namedtuple("error", [])
    stop = namedtuple("stop", ["force_stop"])


RunnerManagerState = _RunnerManagerState()
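# TestRunnerManager below is a state machine over these states: the normal
# flow is before_init -> initializing -> running, with restarting feeding
# back into initializing; stop (normal completion) and error (e.g. too many
# restarts) are the terminal states.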


class TestRunnerManager(threading.Thread):
    def __init__(self, suite_name, index, test_type, test_queue, test_source_cls, browser_cls,
                 browser_kwargs, executor_cls, executor_kwargs, stop_flag, rerun=1,
                 pause_after_test=False, pause_on_unexpected=False, restart_on_unexpected=True,
                 debug_info=None, capture_stdio=True, recording=None):
        """Thread that owns a single TestRunner process and any processes required
        by the TestRunner (e.g. the Firefox binary).

        TestRunnerManagers are responsible for launching the browser process and the
        runner process, and for logging the test progress. The actual test running
        is done by the TestRunner. In particular they:

        * Start the binary of the program under test
        * Start the TestRunner
        * Tell the TestRunner to start a test, if any
        * Log that the test started
        * Log the test results
        * Take any remedial action required e.g. restart crashed or hung
          processes
        """
        self.suite_name = suite_name

        self.test_source = test_source_cls(test_queue)

        self.manager_number = index
        self.test_type = test_type

        self.browser_cls = browser_cls
        self.browser_kwargs = browser_kwargs.copy()
        if self.browser_kwargs.get("device_serial"):
            # Assign Android device to runner according to current manager index
            self.browser_kwargs["device_serial"] = (
                self.browser_kwargs["device_serial"][index])

        self.executor_cls = executor_cls
        self.executor_kwargs = executor_kwargs

        mp = mpcontext.get_context()

        # Flags used to shut down this thread if we get a sigint
        self.parent_stop_flag = stop_flag
        self.child_stop_flag = mp.Event()

        self.rerun = rerun
        self.run_count = 0

        self.pause_after_test = pause_after_test
        self.pause_on_unexpected = pause_on_unexpected
        self.restart_on_unexpected = restart_on_unexpected
        self.debug_info = debug_info

        assert recording is not None
        self.recording = recording

        self.command_queue = mp.Queue()
        self.remote_queue = mp.Queue()

        self.test_runner_proc = None

        threading.Thread.__init__(self, name="TestRunnerManager-%s-%i" % (test_type, index))
        # This is started in the actual new thread
        self.logger = None

        self.test_count = 0
        self.unexpected_count = 0
        self.unexpected_pass_count = 0

        # This may not really be what we want
        self.daemon = True

        self.timer = None

        self.max_restarts = 5

        self.browser = None

        self.capture_stdio = capture_stdio
    def run(self):
        """Main loop for the TestRunnerManager.

        TestRunnerManagers generally receive commands from their
        TestRunner updating them on the status of a test. They
        may also have a stop flag set by the main thread indicating
        that the manager should shut down the next time the event loop
        spins."""
        self.recording.set(["testrunner", "startup"])
        self.logger = structuredlog.StructuredLogger(self.suite_name)
        with self.browser_cls(self.logger, remote_queue=self.command_queue,
                              **self.browser_kwargs) as browser:
            self.browser = BrowserManager(self.logger,
                                          browser,
                                          self.command_queue,
                                          no_timeout=self.debug_info is not None)
            dispatch = {
                RunnerManagerState.before_init: self.start_init,
                RunnerManagerState.initializing: self.init,
                RunnerManagerState.running: self.run_test,
                RunnerManagerState.restarting: self.restart_runner,
            }

            self.state = RunnerManagerState.before_init()
            end_states = (RunnerManagerState.stop,
                          RunnerManagerState.error)

            try:
                while not isinstance(self.state, end_states):
                    f = dispatch.get(self.state.__class__)
                    while f:
                        self.logger.debug("Dispatch %s" % f.__name__)
                        if self.should_stop():
                            return
                        new_state = f()
                        if new_state is None:
                            break
                        self.state = new_state
                        self.logger.debug("new state: %s" % self.state.__class__.__name__)
                        if isinstance(self.state, end_states):
                            return
                        f = dispatch.get(self.state.__class__)
                    new_state = None
                    while new_state is None:
                        new_state = self.wait_event()
                        if self.should_stop():
                            return
                    self.state = new_state
                    self.logger.debug("new state: %s" % self.state.__class__.__name__)
            except Exception:
                self.logger.error(traceback.format_exc())
                raise
            finally:
                self.logger.debug("TestRunnerManager main loop terminating, starting cleanup")
                force_stop = (not isinstance(self.state, RunnerManagerState.stop) or
                              self.state.force_stop)
                self.stop_runner(force=force_stop)
                self.teardown()
        self.logger.debug("TestRunnerManager main loop terminated")
    def wait_event(self):
        dispatch = {
            RunnerManagerState.before_init: {},
            RunnerManagerState.initializing:
            {
                "init_succeeded": self.init_succeeded,
                "init_failed": self.init_failed,
            },
            RunnerManagerState.running:
            {
                "test_ended": self.test_ended,
                "wait_finished": self.wait_finished,
            },
            RunnerManagerState.restarting: {},
            RunnerManagerState.error: {},
            RunnerManagerState.stop: {},
            None: {
                "runner_teardown": self.runner_teardown,
                "log": self.log,
                "error": self.error
            }
        }
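        # Handlers under the None key are accepted in any state; the lookup
        # below falls back to them when the current state has no entry for
        # the command.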
        try:
            command, data = self.command_queue.get(True, 1)
            self.logger.debug("Got command: %r" % command)
        except OSError:
            self.logger.error("Got IOError from poll")
            return RunnerManagerState.restarting(self.state.test,
                                                 self.state.test_group,
                                                 self.state.group_metadata,
                                                 False)
        except Empty:
            if (self.debug_info and self.debug_info.interactive and
                self.browser.started and not self.browser.is_alive()):
                self.logger.debug("Debugger exited")
                return RunnerManagerState.stop(False)

            if (isinstance(self.state, RunnerManagerState.running) and
                not self.test_runner_proc.is_alive()):
                if not self.command_queue.empty():
                    # We got a new message so process that
                    return

                # If we got to here the runner presumably shut down
                # unexpectedly
                self.logger.info("Test runner process shut down")

                if self.state.test is not None:
                    # This could happen if the test runner crashed for some other
                    # reason
                    # Need to consider the unlikely case where one test causes the
                    # runner process to repeatedly die
                    self.logger.critical("Last test did not complete")
                    return RunnerManagerState.error()
                self.logger.warning("More tests found, but runner process died, restarting")
                return RunnerManagerState.restarting(self.state.test,
                                                     self.state.test_group,
                                                     self.state.group_metadata,
                                                     False)
        else:
            f = (dispatch.get(self.state.__class__, {}).get(command) or
                 dispatch.get(None, {}).get(command))
            if not f:
                self.logger.warning("Got command %s in state %s" %
                                    (command, self.state.__class__.__name__))
                return
            return f(*data)
    def should_stop(self):
        return self.child_stop_flag.is_set() or self.parent_stop_flag.is_set()

    def start_init(self):
        test, test_group, group_metadata = self.get_next_test()
        self.recording.set(["testrunner", "init"])
        if test is None:
            return RunnerManagerState.stop(True)
        else:
            return RunnerManagerState.initializing(test, test_group, group_metadata, 0)

    def init(self):
        assert isinstance(self.state, RunnerManagerState.initializing)
        if self.state.failure_count > self.max_restarts:
            self.logger.critical("Max restarts exceeded")
            return RunnerManagerState.error()

        self.browser.update_settings(self.state.test)

        result = self.browser.init(self.state.group_metadata)
        if result is Stop:
            return RunnerManagerState.error()
        elif not result:
            return RunnerManagerState.initializing(self.state.test,
                                                   self.state.test_group,
                                                   self.state.group_metadata,
                                                   self.state.failure_count + 1)
        else:
            self.executor_kwargs["group_metadata"] = self.state.group_metadata
            self.executor_kwargs["browser_settings"] = self.browser.browser_settings
            self.start_test_runner()

    def start_test_runner(self):
        # Note that we need to be careful to start the browser before the
        # test runner to ensure that any state set when the browser is started
        # can be passed in to the test runner.
        assert isinstance(self.state, RunnerManagerState.initializing)
        assert self.command_queue is not None
        assert self.remote_queue is not None
        self.logger.info("Starting runner")
        executor_browser_cls, executor_browser_kwargs = self.browser.browser.executor_browser()

        args = (self.remote_queue,
                self.command_queue,
                self.executor_cls,
                self.executor_kwargs,
                executor_browser_cls,
                executor_browser_kwargs,
                self.capture_stdio,
                self.child_stop_flag,
                self.recording)
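        # Note the crossover: the manager's remote_queue becomes the runner's
        # command queue, and the manager's command_queue becomes the runner's
        # result queue (cf. the start_runner signature).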
        mp = mpcontext.get_context()
        self.test_runner_proc = mp.Process(target=start_runner,
                                           args=args,
                                           name="TestRunner-%s-%i" % (
                                               self.test_type, self.manager_number))
        self.test_runner_proc.start()
        self.logger.debug("Test runner started")
        # Now we wait for either an init_succeeded event or an init_failed event

    def init_succeeded(self):
        assert isinstance(self.state, RunnerManagerState.initializing)
        self.browser.after_init()
        return RunnerManagerState.running(self.state.test,
                                          self.state.test_group,
                                          self.state.group_metadata)

    def init_failed(self):
        assert isinstance(self.state, RunnerManagerState.initializing)
        self.browser.check_crash(None)
        self.browser.after_init()
        self.stop_runner(force=True)
        return RunnerManagerState.initializing(self.state.test,
                                               self.state.test_group,
                                               self.state.group_metadata,
                                               self.state.failure_count + 1)

    def get_next_test(self, test_group=None):
        test = None
        while test is None:
            while test_group is None or len(test_group) == 0:
                test_group, group_metadata = self.test_source.group()
                if test_group is None:
                    self.logger.info("No more tests")
                    return None, None, None
            test = test_group.popleft()
        self.run_count = 0
        return test, test_group, group_metadata

    def run_test(self):
        assert isinstance(self.state, RunnerManagerState.running)
        assert self.state.test is not None

        if self.browser.update_settings(self.state.test):
            self.logger.info("Restarting browser for new test environment")
            return RunnerManagerState.restarting(self.state.test,
                                                 self.state.test_group,
                                                 self.state.group_metadata,
                                                 False)

        self.recording.set(["testrunner", "test"] + self.state.test.id.split("/")[1:])
        self.logger.test_start(self.state.test.id)
        if self.rerun > 1:
            self.logger.info("Run %d/%d" % (self.run_count, self.rerun))
            self.send_message("reset")
        self.run_count += 1
        if self.debug_info is None:
            # Factor of 3 on the extra timeout here is based on allowing the executor
            # at least test.timeout + 2 * extra_timeout to complete,
            # which in turn is based on having several layers of timeout inside the executor
            wait_timeout = (self.state.test.timeout * self.executor_kwargs['timeout_multiplier'] +
                            3 * self.executor_cls.extra_timeout)
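            # For example, a test with a 10s timeout, timeout_multiplier 2 and
            # extra_timeout 5s gives wait_timeout = 10 * 2 + 3 * 5 = 35s
            # (illustrative values only).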
            self.timer = threading.Timer(wait_timeout, self._timeout)

        self.send_message("run_test", self.state.test)
        if self.timer:
            self.timer.start()

    def _timeout(self):
        # This is executed in a different thread (threading.Timer).
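        # Inject a synthetic test_ended message so the timeout is handled on
        # the manager thread like any other result.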
        self.logger.info("Got timeout in harness")
        test = self.state.test
        self.inject_message(
            "test_ended",
            test,
            (test.result_cls("EXTERNAL-TIMEOUT",
                             "TestRunner hit external timeout "
                             "(this may indicate a hang)"), []),
        )
    def test_ended(self, test, results):
        """Handle the end of a test.

        Output the result of each subtest, and the result of the overall
        harness to the logs.
        """
        if ((not isinstance(self.state, RunnerManagerState.running)) or
            (test != self.state.test)):
            # Due to inherent race conditions in EXTERNAL-TIMEOUT, we might
            # receive multiple test_ended for a test (e.g. from both Executor
            # and TestRunner), in which case we ignore the duplicate message.
            self.logger.error("Received unexpected test_ended for %s" % test)
            return
        if self.timer is not None:
            self.timer.cancel()

        # Write the result of each subtest
        file_result, test_results = results
        subtest_unexpected = False
        expect_any_subtest_status = test.expect_any_subtest_status()
        if expect_any_subtest_status:
            self.logger.debug("Ignoring subtest statuses for test %s" % test.id)
        for result in test_results:
            if test.disabled(result.name):
                continue
            if expect_any_subtest_status:
                expected = result.status
            else:
                expected = test.expected(result.name)
            known_intermittent = test.known_intermittent(result.name)
            is_unexpected = (expected != result.status and
                             result.status not in known_intermittent)

            if is_unexpected:
                self.unexpected_count += 1
                self.logger.debug("Unexpected count in this thread %i" % self.unexpected_count)
                subtest_unexpected = True

            is_unexpected_pass = is_unexpected and result.status == "PASS"
            if is_unexpected_pass:
                self.unexpected_pass_count += 1

            self.logger.test_status(test.id,
                                    result.name,
                                    result.status,
                                    message=result.message,
                                    expected=expected,
                                    known_intermittent=known_intermittent,
                                    stack=result.stack)

        expected = test.expected()
        known_intermittent = test.known_intermittent()
        status = file_result.status

        if self.browser.check_crash(test.id) and status != "CRASH":
            if test.test_type == "crashtest" or status == "EXTERNAL-TIMEOUT":
                self.logger.info("Found a crash dump file; changing status to CRASH")
                status = "CRASH"
            else:
                self.logger.warning(f"Found a crash dump; should change status from {status} "
                                    "to CRASH but this causes instability")

        # We have a couple of status codes that are used internally, but not
        # exposed to the user. These are used to indicate that some
        # possibly-broken state was reached and we should restart the runner
        # before the next test.
        # INTERNAL-ERROR indicates a Python exception was caught in the harness
        # EXTERNAL-TIMEOUT indicates we had to forcibly kill the browser from
        # the harness because the test didn't return a result after reaching
        # the test-internal timeout
        status_subns = {"INTERNAL-ERROR": "ERROR",
                        "EXTERNAL-TIMEOUT": "TIMEOUT"}
        status = status_subns.get(status, status)

        self.test_count += 1
        is_unexpected = expected != status and status not in known_intermittent
        if is_unexpected:
            self.unexpected_count += 1
            self.logger.debug("Unexpected count in this thread %i" % self.unexpected_count)

        is_unexpected_pass = is_unexpected and status == "OK"
        if is_unexpected_pass:
            self.unexpected_pass_count += 1

        if "assertion_count" in file_result.extra:
            assertion_count = file_result.extra["assertion_count"]
            if assertion_count is not None and assertion_count > 0:
                self.logger.assertion_count(test.id,
                                            int(assertion_count),
                                            test.min_assertion_count,
                                            test.max_assertion_count)

        file_result.extra["test_timeout"] = test.timeout * self.executor_kwargs['timeout_multiplier']

        self.logger.test_end(test.id,
                             status,
                             message=file_result.message,
                             expected=expected,
                             known_intermittent=known_intermittent,
                             extra=file_result.extra,
                             stack=file_result.stack)

        restart_before_next = (test.restart_after or
                               file_result.status in ("CRASH", "EXTERNAL-TIMEOUT", "INTERNAL-ERROR") or
                               ((subtest_unexpected or is_unexpected) and
                                self.restart_on_unexpected))
        force_stop = test.test_type == "wdspec" and file_result.status == "EXTERNAL-TIMEOUT"

        self.recording.set(["testrunner", "after-test"])
        if (not file_result.status == "CRASH" and
            self.pause_after_test or
            (self.pause_on_unexpected and (subtest_unexpected or is_unexpected))):
            self.logger.info("Pausing until the browser exits")
            self.send_message("wait")
        else:
            return self.after_test_end(test, restart_before_next, force_stop=force_stop)
    def wait_finished(self, rerun=False):
        assert isinstance(self.state, RunnerManagerState.running)
        self.logger.debug("Wait finished")

        # The browser should be stopped already, but this ensures we do any
        # post-stop processing
        return self.after_test_end(self.state.test, not rerun, force_rerun=rerun)

    def after_test_end(self, test, restart, force_rerun=False, force_stop=False):
        assert isinstance(self.state, RunnerManagerState.running)
        # Mixing manual reruns and automatic reruns is confusing; we currently
        # assume that as long as we've done at least the automatic run count
        # in total we can continue with the next test.
        if not force_rerun and self.run_count >= self.rerun:
            test, test_group, group_metadata = self.get_next_test()
            if test is None:
                return RunnerManagerState.stop(force_stop)
            if test_group is not self.state.test_group:
                # We are starting a new group of tests, so force a restart
                self.logger.info("Restarting browser for new test group")
                restart = True
        else:
            test_group = self.state.test_group
            group_metadata = self.state.group_metadata
        if restart:
            return RunnerManagerState.restarting(test, test_group, group_metadata, force_stop)
        else:
            return RunnerManagerState.running(test, test_group, group_metadata)

    def restart_runner(self):
        """Stop and restart the TestRunner"""
        assert isinstance(self.state, RunnerManagerState.restarting)
        self.stop_runner(force=self.state.force_stop)
        return RunnerManagerState.initializing(
            self.state.test, self.state.test_group, self.state.group_metadata, 0)
    def log(self, data):
        self.logger.log_raw(data)

    def error(self, message):
        self.logger.error(message)
        self.restart_runner()

    def stop_runner(self, force=False):
        """Stop the TestRunner and the browser binary."""
        self.recording.set(["testrunner", "stop_runner"])
        if self.test_runner_proc is None:
            return

        if self.test_runner_proc.is_alive():
            self.send_message("stop")
        try:
            self.browser.stop(force=force)
            self.ensure_runner_stopped()
        finally:
            self.cleanup()

    def teardown(self):
        self.logger.debug("TestRunnerManager teardown")
        self.test_runner_proc = None
        self.command_queue.close()
        self.remote_queue.close()
        self.command_queue = None
        self.remote_queue = None
        self.recording.pause()
    def ensure_runner_stopped(self):
        self.logger.debug("ensure_runner_stopped")
        if self.test_runner_proc is None:
            return

        self.browser.stop(force=True)
        self.logger.debug("waiting for runner process to end")
        self.test_runner_proc.join(10)
        self.logger.debug("After join")
        mp = mpcontext.get_context()
        if self.test_runner_proc.is_alive():
            # This might leak a file handle from the queue
            self.logger.warning("Forcibly terminating runner process")
            self.test_runner_proc.terminate()
            self.logger.debug("After terminating runner process")

            # Multiprocessing queues are backed by operating system pipes. If
            # the pipe in the child process had buffered data at the time of
            # forced termination, the queue is no longer in a usable state
            # (subsequent attempts to retrieve items may block indefinitely).
            # Discard the potentially-corrupted queue and create a new one.
            self.logger.debug("Recreating command queue")
            self.command_queue.cancel_join_thread()
            self.command_queue.close()
            self.command_queue = mp.Queue()
            self.logger.debug("Recreating remote queue")
            self.remote_queue.cancel_join_thread()
            self.remote_queue.close()
            self.remote_queue = mp.Queue()
        else:
            self.logger.debug("Runner process exited with code %i" % self.test_runner_proc.exitcode)

    def runner_teardown(self):
        self.ensure_runner_stopped()
        return RunnerManagerState.stop(False)
    def send_message(self, command, *args):
        """Send a message to the remote queue (to Executor)."""
        self.remote_queue.put((command, args))

    def inject_message(self, command, *args):
        """Inject a message to the command queue (from Executor)."""
        self.command_queue.put((command, args))

    def cleanup(self):
        self.logger.debug("TestRunnerManager cleanup")
        if self.browser:
            self.browser.cleanup()

        while True:
            try:
                cmd, data = self.command_queue.get_nowait()
            except Empty:
                break
            else:
                if cmd == "log":
                    self.log(*data)
                elif cmd == "runner_teardown":
                    # It's OK for the "runner_teardown" message to be left in
                    # the queue during cleanup, as we will already have tried
                    # to stop the TestRunner in `stop_runner`.
                    pass
                else:
                    self.logger.warning(f"Command left in command_queue during cleanup: {cmd!r}, {data!r}")

        while True:
            try:
                cmd, data = self.remote_queue.get_nowait()
                self.logger.warning(f"Command left in remote_queue during cleanup: {cmd!r}, {data!r}")
            except Empty:
                break


def make_test_queue(tests, test_source_cls, **test_source_kwargs):
    queue = test_source_cls.make_queue(tests, **test_source_kwargs)

    # There is a race condition that means sometimes we continue
    # before the tests have been written to the underlying pipe.
    # Polling the pipe for data here avoids that
    queue._reader.poll(10)
    assert not queue.empty()
    return queue


class ManagerGroup:
    """Main thread object that owns all the TestRunnerManager threads."""
    def __init__(self, suite_name, size, test_source_cls, test_source_kwargs,
                 browser_cls, browser_kwargs,
                 executor_cls, executor_kwargs,
                 rerun=1,
                 pause_after_test=False,
                 pause_on_unexpected=False,
                 restart_on_unexpected=True,
                 debug_info=None,
                 capture_stdio=True,
                 recording=None):
        self.suite_name = suite_name
        self.size = size
        self.test_source_cls = test_source_cls
        self.test_source_kwargs = test_source_kwargs
        self.browser_cls = browser_cls
        self.browser_kwargs = browser_kwargs
        self.executor_cls = executor_cls
        self.executor_kwargs = executor_kwargs
        self.pause_after_test = pause_after_test
        self.pause_on_unexpected = pause_on_unexpected
        self.restart_on_unexpected = restart_on_unexpected
        self.debug_info = debug_info
        self.rerun = rerun
        self.capture_stdio = capture_stdio
        self.recording = recording
        assert recording is not None

        self.pool = set()
        # Event that is polled by threads so that they can gracefully exit in
        # the face of sigint
        self.stop_flag = threading.Event()
        self.logger = structuredlog.StructuredLogger(suite_name)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stop()

    def run(self, test_type, tests):
        """Start all managers in the group"""
        self.logger.debug("Using %i processes" % self.size)
        type_tests = tests[test_type]
        if not type_tests:
            self.logger.info("No %s tests to run" % test_type)
            return

        test_queue = make_test_queue(type_tests, self.test_source_cls, **self.test_source_kwargs)

        for idx in range(self.size):
            manager = TestRunnerManager(self.suite_name,
                                        idx,
                                        test_type,
                                        test_queue,
                                        self.test_source_cls,
                                        self.browser_cls,
                                        self.browser_kwargs,
                                        self.executor_cls,
                                        self.executor_kwargs,
                                        self.stop_flag,
                                        self.rerun,
                                        self.pause_after_test,
                                        self.pause_on_unexpected,
                                        self.restart_on_unexpected,
                                        self.debug_info,
                                        self.capture_stdio,
                                        recording=self.recording)
            manager.start()
            self.pool.add(manager)
        self.wait()

    def wait(self):
        """Wait for all the managers in the group to finish"""
        for manager in self.pool:
            manager.join()

    def stop(self):
        """Set the stop flag so that all managers in the group stop as soon
        as possible"""
        self.stop_flag.set()
        self.logger.debug("Stop flag set in ManagerGroup")

    def test_count(self):
        return sum(manager.test_count for manager in self.pool)

    def unexpected_count(self):
        return sum(manager.unexpected_count for manager in self.pool)

    def unexpected_pass_count(self):
        return sum(manager.unexpected_pass_count for manager in self.pool)
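

# Minimal usage sketch (argument values here are illustrative, not part of
# this module):
#
#     with ManagerGroup("web-platform-tests", 4,
#                       test_source_cls, test_source_kwargs,
#                       browser_cls, browser_kwargs,
#                       executor_cls, executor_kwargs,
#                       recording=recording) as group:
#         group.run("testharness", tests)
#     logger.info("Ran %i tests, %i unexpected" %
#                 (group.test_count(), group.unexpected_count()))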