PageRenderTime 57ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 1ms

/mysqlDB/mysql-test/lib/mtr_process.pl

https://github.com/olliolli/me.store
Perl | 912 lines | 595 code | 127 blank | 190 comment | 76 complexity | 80ba0f63502736182d5043688b11b9a1 MD5 | raw file
  1. # -*- cperl -*-
  2. # This is a library file used by the Perl version of mysql-test-run,
  3. # and is part of the translation of the Bourne shell script with the
  4. # same name.
  5. #use Carp qw(cluck);
  6. use Socket;
  7. use Errno;
  8. use strict;
  9. #use POSIX ":sys_wait_h";
  10. use POSIX 'WNOHANG';
  11. sub mtr_run ($$$$$$;$);
  12. sub mtr_spawn ($$$$$$;$);
  13. sub mtr_stop_mysqld_servers ($);
  14. sub mtr_kill_leftovers ();
  15. sub mtr_record_dead_children ();
  16. sub mtr_exit ($);
  17. sub sleep_until_file_created ($$$);
  18. sub mtr_kill_processes ($);
  19. # static in C
  20. sub spawn_impl ($$$$$$$$);
  21. ##############################################################################
  22. #
  23. # Execute an external command
  24. #
  25. ##############################################################################
  26. # This function try to mimic the C version used in "netware/mysql_test_run.c"
  27. # FIXME learn it to handle append mode as well, a "new" flag or a "append"
  28. sub mtr_run ($$$$$$;$) {
  29. my $path= shift;
  30. my $arg_list_t= shift;
  31. my $input= shift;
  32. my $output= shift;
  33. my $error= shift;
  34. my $pid_file= shift;
  35. my $spawn_opts= shift;
  36. return spawn_impl($path,$arg_list_t,'run',$input,$output,$error,$pid_file,
  37. $spawn_opts);
  38. }
  39. sub mtr_run_test ($$$$$$;$) {
  40. my $path= shift;
  41. my $arg_list_t= shift;
  42. my $input= shift;
  43. my $output= shift;
  44. my $error= shift;
  45. my $pid_file= shift;
  46. my $spawn_opts= shift;
  47. return spawn_impl($path,$arg_list_t,'test',$input,$output,$error,$pid_file,
  48. $spawn_opts);
  49. }
  50. sub mtr_spawn ($$$$$$;$) {
  51. my $path= shift;
  52. my $arg_list_t= shift;
  53. my $input= shift;
  54. my $output= shift;
  55. my $error= shift;
  56. my $pid_file= shift;
  57. my $spawn_opts= shift;
  58. return spawn_impl($path,$arg_list_t,'spawn',$input,$output,$error,$pid_file,
  59. $spawn_opts);
  60. }
  61. ##############################################################################
  62. #
  63. # If $join is set, we return the error code, else we return the PID
  64. #
  65. ##############################################################################
  66. sub spawn_impl ($$$$$$$$) {
  67. my $path= shift;
  68. my $arg_list_t= shift;
  69. my $mode= shift;
  70. my $input= shift;
  71. my $output= shift;
  72. my $error= shift;
  73. my $pid_file= shift; # FIXME
  74. my $spawn_opts= shift;
  75. if ( $::opt_script_debug )
  76. {
  77. print STDERR "\n";
  78. print STDERR "#### ", "-" x 78, "\n";
  79. print STDERR "#### ", "STDIN $input\n" if $input;
  80. print STDERR "#### ", "STDOUT $output\n" if $output;
  81. print STDERR "#### ", "STDERR $error\n" if $error;
  82. print STDERR "#### ", "$mode : $path ", join(" ",@$arg_list_t), "\n";
  83. print STDERR "#### ", "spawn options:\n";
  84. if ($spawn_opts)
  85. {
  86. foreach my $key (sort keys %{$spawn_opts})
  87. {
  88. print STDERR "#### ", " - $key: $spawn_opts->{$key}\n";
  89. }
  90. }
  91. else
  92. {
  93. print STDERR "#### ", " none\n";
  94. }
  95. print STDERR "#### ", "-" x 78, "\n";
  96. }
  97. FORK:
  98. {
  99. my $pid= fork();
  100. if ( ! defined $pid )
  101. {
  102. if ( $! == $!{EAGAIN} ) # See "perldoc Errno"
  103. {
  104. mtr_debug("Got EAGAIN from fork(), sleep 1 second and redo");
  105. sleep(1);
  106. redo FORK;
  107. }
  108. else
  109. {
  110. mtr_error("$path ($pid) can't be forked");
  111. }
  112. }
  113. if ( $pid )
  114. {
  115. spawn_parent_impl($pid,$mode,$path);
  116. }
  117. else
  118. {
  119. # Child, redirect output and exec
  120. # FIXME I tried POSIX::setsid() here to detach and, I hoped,
  121. # avoid zombies. But everything went wild, somehow the parent
  122. # became a deamon as well, and was hard to kill ;-)
  123. # Need to catch SIGCHLD and do waitpid or something instead......
  124. $SIG{INT}= 'DEFAULT'; # Parent do some stuff, we don't
  125. if ( $::glob_cygwin_shell and $mode eq 'test' )
  126. {
  127. # Programs started from mysqltest under Cygwin, are to
  128. # execute them within Cygwin. Else simple things in test
  129. # files like
  130. # --system "echo 1 > file"
  131. # will fail.
  132. # FIXME not working :-(
  133. # $ENV{'COMSPEC'}= "$::glob_cygwin_shell -c";
  134. }
  135. my $log_file_open_mode = '>';
  136. if ($spawn_opts and $spawn_opts->{'append_log_file'})
  137. {
  138. $log_file_open_mode = '>>';
  139. }
  140. if ( $output )
  141. {
  142. if ( ! open(STDOUT,$log_file_open_mode,$output) )
  143. {
  144. mtr_child_error("can't redirect STDOUT to \"$output\": $!");
  145. }
  146. }
  147. if ( $error )
  148. {
  149. if ( $output eq $error )
  150. {
  151. if ( ! open(STDERR,">&STDOUT") )
  152. {
  153. mtr_child_error("can't dup STDOUT: $!");
  154. }
  155. }
  156. else
  157. {
  158. if ( ! open(STDERR,$log_file_open_mode,$error) )
  159. {
  160. mtr_child_error("can't redirect STDERR to \"$error\": $!");
  161. }
  162. }
  163. }
  164. if ( $input )
  165. {
  166. if ( ! open(STDIN,"<",$input) )
  167. {
  168. mtr_child_error("can't redirect STDIN to \"$input\": $!");
  169. }
  170. }
  171. if ( ! exec($path,@$arg_list_t) )
  172. {
  173. mtr_child_error("failed to execute \"$path\": $!");
  174. }
  175. }
  176. }
  177. }
  178. sub spawn_parent_impl {
  179. my $pid= shift;
  180. my $mode= shift;
  181. my $path= shift;
  182. if ( $mode eq 'run' or $mode eq 'test' )
  183. {
  184. if ( $mode eq 'run' )
  185. {
  186. # Simple run of command, we wait for it to return
  187. my $ret_pid= waitpid($pid,0);
  188. if ( $ret_pid <= 0 )
  189. {
  190. mtr_error("$path ($pid) got lost somehow");
  191. }
  192. return mtr_process_exit_status($?);
  193. }
  194. else
  195. {
  196. # We run mysqltest and wait for it to return. But we try to
  197. # catch dying mysqld processes as well.
  198. #
  199. # We do blocking waitpid() until we get the return from the
  200. # "mysqltest" call. But if a mysqld process dies that we
  201. # started, we take this as an error, and kill mysqltest.
  202. #
  203. # FIXME is this as it should be? Can't mysqld terminate
  204. # normally from running a test case?
  205. my $exit_value= -1;
  206. my $saved_exit_value;
  207. my $ret_pid; # What waitpid() returns
  208. while ( ($ret_pid= waitpid(-1,0)) != -1 )
  209. {
  210. # Someone terminated, don't know who. Collect
  211. # status info first before $? is lost,
  212. # but not $exit_value, this is flagged from
  213. #
  214. my $timer_name= mtr_timer_timeout($::glob_timers, $ret_pid);
  215. if ( $timer_name )
  216. {
  217. if ( $timer_name eq "suite" )
  218. {
  219. # We give up here
  220. # FIXME we should only give up the suite, not all of the run?
  221. print STDERR "\n";
  222. mtr_error("Test suite timeout");
  223. }
  224. elsif ( $timer_name eq "testcase" )
  225. {
  226. $saved_exit_value= 63; # Mark as timeout
  227. kill(9, $pid); # Kill mysqltest
  228. next; # Go on and catch the termination
  229. }
  230. }
  231. if ( $ret_pid == $pid )
  232. {
  233. # We got termination of mysqltest, we are done
  234. $exit_value= mtr_process_exit_status($?);
  235. last;
  236. }
  237. # If one of the mysqld processes died, we want to
  238. # mark this, and kill the mysqltest process.
  239. foreach my $idx (0..1)
  240. {
  241. if ( $::master->[$idx]->{'pid'} eq $ret_pid )
  242. {
  243. mtr_debug("child $ret_pid was master[$idx], " .
  244. "exit during mysqltest run");
  245. $::master->[$idx]->{'pid'}= 0;
  246. last;
  247. }
  248. }
  249. foreach my $idx (0..2)
  250. {
  251. if ( $::slave->[$idx]->{'pid'} eq $ret_pid )
  252. {
  253. mtr_debug("child $ret_pid was slave[$idx], " .
  254. "exit during mysqltest run");
  255. $::slave->[$idx]->{'pid'}= 0;
  256. last;
  257. }
  258. }
  259. mtr_debug("waitpid() catched exit of unknown child $ret_pid, " .
  260. "exit during mysqltest run");
  261. }
  262. if ( $ret_pid != $pid )
  263. {
  264. # We terminated the waiting because a "mysqld" process died.
  265. # Kill the mysqltest process.
  266. kill(9,$pid);
  267. $ret_pid= waitpid($pid,0);
  268. if ( $ret_pid == -1 )
  269. {
  270. mtr_error("$path ($pid) got lost somehow");
  271. }
  272. }
  273. return $saved_exit_value || $exit_value;
  274. }
  275. }
  276. else
  277. {
  278. # We spawned a process we don't wait for
  279. return $pid;
  280. }
  281. }
  282. # ----------------------------------------------------------------------
  283. # We try to emulate how an Unix shell calculates the exit code
  284. # ----------------------------------------------------------------------
  285. sub mtr_process_exit_status {
  286. my $raw_status= shift;
  287. if ( $raw_status & 127 )
  288. {
  289. return ($raw_status & 127) + 128; # Signal num + 128
  290. }
  291. else
  292. {
  293. return $raw_status >> 8; # Exit code
  294. }
  295. }
  296. ##############################################################################
  297. #
  298. # Kill processes left from previous runs
  299. #
  300. ##############################################################################
  301. # We just "ping" on the ports, and if we can't do a socket connect
  302. # we assume the server is dead. So we don't *really* know a server
  303. # is dead, we just hope that it after letting the listen port go,
  304. # it is dead enough for us to start a new server.
  305. sub mtr_kill_leftovers () {
  306. # First, kill all masters and slaves that would conflict with
  307. # this run. Make sure to remove the PID file, if any.
  308. # FIXME kill IM manager first, else it will restart the servers, how?!
  309. my @args;
  310. for ( my $idx; $idx < 2; $idx++ )
  311. {
  312. push(@args,{
  313. pid => 0, # We don't know the PID
  314. pidfile => $::instance_manager->{'instances'}->[$idx]->{'path_pid'},
  315. sockfile => $::instance_manager->{'instances'}->[$idx]->{'path_sock'},
  316. port => $::instance_manager->{'instances'}->[$idx]->{'port'},
  317. });
  318. }
  319. for ( my $idx; $idx < 2; $idx++ )
  320. {
  321. push(@args,{
  322. pid => 0, # We don't know the PID
  323. pidfile => $::master->[$idx]->{'path_mypid'},
  324. sockfile => $::master->[$idx]->{'path_mysock'},
  325. port => $::master->[$idx]->{'path_myport'},
  326. });
  327. }
  328. for ( my $idx; $idx < 3; $idx++ )
  329. {
  330. push(@args,{
  331. pid => 0, # We don't know the PID
  332. pidfile => $::slave->[$idx]->{'path_mypid'},
  333. sockfile => $::slave->[$idx]->{'path_mysock'},
  334. port => $::slave->[$idx]->{'path_myport'},
  335. });
  336. }
  337. mtr_mysqladmin_shutdown(\@args, 20);
  338. # We now have tried to terminate nice. We have waited for the listen
  339. # port to be free, but can't really tell if the mysqld process died
  340. # or not. We now try to find the process PID from the PID file, and
  341. # send a kill to that process. Note that Perl let kill(0,@pids) be
  342. # a way to just return the numer of processes the kernel can send
  343. # signals to. So this can be used (except on Cygwin) to determine
  344. # if there are processes left running that we cound out might exists.
  345. #
  346. # But still after all this work, all we know is that we have
  347. # the ports free.
  348. # We scan the "var/run/" directory for other process id's to kill
  349. # FIXME $path_run_dir or something
  350. my $rundir= "$::opt_vardir/run";
  351. if ( -d $rundir )
  352. {
  353. opendir(RUNDIR, $rundir)
  354. or mtr_error("can't open directory \"$rundir\": $!");
  355. my @pids;
  356. while ( my $elem= readdir(RUNDIR) )
  357. {
  358. my $pidfile= "$rundir/$elem";
  359. if ( -f $pidfile )
  360. {
  361. my $pid= mtr_get_pid_from_file($pidfile);
  362. # Race, could have been removed between I tested with -f
  363. # and the unlink() below, so I better check again with -f
  364. if ( ! unlink($pidfile) and -f $pidfile )
  365. {
  366. mtr_error("can't remove $pidfile");
  367. }
  368. if ( $::glob_cygwin_perl or kill(0, $pid) )
  369. {
  370. push(@pids, $pid); # We know (cygwin guess) it exists
  371. }
  372. }
  373. }
  374. closedir(RUNDIR);
  375. if ( @pids )
  376. {
  377. if ( $::glob_cygwin_perl )
  378. {
  379. # We have no (easy) way of knowing the Cygwin controlling
  380. # process, in the PID file we only have the Windows process id.
  381. system("kill -f " . join(" ",@pids)); # Hope for the best....
  382. mtr_debug("Sleep 5 seconds waiting for processes to die");
  383. sleep(5);
  384. }
  385. else
  386. {
  387. my $retries= 10; # 10 seconds
  388. do
  389. {
  390. kill(9, @pids);
  391. mtr_debug("Sleep 1 second waiting for processes to die");
  392. sleep(1) # Wait one second
  393. } while ( $retries-- and kill(0, @pids) );
  394. if ( kill(0, @pids) ) # Check if some left
  395. {
  396. mtr_warning("can't kill process(es) " . join(" ", @pids));
  397. }
  398. }
  399. }
  400. }
  401. # We may have failed everything, bug we now check again if we have
  402. # the listen ports free to use, and if they are free, just go for it.
  403. foreach my $srv ( @args )
  404. {
  405. if ( mtr_ping_mysqld_server($srv->{'port'}, $srv->{'sockfile'}) )
  406. {
  407. mtr_warning("can't kill old mysqld holding port $srv->{'port'}");
  408. }
  409. }
  410. }
  411. ##############################################################################
  412. #
  413. # Shut down mysqld servers we have started from this run of this script
  414. #
  415. ##############################################################################
  416. # To speed things we kill servers in parallel. The argument is a list
  417. # of 'ports', 'pids', 'pidfiles' and 'socketfiles'.
  418. # FIXME On Cygwin, and maybe some other platforms, $srv->{'pid'} and
  419. # $srv->{'pidfile'} will not be the same PID. We need to try to kill
  420. # both I think.
  421. sub mtr_stop_mysqld_servers ($) {
  422. my $spec= shift;
  423. # ----------------------------------------------------------------------
  424. # First try nice normal shutdown using 'mysqladmin'
  425. # ----------------------------------------------------------------------
  426. # Shutdown time must be high as slave may be in reconnect
  427. mtr_mysqladmin_shutdown($spec, 70);
  428. # ----------------------------------------------------------------------
  429. # We loop with waitpid() nonblocking to see how many of the ones we
  430. # are to kill, actually got killed by mtr_mysqladmin_shutdown().
  431. # Note that we don't rely on this, the mysqld server might have stop
  432. # listening to the port, but still be alive. But it is a start.
  433. # ----------------------------------------------------------------------
  434. foreach my $srv ( @$spec )
  435. {
  436. if ( $srv->{'pid'} and (waitpid($srv->{'pid'},&WNOHANG) == $srv->{'pid'}) )
  437. {
  438. $srv->{'pid'}= 0;
  439. }
  440. }
  441. # ----------------------------------------------------------------------
  442. # We know the process was started from this file, so there is a PID
  443. # saved, or else we have nothing to do.
  444. # Might be that is is recorded to be missing, but we failed to
  445. # take away the PID file earlier, then we do it now.
  446. # ----------------------------------------------------------------------
  447. my %mysqld_pids;
  448. foreach my $srv ( @$spec )
  449. {
  450. if ( $srv->{'pid'} )
  451. {
  452. $mysqld_pids{$srv->{'pid'}}= 1;
  453. }
  454. else
  455. {
  456. # Server is dead, we remove the pidfile if any
  457. # Race, could have been removed between I tested with -f
  458. # and the unlink() below, so I better check again with -f
  459. if ( -f $srv->{'pidfile'} and ! unlink($srv->{'pidfile'}) and
  460. -f $srv->{'pidfile'} )
  461. {
  462. mtr_error("can't remove $srv->{'pidfile'}");
  463. }
  464. }
  465. }
  466. # ----------------------------------------------------------------------
  467. # If the processes where started from this script, and we had no PIDS
  468. # then we don't have to do anything.
  469. # ----------------------------------------------------------------------
  470. if ( ! keys %mysqld_pids )
  471. {
  472. # cluck "This is how we got here!";
  473. return;
  474. }
  475. # ----------------------------------------------------------------------
  476. # In mtr_mysqladmin_shutdown() we only waited for the mysqld servers
  477. # not to listen to the port. But we are not sure we got them all
  478. # killed. If we suspect it lives, try nice kill with SIG_TERM. Note
  479. # that for true Win32 processes, kill(0,$pid) will not return 1.
  480. # ----------------------------------------------------------------------
  481. start_reap_all(); # Avoid zombies
  482. my @mysqld_pids= keys %mysqld_pids;
  483. mtr_kill_processes(\@mysqld_pids);
  484. stop_reap_all(); # Get into control again
  485. # ----------------------------------------------------------------------
  486. # Now, we check if all we can find using kill(0,$pid) are dead,
  487. # and just assume the rest are. We cleanup socket and PID files.
  488. # ----------------------------------------------------------------------
  489. {
  490. my $errors= 0;
  491. foreach my $srv ( @$spec )
  492. {
  493. if ( $srv->{'pid'} )
  494. {
  495. if ( kill(0,$srv->{'pid'}) )
  496. {
  497. # FIXME In Cygwin there seem to be some fast reuse
  498. # of PIDs, so dying may not be the right thing to do.
  499. $errors++;
  500. mtr_warning("can't kill process $srv->{'pid'}");
  501. }
  502. else
  503. {
  504. # We managed to kill it at last
  505. # FIXME In Cygwin, we will get here even if the process lives.
  506. # Not needed as we know the process is dead, but to be safe
  507. # we unlink and check success in two steps. We first unlink
  508. # without checking the error code, and then check if the
  509. # file still exists.
  510. foreach my $file ($srv->{'pidfile'}, $srv->{'sockfile'})
  511. {
  512. # Know it is dead so should be no race, careful anyway
  513. if ( -f $file and ! unlink($file) and -f $file )
  514. {
  515. $errors++;
  516. mtr_warning("couldn't delete $file");
  517. }
  518. }
  519. }
  520. }
  521. }
  522. if ( $errors )
  523. {
  524. # We are in trouble, just die....
  525. mtr_error("we could not kill or clean up all processes");
  526. }
  527. }
  528. # FIXME We just assume they are all dead, for Cygwin we are not
  529. # really sure
  530. }
  531. ##############################################################################
  532. #
  533. # Shut down mysqld servers using "mysqladmin ... shutdown".
  534. # To speed this up, we start them in parallel and use waitpid() to
  535. # catch their termination. Note that this doesn't say the servers
  536. # are terminated, just that 'mysqladmin' is terminated.
  537. #
  538. # Note that mysqladmin will ask the server about what PID file it uses,
  539. # and mysqladmin will wait for it to be removed before it terminates
  540. # (unless passes timeout).
  541. #
  542. # This function will take at most about 20 seconds, and we still are not
  543. # sure we killed them all. If none is responding to ping, we return 1,
  544. # else we return 0.
  545. #
  546. ##############################################################################
  547. sub mtr_mysqladmin_shutdown {
  548. my $spec= shift;
  549. my $adm_shutdown_tmo= shift;
  550. my %mysql_admin_pids;
  551. my @to_kill_specs;
  552. foreach my $srv ( @$spec )
  553. {
  554. if ( mtr_ping_mysqld_server($srv->{'port'}, $srv->{'sockfile'}) )
  555. {
  556. push(@to_kill_specs, $srv);
  557. }
  558. }
  559. foreach my $srv ( @to_kill_specs )
  560. {
  561. # FIXME wrong log.....
  562. # FIXME, stderr.....
  563. # Shutdown time must be high as slave may be in reconnect
  564. my $args;
  565. mtr_init_args(\$args);
  566. mtr_add_arg($args, "--no-defaults");
  567. mtr_add_arg($args, "--user=%s", $::opt_user);
  568. mtr_add_arg($args, "--password=");
  569. if ( -e $srv->{'sockfile'} )
  570. {
  571. mtr_add_arg($args, "--socket=%s", $srv->{'sockfile'});
  572. }
  573. if ( $srv->{'port'} )
  574. {
  575. mtr_add_arg($args, "--port=%s", $srv->{'port'});
  576. }
  577. if ( $srv->{'port'} and ! -e $srv->{'sockfile'} )
  578. {
  579. mtr_add_arg($args, "--protocol=tcp"); # Needed if no --socket
  580. }
  581. mtr_add_arg($args, "--connect_timeout=5");
  582. mtr_add_arg($args, "--shutdown_timeout=$adm_shutdown_tmo");
  583. mtr_add_arg($args, "shutdown");
  584. # We don't wait for termination of mysqladmin
  585. my $pid= mtr_spawn($::exe_mysqladmin, $args,
  586. "", $::path_manager_log, $::path_manager_log, "",
  587. { append_log_file => 1 });
  588. $mysql_admin_pids{$pid}= 1;
  589. }
  590. # As mysqladmin is such a simple program, we trust it to terminate.
  591. # I.e. we wait blocking, and wait wait for them all before we go on.
  592. while (keys %mysql_admin_pids)
  593. {
  594. foreach my $pid (keys %mysql_admin_pids)
  595. {
  596. if ( waitpid($pid,0) > 0 )
  597. {
  598. delete $mysql_admin_pids{$pid};
  599. }
  600. }
  601. }
  602. # If we trusted "mysqladmin --shutdown_timeout= ..." we could just
  603. # terminate now, but we don't (FIXME should be debugged).
  604. # So we try again to ping and at least wait the same amount of time
  605. # mysqladmin would for all to die.
  606. my $timeout= 20; # 20 seconds max
  607. my $res= 1; # If we just fall through, we are done
  608. # in the sense that the servers don't
  609. # listen to their ports any longer
  610. TIME:
  611. while ( $timeout-- )
  612. {
  613. foreach my $srv ( @to_kill_specs )
  614. {
  615. $res= 1; # We are optimistic
  616. if ( mtr_ping_mysqld_server($srv->{'port'}, $srv->{'sockfile'}) )
  617. {
  618. mtr_debug("Sleep 1 second waiting for processes to stop using port");
  619. sleep(1); # One second
  620. $res= 0;
  621. next TIME;
  622. }
  623. }
  624. last; # If we got here, we are done
  625. }
  626. $timeout or mtr_debug("At least one server is still listening to its port");
  627. sleep(5) if $::glob_win32; # FIXME next startup fails if no sleep
  628. return $res;
  629. }
  630. ##############################################################################
  631. #
  632. # The operating system will keep information about dead children,
  633. # we read this information here, and if we have records the process
  634. # is alive, we mark it as dead.
  635. #
  636. ##############################################################################
  637. sub mtr_record_dead_children () {
  638. my $ret_pid;
  639. # FIXME the man page says to wait for -1 to terminate,
  640. # but on OS X we get '0' all the time...
  641. while ( ($ret_pid= waitpid(-1,&WNOHANG)) > 0 )
  642. {
  643. mtr_debug("waitpid() catched exit of child $ret_pid");
  644. foreach my $idx (0..1)
  645. {
  646. if ( $::master->[$idx]->{'pid'} eq $ret_pid )
  647. {
  648. mtr_debug("child $ret_pid was master[$idx]");
  649. $::master->[$idx]->{'pid'}= 0;
  650. }
  651. }
  652. foreach my $idx (0..2)
  653. {
  654. if ( $::slave->[$idx]->{'pid'} eq $ret_pid )
  655. {
  656. mtr_debug("child $ret_pid was slave[$idx]");
  657. $::slave->[$idx]->{'pid'}= 0;
  658. last;
  659. }
  660. }
  661. }
  662. }
  663. sub start_reap_all {
  664. # This causes terminating processes to not become zombies, avoiding
  665. # the need for (or possibility of) explicit waitpid().
  666. $SIG{CHLD}= 'IGNORE';
  667. # On some platforms (Linux, QNX, OSX, ...) there is potential race
  668. # here. If a process terminated before setting $SIG{CHLD} (but after
  669. # any attempt to waitpid() it), it will still be a zombie. So we
  670. # have to handle any such process here.
  671. while(waitpid(-1, &WNOHANG) > 0) { };
  672. }
  673. sub stop_reap_all {
  674. $SIG{CHLD}= 'DEFAULT';
  675. }
  676. sub mtr_ping_mysqld_server () {
  677. my $port= shift;
  678. my $remote= "localhost";
  679. my $iaddr= inet_aton($remote);
  680. if ( ! $iaddr )
  681. {
  682. mtr_error("can't find IP number for $remote");
  683. }
  684. my $paddr= sockaddr_in($port, $iaddr);
  685. my $proto= getprotobyname('tcp');
  686. if ( ! socket(SOCK, PF_INET, SOCK_STREAM, $proto) )
  687. {
  688. mtr_error("can't create socket: $!");
  689. }
  690. if ( connect(SOCK, $paddr) )
  691. {
  692. close(SOCK); # FIXME check error?
  693. return 1;
  694. }
  695. else
  696. {
  697. return 0;
  698. }
  699. }
  700. ##############################################################################
  701. #
  702. # Wait for a file to be created
  703. #
  704. ##############################################################################
  705. # FIXME check that the pidfile contains the expected pid!
  706. sub sleep_until_file_created ($$$) {
  707. my $pidfile= shift;
  708. my $timeout= shift;
  709. my $pid= shift;
  710. for ( my $loop= 1; $loop <= $timeout; $loop++ )
  711. {
  712. if ( -r $pidfile )
  713. {
  714. return $pid;
  715. }
  716. # Check if it died after the fork() was successful
  717. if ( $pid > 0 && waitpid($pid,&WNOHANG) == $pid )
  718. {
  719. return 0;
  720. }
  721. mtr_debug("Sleep 1 second waiting for creation of $pidfile");
  722. if ( $loop % 60 == 0 )
  723. {
  724. my $left= $timeout - $loop;
  725. mtr_warning("Waited $loop seconds for $pidfile to be created, " .
  726. "still waiting for $left seconds...");
  727. }
  728. sleep(1);
  729. }
  730. return 0;
  731. }
  732. sub mtr_kill_processes ($) {
  733. my $pids = shift;
  734. foreach my $sig (15, 9)
  735. {
  736. my $retries= 10;
  737. while (1)
  738. {
  739. kill($sig, @{$pids});
  740. last unless kill (0, @{$pids}) and $retries--;
  741. mtr_debug("Sleep 2 second waiting for processes to die");
  742. sleep(2);
  743. }
  744. }
  745. }
  746. ##############################################################################
  747. #
  748. # When we exit, we kill off all children
  749. #
  750. ##############################################################################
  751. # FIXME something is wrong, we sometimes terminate with "Hangup" written
  752. # to tty, and no STDERR output telling us why.
  753. # FIXME for some readon, setting HUP to 'IGNORE' will cause exit() to
  754. # write out "Hangup", and maybe loose some output. We insert a sleep...
  755. sub mtr_exit ($) {
  756. my $code= shift;
  757. # cluck("Called mtr_exit()");
  758. mtr_timer_stop_all($::glob_timers);
  759. local $SIG{HUP} = 'IGNORE';
  760. # ToDo: Signalling -$$ will only work if we are the process group
  761. # leader (in fact on QNX it will signal our session group leader,
  762. # which might be Do-compile or Pushbuild, causing tests to be
  763. # aborted). So we only do it if we are the group leader. We might
  764. # set ourselves as the group leader at startup (with
  765. # POSIX::setpgrp(0,0)), but then care must be needed to always do
  766. # proper child process cleanup.
  767. kill('HUP', -$$) if $$ == getpgrp();
  768. sleep 2;
  769. exit($code);
  770. }
  771. 1;