PageRenderTime 54ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 1ms

/lib/MHA/ServerManager.pm

https://github.com/luismottacampos/mha4mysql-manager
Perl | 1438 lines | 1272 code | 114 blank | 52 comment | 173 complexity | eec8d260c4b3bc04477c9e3fba19669e MD5 | raw file
Possible License(s): GPL-2.0
  1. #!/usr/bin/env perl
  2. # Copyright (C) 2011 DeNA Co.,Ltd.
  3. #
  4. # This program is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License as published by
  6. # the Free Software Foundation; either version 2 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc.,
  17. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  18. package MHA::ServerManager;
  19. use strict;
  20. use warnings FATAL => 'all';
  21. use Carp qw(croak);
  22. use English qw(-no_match_vars);
  23. use MHA::SlaveUtil;
  24. use MHA::DBHelper;
  25. use MHA::Server;
  26. use MHA::ManagerConst;
  27. use Parallel::ForkManager;
  # Constructor. Every server list starts as an empty array ref and the
  # orig_master / new_master / logger slots start undefined. Key/value pairs
  # supplied by the caller are applied after the defaults and override them.
  sub new {
    my ( $class, @overrides ) = @_;
    my %defaults = (
      servers          => [],
      dead_servers     => [],
      alive_servers    => [],
      alive_slaves     => [],
      failed_slaves    => [],
      latest_slaves    => [],
      oldest_slaves    => [],
      unmanaged_slaves => [],
      orig_master      => undef,
      new_master       => undef,
      logger           => undef,
    );
    my $self = { %defaults, @overrides };
    return bless $self, $class;
  }
  # Replaces the full list of configured servers with the given array ref.
  sub set_servers($$) {
    my ( $self, $servers_ref ) = @_;
    $self->{servers} = $servers_ref;
  }
  # Stores the array ref of slaves identified as "latest".
  sub set_latest_slaves($$) {
    my ( $self, $servers_ref ) = @_;
    $self->{latest_slaves} = $servers_ref;
  }
  # Stores the array ref of slaves identified as "oldest".
  sub set_oldest_slaves($$) {
    my ( $self, $servers_ref ) = @_;
    $self->{oldest_slaves} = $servers_ref;
  }
  # Stores the array ref of unmanaged slaves.
  sub set_unmanaged_slaves($$) {
    my ( $self, $servers_ref ) = @_;
    $self->{unmanaged_slaves} = $servers_ref;
  }
  # Returns all configured servers as a list (element count in scalar context).
  sub get_servers($) {
    my ($self) = @_;
    my $list = $self->{servers};
    return @$list;
  }
  # Returns the servers currently recorded as dead, as a list.
  sub get_dead_servers($) {
    my ($self) = @_;
    my $list = $self->{dead_servers};
    return @$list;
  }
  # Returns the servers currently recorded as alive, as a list.
  sub get_alive_servers($) {
    my ($self) = @_;
    my $list = $self->{alive_servers};
    return @$list;
  }
  # Returns the slaves currently recorded as alive, as a list.
  sub get_alive_slaves($) {
    my ($self) = @_;
    my $list = $self->{alive_slaves};
    return @$list;
  }
  # Returns the slaves currently recorded as failed, as a list.
  sub get_failed_slaves($) {
    my ($self) = @_;
    my $list = $self->{failed_slaves};
    return @$list;
  }
  # Returns the slaves stored by set_latest_slaves, as a list.
  sub get_latest_slaves($) {
    my ($self) = @_;
    my $list = $self->{latest_slaves};
    return @$list;
  }
  # Returns the slaves stored by set_oldest_slaves, as a list.
  sub get_oldest_slaves($) {
    my ($self) = @_;
    my $list = $self->{oldest_slaves};
    return @$list;
  }
  # Returns the slaves currently recorded as unmanaged, as a list.
  sub get_unmanaged_slaves($) {
    my ($self) = @_;
    my $list = $self->{unmanaged_slaves};
    return @$list;
  }
  # Appends one server to the dead-server list.
  sub add_dead_server($$) {
    my ( $self, $server ) = @_;
    push @{ $self->{dead_servers} }, $server;
  }
  # Appends one server to the alive-server list.
  sub add_alive_server($$) {
    my ( $self, $server ) = @_;
    push @{ $self->{alive_servers} }, $server;
  }
  # Appends one server to the alive-slave list.
  sub add_alive_slave($$) {
    my ( $self, $server ) = @_;
    push @{ $self->{alive_slaves} }, $server;
  }
  # Appends one server to the failed-slave list.
  sub add_failed_slave($$) {
    my ( $self, $server ) = @_;
    push @{ $self->{failed_slaves} }, $server;
  }
  # Appends one server to the unmanaged-slave list.
  sub add_unmanaged_slave($$) {
    my ( $self, $server ) = @_;
    push @{ $self->{unmanaged_slaves} }, $server;
  }
  # Records $server as the original master on the manager and flags the server
  # object itself with orig_master = 1.
  sub set_orig_master($$) {
    my ( $self, $server ) = @_;
    $self->{orig_master}   = $server;
    $server->{orig_master} = 1;
  }
  # Returns the server recorded as the original master (undef if none is set).
  sub get_orig_master($) {
    my ($self) = @_;
    return $self->{orig_master};
  }
  # Rebuilds the dead / alive / alive-slave / failed-slave / unmanaged lists from
  # the flags on every configured server:
  #   - {dead}       -> dead server
  #   - {unmanaged}  -> unmanaged slave
  #   - otherwise    -> alive server; if it is a slave (not_slave eq '0') and is
  #                     not the original master, it is an alive slave unless it
  #                     reports a SQL thread error or lacks relay logs, in which
  #                     case it is a failed slave.
  # Logs an error and croaks when no alive server or no alive slave remains.
  sub init_servers($) {
    my ($self) = @_;
    my $log    = $self->{logger};
    my @all    = $self->get_servers();

    # Start every derived list from scratch.
    foreach my $list_name (
      qw(dead_servers alive_servers alive_slaves failed_slaves unmanaged_slaves)
      )
    {
      $self->{$list_name} = [];
    }

    foreach my $server (@all) {
      if ( $server->{dead} ) {
        $self->add_dead_server($server);
        next;
      }
      if ( $server->{unmanaged} ) {
        $self->add_unmanaged_slave($server);
        next;
      }
      $self->add_alive_server($server);
      next unless ( $server->{not_slave} eq '0' && !$server->{orig_master} );
      if ( $server->is_sql_thread_error() || $server->{lack_relay_log} ) {
        $self->add_failed_slave($server);
      }
      else {
        $self->add_alive_slave($server);
      }
    }

    my @alive_servers = $self->get_alive_servers();
    if ( !@alive_servers ) {
      $log->error("There is no alive server. We can't do failover");
      croak;
    }
    my @alive_slaves = $self->get_alive_slaves();
    if ( !@alive_slaves ) {
      $log->error("There is no alive slave. We can't do failover");
      croak;
    }
  }
  # Installs the logger object used by this manager.
  sub set_logger($$) {
    my ( $self, $logger ) = @_;
    $self->{logger} = $logger;
  }
  # Probes every configured server in parallel (one forked child per server),
  # then reads status from the reachable ones and rebuilds the server lists.
  #
  # Arguments (after $self): host, ip and port of a master already known to be
  # dead. When all three are given, the matching server is reported dead without
  # attempting a connection.
  #
  # A child exiting with $MHA::ManagerConst::MYSQL_DEAD_RC marks its server as
  # dead. Any other non-zero exit code is fatal: an error is logged and the sub
  # croaks after all children have finished.
  sub connect_all_and_read_server_status($$$$) {
    my $self             = shift;
    my $dead_master_host = shift;
    my $dead_master_ip   = shift;
    my $dead_master_port = shift;
    my $log              = $self->{logger};
    my @servers          = $self->get_servers();
    $log->debug("Connecting to servers..");
    my $should_die = 0;

    # Direct method call instead of indirect object syntax ("new Class ...").
    my $connection_checker = Parallel::ForkManager->new( scalar @servers );

    # Runs in the parent as each child is reaped; $target is the identifier
    # passed to start() below, i.e. the server object itself.
    $connection_checker->run_on_finish(
      sub {
        my ( $pid, $exit_code, $target ) = @_;
        if ( $exit_code == $MHA::ManagerConst::MYSQL_DEAD_RC ) {
          $target->{dead} = 1;
        }
        elsif ($exit_code) {
          $should_die = 1;
        }
      }
    );

    foreach my $target (@servers) {
      unless ( $target->{logger} ) {
        $target->{logger} = $log;
      }

      # Parent: start() returns the child pid (true) -> move on to next server.
      # Child: start() returns 0 -> fall through and do the check.
      $connection_checker->start($target) and next;
      eval {
        $SIG{INT} = $SIG{HUP} = $SIG{QUIT} = $SIG{TERM} = "DEFAULT";
        if ( $dead_master_host
          && $dead_master_ip
          && $dead_master_port )
        {
          if (
            $target->server_equals(
              $dead_master_host, $dead_master_ip, $dead_master_port
            )
            )
          {
            $connection_checker->finish($MHA::ManagerConst::MYSQL_DEAD_RC);
          }
        }
        my $rc = $target->connect_check(2);
        $connection_checker->finish($rc);
      };
      if ($@) {
        $log->error($@);
        undef $@;
        $connection_checker->finish(1);
      }
      $connection_checker->finish(0);
    }
    $connection_checker->wait_all_children;

    if ($should_die) {
      $log->error("Got fatal error, stopping operations");
      croak;
    }
    foreach my $target (@servers) {
      next if ( $target->{dead} );
      $target->connect_and_get_status();
    }
    $self->init_servers();
    $self->compare_slave_version();
    $log->debug("Connecting to servers done.");
    $self->validate_current_master();
  }
  # Returns the mysql_version of the first alive server flagged with
  # oldest_major_version, or undef when no alive server carries that flag.
  sub get_oldest_version($) {
    my ($self) = @_;
    my $version;
    foreach my $candidate ( $self->get_alive_servers() ) {
      next unless ( $candidate->{oldest_major_version} );
      $version = $candidate->{mysql_version};
      last;
    }
    return $version;
  }
  # Flags each alive slave with oldest_major_version = 1 when its MySQL major
  # version equals the smallest major version among the alive slaves, else 0.
  # Servers marked dead or not_slave are skipped and left untouched.
  sub compare_slave_version($) {
    my $self    = shift;
    my @servers = $self->get_alive_servers();
    my $log     = $self->{logger};
    $log->debug(" Comparing MySQL versions..");

    # Parse each version once and remember it next to its server.
    my @parsed;
    my $min_major_version;
    foreach my $server (@servers) {
      next if ( $server->{dead} || $server->{not_slave} );
      my $major_version =
        MHA::NodeUtil::parse_mysql_major_version( $server->{mysql_version} );
      push @parsed, [ $server, $major_version ];
      if (!$min_major_version
        || $major_version < $min_major_version )
      {
        $min_major_version = $major_version;
      }
    }

    foreach my $entry (@parsed) {
      my ( $server, $major_version ) = @$entry;
      $server->{oldest_major_version} =
        ( $min_major_version == $major_version ) ? 1 : 0;
    }
    $log->debug(" Comparing MySQL versions done.");
  }
  # Logs, as one warning, the filter rules reported by the master (when it is
  # given and not dead) followed by those of every alive slave.
  sub print_filter_rules($$) {
    my ( $self, $master ) = @_;
    my $log    = $self->{logger};
    my $report = "Bad Binlog/Replication filtering rules:\n";
    if ( $master && !$master->{dead} ) {
      $report .= $master->print_filter(1);
    }
    foreach my $slave ( $self->get_alive_slaves() ) {
      $report .= $slave->print_filter();
    }
    $log->warning($report);
  }
  # Verifies that binlog / replication filter rules are consistent.
  #
  #   - Every alive slave must have the same Replicate_* rules.
  #   - Every log-bin enabled slave must have the same Binlog_Do_DB and
  #     Binlog_Ignore_DB as the master (when the master is given and alive) or,
  #     otherwise, as the first log-bin enabled slave.
  #   - If both are non-empty, binlog_do_db must equal replicate_do_db, and
  #     binlog_ignore_db must equal replicate_ignore_db.
  #
  # Returns 0 when everything matches, 1 (after logging the problem and the
  # full rule set) on the first mismatch.
  #
  # The reference value for each rule is taken from the first server that
  # defines it. Testing for definedness rather than truth matters: with a
  # truthiness test an empty rule on the first slave was silently replaced by
  # a later slave's non-empty rule and the difference was never reported.
  sub validate_repl_filter($$) {
    my $self   = shift;
    my $master = shift;
    my $log    = $self->{logger};
    $log->info("Checking replication filtering settings..");
    my $binlog_do_db;
    my $binlog_ignore_db;

    # If master is alive
    if ( $master && !$master->{dead} ) {
      $binlog_do_db     = $master->{Binlog_Do_DB};
      $binlog_ignore_db = $master->{Binlog_Ignore_DB};
      $log->info(
        " binlog_do_db= $binlog_do_db, binlog_ignore_db= $binlog_ignore_db");
    }

    my @repl_rules = qw(
      Replicate_Do_DB
      Replicate_Ignore_DB
      Replicate_Do_Table
      Replicate_Ignore_Table
      Replicate_Wild_Do_Table
      Replicate_Wild_Ignore_Table
    );
    my %reference;    # rule name => value every slave must match

    foreach my $slave ( $self->get_alive_slaves() ) {
      foreach my $rule (@repl_rules) {
        $reference{$rule} = $slave->{$rule}
          unless ( defined $reference{$rule} );
      }
      if ( $slave->{log_bin} ) {
        $binlog_do_db = $slave->{Binlog_Do_DB}
          unless ( defined $binlog_do_db );
        $binlog_ignore_db = $slave->{Binlog_Ignore_DB}
          unless ( defined $binlog_ignore_db );
      }

      if ( grep { $reference{$_} ne $slave->{$_} } @repl_rules ) {
        $log->error(
          sprintf(
  "Replication filtering check failed on %s! All slaves must have same replication filtering rules. Check SHOW SLAVE STATUS output and set my.cnf correctly.",
            $slave->get_hostinfo() )
        );
        $self->print_filter_rules($master);
        return 1;
      }
      if ( $slave->{log_bin} ) {
        if ( $binlog_do_db ne $slave->{Binlog_Do_DB}
          || $binlog_ignore_db ne $slave->{Binlog_Ignore_DB} )
        {
          $log->error(
            sprintf(
  "Binlog filtering check failed on %s! All log-bin enabled servers must have same binlog filtering rules (same binlog-do-db and binlog-ignore-db). Check SHOW MASTER STATUS output and set my.cnf correctly.",
              $slave->get_hostinfo() )
          );
          $self->print_filter_rules($master);
          return 1;
        }
      }
    }

    my $replicate_do_db     = $reference{Replicate_Do_DB};
    my $replicate_ignore_db = $reference{Replicate_Ignore_DB};
    if ( $binlog_do_db && $replicate_do_db ) {
      if ( $binlog_do_db ne $replicate_do_db ) {
        $log->error(
          sprintf(
  "binlog_do_db on master(%s) must be the same as replicate_do_db on slaves(%s).",
            $binlog_do_db, $replicate_do_db
          )
        );
        $self->print_filter_rules($master);
        return 1;
      }
    }
    if ( $binlog_ignore_db && $replicate_ignore_db ) {
      if ( $binlog_ignore_db ne $replicate_ignore_db ) {
        $log->error(
          sprintf(
  "binlog_ignore_db on master(%s) must be the same as replicate_ignore_db on slaves(%s).",
            $binlog_ignore_db, $replicate_ignore_db
          )
        );
        $self->print_filter_rules($master);
        return 1;
      }
    }
    $log->info(" Replication filtering check ok.");
    return 0;
  }
  # Croaks (after logging) if any server that must be alive is dead, or if any
  # slave is in the failed list. The current master is allowed to be dead, and
  # when $ignore_fail_check is true, servers flagged ignore_fail are tolerated.
  # Returns 0 when nothing objectionable is found.
  sub validate_num_alive_servers($$$) {
    my ( $self, $current_master, $ignore_fail_check ) = @_;
    my $log = $self->{logger};

    foreach my $dead ( $self->get_dead_servers() ) {
      next if ( $dead->{id} eq $current_master->{id} );
      next if ( $ignore_fail_check && $dead->{ignore_fail} );
      my $msg =
        sprintf( " Server %s is dead, but must be alive! Check server settings.",
        $dead->get_hostinfo() );
      $log->error($msg);
      croak;
    }

    foreach my $failed ( $self->get_failed_slaves() ) {
      next if ( $ignore_fail_check && $failed->{ignore_fail} );
      my $msg = sprintf( " Replication on %s fails! Check server settings.",
        $failed->get_hostinfo() );
      $log->error($msg);
      croak;
    }
    return 0;
  }
  # Checks the configuration of every alive slave:
  # 1. read_only is set to 1 (logged at INFO when it is not)
  # 2. relay_log_purge is set to 0 (logged at WARN when it is not)
  # 3. log-bin is enabled (logged at WARN when it is not)
  # 4. when $check_repl_filter is true, replication filter rules are validated
  #    through validate_repl_filter($master)
  # Returns 0 when ok; otherwise the validate_repl_filter result.
  sub validate_slaves($$$) {
    my ( $self, $check_repl_filter, $master ) = @_;
    my $log   = $self->{logger};
    my $error = 0;
    $log->info("Checking slave configurations..");

    foreach my $slave ( $self->get_alive_slaves() ) {
      unless ( $slave->{read_only} eq '1' ) {
        $log->info(
          sprintf(
            " read_only=1 is not set on slave %s.\n",
            $slave->get_hostinfo()
          )
        );
      }
      unless ( $slave->{relay_purge} eq '0' ) {
        $log->warning(
          sprintf(
            " relay_log_purge=0 is not set on slave %s.\n",
            $slave->get_hostinfo()
          )
        );
      }
      if ( $slave->{log_bin} eq '0' ) {
        $log->warning(
          sprintf(
            " log-bin is not set on slave %s. This host can not be a master.\n",
            $slave->get_hostinfo()
          )
        );
      }
    }

    if ($check_repl_filter) {
      $error = $self->validate_repl_filter($master);
    }
    return $error;
  }
  # Looks up an alive server by IP and port; delegates to get_server_by_ipport
  # with the alive-only flag set.
  sub get_alive_server_by_ipport {
    my ( $self, $ip, $port ) = @_;
    return $self->get_server_by_ipport( $ip, $port, 1 );
  }
  # Returns the first server whose ip (string compare) and port (numeric
  # compare) match. Searches only alive servers when $alive_only is true,
  # otherwise all configured servers. Returns nothing when there is no match.
  sub get_server_by_ipport {
    my ( $self, $ip, $port, $alive_only ) = @_;
    my @pool =
        $alive_only
      ? $self->get_alive_servers()
      : $self->get_servers();
    foreach my $server (@pool) {
      return $server if ( $server->{ip} eq $ip && $server->{port} == $port );
    }
    return;
  }
  # Returns the first alive server whose hostname and port match, or nothing.
  sub get_alive_server_by_hostport {
    my ( $self, $host, $port ) = @_;
    foreach my $server ( $self->get_alive_servers() ) {
      next unless ( $server->{hostname} eq $host );
      next unless ( $server->{port} == $port );
      return $server;
    }
    return;
  }
  # Returns the first server in the given array ref whose id equals $id
  # (string comparison), or nothing when no server matches.
  sub get_server_from_by_id {
    my ( $self, $servers_ref, $id ) = @_;
    foreach my $server ( @{$servers_ref} ) {
      return $server if ( $server->{id} eq $id );
    }
    return;
  }
  # Returns the alive server whose id equals $id, or nothing.
  sub get_alive_server_by_id {
    my ( $self, $id ) = @_;
    foreach my $server ( $self->get_alive_servers() ) {
      return $server if ( $server->{id} eq $id );
    }
    return;
  }
  # Returns the alive slave whose id equals $id, or nothing.
  sub get_alive_slave_by_id {
    my ( $self, $id ) = @_;
    foreach my $slave ( $self->get_alive_slaves() ) {
      return $slave if ( $slave->{id} eq $id );
    }
    return;
  }
  # Returns the configured server (alive or not) that $slave replicates from,
  # looked up by the slave's Master_IP / Master_Port.
  sub get_master_by_slave {
    my ( $self, $slave ) = @_;
    my $master_ip   = $slave->{Master_IP};
    my $master_port = $slave->{Master_Port};
    return $self->get_server_by_ipport( $master_ip, $master_port );
  }
  # Works out which server is the current master and validates the topology.
  #
  # Croaks (after logging) when two or more alive servers are not slaves, when
  # no alive server is a slave, or when a slave replicates from a master that
  # is not in the configuration. With two or more distinct masters the primary
  # is chosen by get_primary_master; otherwise the single master is recorded
  # as the original master. Finally every alive server is checked against the
  # chosen master via validate_master_ip_port. Returns the chosen master.
  sub validate_current_master($) {
    my ($self) = @_;
    my $log = $self->{logger};

    # One representative slave per distinct "ip:port" master address.
    my %master_hash;
    my $num_slaves        = 0;
    my $not_slave_servers = 0;
    foreach my $server ( $self->get_alive_servers() ) {
      if ( $server->{not_slave} eq '0' ) {
        $master_hash{"$server->{Master_IP}:$server->{Master_Port}"} = $server;
        $num_slaves++;
      }
      else {
        $not_slave_servers++;
      }
    }

    if ( $not_slave_servers >= 2 ) {
      $log->error(
  "There are $not_slave_servers non-slave servers! MHA manages at most one non-slave server. Check configurations."
      );
      croak;
    }
    if ( $num_slaves < 1 ) {
      $log->error(
        "There is not any alive slave! Check slave settings for details.");
      croak;
    }

    # verify masters exist in a config file
    my $master;
    foreach my $key ( keys(%master_hash) ) {
      my $slave = $master_hash{$key};
      $master = $self->get_master_by_slave($slave);
      next if ($master);
      $log->error(
        sprintf(
  "Master %s:%d from which slave %s replicates is not defined in the configuration file!",
          $slave->{Master_IP}, $slave->{Master_Port},
          $slave->get_hostinfo()
        )
      );
      croak;
    }

    my $real_master;
    if ( keys(%master_hash) >= 2 ) {
      $real_master = $self->get_primary_master( \%master_hash );
    }
    else {
      $real_master = $master;
      $self->set_orig_master($real_master);
    }
    $self->validate_master_ip_port($real_master);
    return $real_master;
  }
  # Checks every alive server other than $real_master:
  #   - it must be in the alive-slave list, otherwise log and croak;
  #   - it must replicate from $real_master's ip/port. A slave that does not is
  #     marked unmanaged when it is flagged multi_tier_slave; otherwise this
  #     logs and croaks.
  # When any slave was marked unmanaged the server lists are rebuilt.
  sub validate_master_ip_port {
    my ( $self, $real_master ) = @_;
    my $log             = $self->{logger};
    my $found_unmanaged = 0;

    foreach my $slave ( $self->get_alive_servers() ) {
      next if ( $slave->{id} eq $real_master->{id} );

      if ( !$self->get_alive_slave_by_id( $slave->{id} ) ) {
        $log->error(
          sprintf( "Server %s is alive, but does not work as a slave!",
            $slave->get_hostinfo() )
        );
        croak;
      }

      # Nothing more to do when the slave points at the real master.
      next
        unless ( $slave->{Master_IP} ne $real_master->{ip}
        || $slave->{Master_Port} != $real_master->{port} );

      if ( $slave->{multi_tier_slave} ) {
        $slave->{unmanaged} = 1;
        $found_unmanaged = 1;
        next;
      }

      my $msg = sprintf(
        "Slave %s replicates from %s:%d, but real master is %s!",
        $slave->get_hostinfo(), $slave->{Master_Host},
        $slave->{Master_Port}, $real_master->get_hostinfo()
      );
      $log->error($msg);
      croak;
    }

    if ($found_unmanaged) {
      $self->init_servers();
    }
  }
  # Builds a human-readable summary with one line per master referenced by the
  # slaves in %$master_hash_ref: host info, the upstream it replicates from
  # (if any), and "read-only" / "dead" markers. The summary ends with an extra
  # blank line.
  sub get_multi_master_print_info {
    my ( $self, $master_hash_ref ) = @_;
    my %master_hash = %$master_hash_ref;
    my $summary     = "";
    foreach my $key ( keys(%master_hash) ) {
      my $master = $self->get_master_by_slave( $master_hash{$key} );
      my $line   = "Master " . $master->get_hostinfo();
      if ( $master->{Master_Host} ) {
        $line .=
  ", replicating from $master->{Master_Host}($master->{Master_IP}:$master->{Master_Port})";
      }
      if ( $master->{read_only} ) {
        $line .= ", read-only";
      }
      if ( $master->{dead} ) {
        $line .= ", dead";
      }
      $summary .= $line . "\n";
    }
    $summary .= "\n";
    return $summary;
  }
  # Picks the primary master in a multi-master setup.
  #
  # $master_hash_ref maps "ip:port" to a slave replicating from that address.
  # A master counts as a "real" master unless it is alive and read-only, so a
  # dead master counts as well. Exactly one real master must exist: with none,
  # or with two or more, the situation is logged and the sub croaks. On success
  # the master is recorded as the original master, the server lists are
  # rebuilt, and the master is returned.
  sub get_primary_master {
    my ( $self, $master_hash_ref ) = @_;
    my $log = $self->{logger};

    my $num_real_masters = 0;
    my $real_master;
    foreach my $slave ( values %{$master_hash_ref} ) {
      my $master = $self->get_master_by_slave($slave);
      next if ( !$master->{dead} && $master->{read_only} );
      $real_master = $master;
      $num_real_masters++;
    }

    if ( $num_real_masters < 1 ) {
      $log->error(
        sprintf(
  "Multi-master configuration is detected, but all of them are read-only! Check configurations for details. Master configurations are as below: \n%s",
          $self->get_multi_master_print_info($master_hash_ref) )
      );
      croak;
    }
    elsif ( $num_real_masters >= 2 ) {
      $log->error(
        sprintf(
  "Multi-master configuration is detected, but two or more masters are either writable (read-only is not set) or dead! Check configurations for details. Master configurations are as below: \n%s",
          $self->get_multi_master_print_info($master_hash_ref) )
      );
      croak;
    }
    else {
      $self->set_orig_master($real_master);
      $log->info(
        sprintf(
  "Multi-master configuration is detected. Current primary(writable) master is %s",
          $real_master->get_hostinfo() )
      );
      $log->info(
        sprintf( "Master configurations are as below: \n%s",
          $self->get_multi_master_print_info($master_hash_ref) )
      );
      $self->init_servers();
    }
    return $real_master;
  }
  # Returns every configured server that is not dead (dead ne '1') and has
  # candidate_master >= 1, in configuration order.
  sub get_candidate_masters($) {
    my ($self) = @_;
    my @candidates;
    foreach my $server ( $self->get_servers() ) {
      next if ( $server->{dead} eq '1' );
      next unless ( $server->{candidate_master} >= 1 );
      push @candidates, $server;
    }
    return @candidates;
  }
  # Prints every dead server via print_servers.
  sub print_dead_servers {
    my ($self) = @_;
    my $dead_ref = $self->{dead_servers};
    $self->print_servers($dead_ref);
  }
  # Logs the host info of every alive server at INFO level, one line each.
  sub print_alive_servers {
    my ($self) = @_;
    my $log = $self->{logger};
    foreach my $server ( $self->get_alive_servers() ) {
      my $line = " " . $server->get_hostinfo();
      $log->info($line);
    }
  }
  # Prints every alive slave via print_servers.
  sub print_alive_slaves {
    my ($self) = @_;
    my $slaves_ref = $self->{alive_slaves};
    $self->print_servers($slaves_ref);
  }
  # Prints every slave in the latest-slaves list via print_servers.
  sub print_latest_slaves {
    my ($self) = @_;
    my $slaves_ref = $self->{latest_slaves};
    $self->print_servers($slaves_ref);
  }
  # Prints every slave in the oldest-slaves list via print_servers.
  sub print_oldest_slaves {
    my ($self) = @_;
    my $slaves_ref = $self->{oldest_slaves};
    $self->print_servers($slaves_ref);
  }
  # Prints a "Failed Slaves:" heading followed by the failed slaves, but only
  # when at least one failed slave exists.
  sub print_failed_slaves_if {
    my ($self) = @_;
    my $log    = $self->{logger};
    my @failed = $self->get_failed_slaves();
    if (@failed) {
      $log->info("Failed Slaves:");
      $self->print_servers( $self->{failed_slaves} );
    }
  }
  # Prints an "Unmanaged Servers:" heading followed by the unmanaged slaves,
  # but only when at least one unmanaged slave exists.
  sub print_unmanaged_slaves_if {
    my ($self)    = @_;
    my $log       = $self->{logger};
    my @unmanaged = $self->get_unmanaged_slaves();
    if (@unmanaged) {
      $log->info("Unmanaged Servers:");
      $self->print_servers( $self->{unmanaged_slaves} );
    }
  }
  # Calls print_server() on each server in the given array ref, in order.
  sub print_servers {
    my $self        = shift;
    my $servers_ref = shift;
    foreach my $server ( @{$servers_ref} ) {
      $server->print_server();
    }
  }
  # Calls disconnect() on every alive server.
  sub disconnect_all($) {
    my ($self) = @_;
    foreach my $server ( $self->get_alive_servers() ) {
      $server->disconnect();
    }
  }
  # Checks that the current master is NOT reachable from any alive slave.
  # Prerequisite: all slaves see the same master.
  #
  # For each alive slave the IO thread is stopped and restarted, and after a
  # three second pause the slave status is read. If the IO thread reports
  # "Yes" the master is reachable from that slave.
  #
  # Returns 0 when no slave can reach the master; 1 as soon as one slave can,
  # or when the slave status cannot be read.
  #
  # NOTE: the $slaves_ref argument is accepted but not used; the slaves that
  # are checked always come from get_alive_slaves().
  sub is_master_reachable_from_slaves($$) {
    my ( $self, $slaves_ref ) = @_;
    my $log = $self->{logger};
    my @slaves = $self->get_alive_slaves();
    $log->info("Checking the current master is not reachable from all slaves..");

    foreach my $slave (@slaves) {
      my $dbhelper = $slave->{dbhelper};

      # Bounce the IO thread so that it has to reconnect to the master.
      $dbhelper->stop_io_thread();
      $dbhelper->start_io_thread();
      sleep(3);

      my %status = $dbhelper->check_slave_status();
      if ( $status{Status} ne '0' || !defined( $status{Slave_IO_Running} ) ) {
        my $msg = sprintf( "Got error when stopping/starting io thread on %s",
          $slave->get_hostinfo() );
        $log->error($msg);
        return 1;
      }
      if ( $status{Slave_IO_Running} eq "Yes" ) {
        my $msg =
          sprintf( "Master is reachable from slave %s", $slave->get_hostinfo() );
        $log->warning($msg);
        return 1;
      }

      $dbhelper->stop_io_thread();
      my $msg = sprintf( " Master is not reachable from slave %s",
        $slave->get_hostinfo() );
      $log->info($msg);
    }
    $log->info(" done.");
    return 0;
  }
  # Re-reads the replication position of every alive slave before the main
  # operations start. Slave information was already fetched by
  # connect_all_and_read_server_status, so check_slave_status is not expected
  # to fail here; if it reports a non-zero Status the error is logged and the
  # sub croaks.
  #
  # For each slave, {latest} is reset to 0 and the binlog / relay-log
  # coordinates are refreshed from the status result.
  sub read_slave_status($) {
    my ($self) = @_;
    my $log = $self->{logger};
    my @slaves = $self->get_alive_slaves();
    $log->debug("Fetching current slave status..");

    my @position_fields = qw(
      Master_Log_File
      Read_Master_Log_Pos
      Relay_Master_Log_File
      Exec_Master_Log_Pos
      Relay_Log_File
      Relay_Log_Pos
    );

    foreach my $slave (@slaves) {
      my $dbhelper = $slave->{dbhelper};
      my %status   = $dbhelper->check_slave_status();

      # This should not happen so die if it happens
      if ( $status{Status} ) {
        my $msg = "Checking slave status failed.";
        if ( $status{Errstr} ) {
          $msg .= " err=$status{Errstr}";
        }
        $log->error($msg);
        croak;
      }

      $slave->{latest} = 0;
      foreach my $field (@position_fields) {
        $slave->{$field} = $status{$field};
      }
    }
    $log->debug(" Fetching current slave status done.");
  }
  # Calls start_sql_thread_if() on every alive slave.
  sub start_sql_threads_if($) {
    my ($self) = @_;
    foreach my $slave ( $self->get_alive_slaves() ) {
      $slave->start_sql_thread_if();
    }
  }
  # Acquires the failover advisory lock on every alive slave. A true return
  # value from a slave's get_failover_advisory_lock() is treated as failure:
  # the error is logged and the sub croaks.
  sub get_failover_advisory_locks($) {
    my ($self) = @_;
    my $log = $self->{logger};
    foreach my $slave ( $self->get_alive_slaves() ) {
      next unless ( $slave->get_failover_advisory_lock() );
      $log->error(
        sprintf(
  "Getting advisory lock failed on %s. Maybe failover script or purge_relay_logs script is running on the same slave?",
          $slave->get_hostinfo() )
      );
      croak;
    }
  }
  # Finds the alive slaves that have read furthest from the master's binary
  # log (or, when $find_oldest is true, the ones that have read least).
  #
  # Positions are compared by Master_Log_File (string comparison) and then by
  # Read_Master_Log_Pos (numeric comparison). All slaves sharing the winning
  # position are kept. Each of them is flagged {latest} = 1 (or {oldest} = 1),
  # the position is logged, and the list is stored through set_latest_slaves
  # (or set_oldest_slaves).
  #
  # Croaks with a logged error when there is no alive slave. Without this
  # guard the log line below would format undefined values, which is fatal
  # under this file's "use warnings FATAL => 'all'" and far harder to diagnose.
  sub identify_latest_slaves($$) {
    my $self        = shift;
    my $find_oldest = shift;
    $find_oldest = 0 unless ($find_oldest);
    my $log    = $self->{logger};
    my @slaves = $self->get_alive_slaves();

    unless (@slaves) {
      $log->error(
        sprintf(
          "There is no alive slave. Can not identify the %s slaves.",
          $find_oldest ? "oldest" : "latest" )
      );
      croak;
    }

    my @latest = ();
    foreach my $slave (@slaves) {

      # Current best position; both stay undef until a first slave is taken.
      # Reading through @latest only when it is non-empty avoids
      # autovivifying a placeholder hash in $latest[0].
      my ( $best_file, $best_pos ) =
        @latest
        ? ( $latest[0]{Master_Log_File}, $latest[0]{Read_Master_Log_Pos} )
        : ();
      my $file = $slave->{Master_Log_File};
      my $pos  = $slave->{Read_Master_Log_Pos};

      my $is_better;
      if ( !$best_file && !defined($best_pos) ) {
        $is_better = 1;    # nothing usable recorded yet
      }
      elsif ($find_oldest) {
        $is_better =
          ( $file lt $best_file )
          || ( ( $file le $best_file ) && $pos < $best_pos );
      }
      else {
        $is_better =
          ( $file gt $best_file )
          || ( ( $file ge $best_file ) && $pos > $best_pos );
      }

      if ($is_better) {
        @latest = ($slave);
      }
      elsif ( ( $file eq $best_file ) && ( $pos == $best_pos ) ) {
        push( @latest, $slave );
      }
    }

    foreach my $slave (@latest) {
      $slave->{latest} = 1 if ( !$find_oldest );
      $slave->{oldest} = 1 if ($find_oldest);
    }
    $log->info(
      sprintf(
        "The %s binary log file/position on all slaves is" . " %s:%d\n",
        $find_oldest ? "oldest" : "latest", $latest[0]{Master_Log_File},
        $latest[0]{Read_Master_Log_Pos}
      )
    );
    if ($find_oldest) {
      $self->set_oldest_slaves( \@latest );
    }
    else {
      $self->set_latest_slaves( \@latest );
    }
  }
  # Convenience wrapper: runs identify_latest_slaves in "find oldest" mode.
  sub identify_oldest_slaves($) {
    my ($self) = @_;
    return $self->identify_latest_slaves(1);
  }
  # Compares two binary log positions, each given as (log file, offset).
  # Log files are compared as strings, offsets as numbers.
  #  1: the first position is ahead of the second
  # -1: the first position is behind the second
  #  0: both positions are equal
  sub pos_cmp {
    my ( $self, $lhs_file, $lhs_pos, $rhs_file, $rhs_pos ) = @_;
    if ( $lhs_file eq $rhs_file ) {
      return 0  if ( $lhs_pos == $rhs_pos );
      return -1 if ( $lhs_pos < $rhs_pos );
      return 1;
    }
    return ( $lhs_file lt $rhs_file ) ? -1 : 1;
  }
  # Flags every alive slave whose read position is behind ($mlf, $mlp) with
  # no_master = 1. Slaves at or ahead of that position are left unchanged.
  sub set_no_master_if_older($$$) {
    my ( $self, $mlf, $mlp ) = @_;
    foreach my $slave ( $self->get_alive_slaves() ) {
      my $order =
        $self->pos_cmp( $slave->{Master_Log_File},
        $slave->{Read_Master_Log_Pos},
        $mlf, $mlp );
      next unless ( $order < 0 );
      $slave->{no_master} = 1;
    }
  }
  # Returns (Master_Log_File, Read_Master_Log_Pos) of the alive slave that has
  # read least from the master's binary log, ignoring slaves flagged
  # ignore_fail. Files are compared as strings, offsets as numbers.
  # Returns an empty list (undef in scalar context) when no slave qualifies.
  sub get_oldest_limit_pos($) {
    my $self   = shift;
    my @slaves = $self->get_alive_slaves();
    my $oldest;
    foreach my $slave (@slaves) {
      next if ( $slave->{ignore_fail} );

      # Position of the current pick; both undef until a first slave is taken.
      my ( $oldest_file, $oldest_pos ) =
        $oldest
        ? ( $oldest->{Master_Log_File}, $oldest->{Read_Master_Log_Pos} )
        : ();
      if (
        ( !$oldest_file && !defined($oldest_pos) )
        || ( $slave->{Master_Log_File} lt $oldest_file )
        || ( ( $slave->{Master_Log_File} le $oldest_file )
          && $slave->{Read_Master_Log_Pos} < $oldest_pos )
        )
      {
        $oldest = $slave;
      }
    }
    return ( $oldest->{Master_Log_File}, $oldest->{Read_Master_Log_Pos} )
      if ($oldest);
    return;
  }
  # Checks whether $target's SQL thread is too far behind $latest to be a
  # master. The delay is unacceptable when $latest has read from a later
  # master log file than $target is executing, or when $latest's read offset
  # exceeds $target's executed offset by more than 100000000.
  # 0: no or acceptable delay
  # 1: unacceptable delay (can not be a master)
  sub check_slave_delay($$$) {
    my ( $self, $target, $latest ) = @_;
    my $log = $self->{logger};
    $log->debug(
      sprintf( "Checking replication delay on %s.. ", $target->get_hostinfo() ) );

    my $behind_in_file =
      ( $latest->{Master_Log_File} gt $target->{Relay_Master_Log_File} );
    unless ( $behind_in_file
      || $latest->{Read_Master_Log_Pos} >
      $target->{Exec_Master_Log_Pos} + 100000000 )
    {
      $log->debug(" ok.");
      return 0;
    }

    $log->warning(
      sprintf(
  " Slave %s SQL Thread delays too much. Latest log file:%s:%d, Current log file:%s:%d. This server is not selected as a new master because recovery will take long time.\n",
        $target->get_hostinfo(), $latest->{Master_Log_File},
        $latest->{Read_Master_Log_Pos}, $target->{Relay_Master_Log_File},
        $target->{Exec_Master_Log_Pos}
      )
    );
    return 1;
  }
  987. # The following servers can not be master:
  988. # - dead servers
  989. # - Set no_master in conf files (i.e. DR servers)
  990. # - log_bin is disabled
  991. # - Major version is not the oldest
  992. # - too much replication delay
  993. sub get_bad_candidate_masters($$$) {
  994. my $self = shift;
  995. my $latest_slave = shift;
  996. my $check_replication_delay = shift;
  997. my $log = $self->{logger};
  998. my @servers = $self->get_alive_slaves();
  999. my @ret_servers = ();
  1000. foreach (@servers) {
  1001. if (
  1002. $_->{no_master} >= 1
  1003. || $_->{log_bin} eq '0'
  1004. || $_->{oldest_major_version} eq '0'
  1005. || (
  1006. $latest_slave
  1007. && ( $check_replication_delay
  1008. && $self->check_slave_delay( $_, $latest_slave ) >= 1 )
  1009. )
  1010. )
  1011. {
  1012. push( @ret_servers, $_ );
  1013. }
  1014. }
  1015. return @ret_servers;
  1016. }
  1017. sub is_target_bad_for_new_master {
  1018. my $self = shift;
  1019. my $target = shift;
  1020. my @bad = $self->get_bad_candidate_masters();
  1021. foreach (@bad) {
  1022. return 1 if ( $target->{id} eq $_->{id} );
  1023. }
  1024. return 0;
  1025. }
  1026. # Picking up new master
  1027. # If preferred node is specified, one of active preferred nodes will be new master.
  1028. # If the latest server behinds too much (i.e. stopping sql thread for online backups), we should not use it as a new master, but we should fetch relay log there. Even though preferred master is configured, it does not become a master if it's far behind.
  1029. sub select_new_master {
  1030. my $self = shift;
  1031. my $prio_new_master_host = shift;
  1032. my $prio_new_master_port = shift;
  1033. my $check_replication_delay = shift;
  1034. $check_replication_delay = 1 if ( !defined($check_replication_delay) );
  1035. my $log = $self->{logger};
  1036. my @latest = $self->get_latest_slaves();
  1037. my @slaves = $self->get_alive_slaves();
  1038. my @pref = $self->get_candidate_masters();
  1039. my @bad =
  1040. $self->get_bad_candidate_masters( $latest[0], $check_replication_delay );
  1041. if ( $prio_new_master_host && $prio_new_master_port ) {
  1042. my $new_master =
  1043. $self->get_alive_server_by_hostport( $prio_new_master_host,
  1044. $prio_new_master_port );
  1045. if ($new_master) {
  1046. my $a = $self->get_server_from_by_id( \@bad, $new_master->{id} );
  1047. unless ($a) {
  1048. $log->info("$prio_new_master_host can be new master.");
  1049. return $new_master;
  1050. }
  1051. else {
  1052. $log->error("$prio_new_master_host is bad as a new master!");
  1053. return;
  1054. }
  1055. }
  1056. else {
  1057. $log->error("$prio_new_master_host is not alive!");
  1058. return;
  1059. }
  1060. }
  1061. $log->info("Searching new master from slaves..");
  1062. $log->info(" Candidate masters from the configuration file:");
  1063. $self->print_servers( \@pref );
  1064. $log->info(" Non-candidate masters:");
  1065. $self->print_servers( \@bad );
  1066. return $latest[0]
  1067. if ( $#pref < 0 && $#bad < 0 && $latest[0]->{latest_priority} );
  1068. if ( $latest[0]->{latest_priority} ) {
  1069. $log->info(
  1070. " Searching from candidate_master slaves which have received the latest relay log events.."
  1071. ) if ( $#pref >= 0 );
  1072. foreach my $h (@latest) {
  1073. foreach my $p (@pref) {
  1074. if ( $h->{id} eq $p->{id} ) {
  1075. return $h
  1076. if ( !$self->get_server_from_by_id( \@bad, $p->{id} ) );
  1077. }
  1078. }
  1079. }
  1080. $log->info(" Not found.") if ( $#pref >= 0 );
  1081. }
  1082. #new master is not latest
  1083. $log->info(" Searching from all candidate_master slaves..")
  1084. if ( $#pref >= 0 );
  1085. foreach my $s (@slaves) {
  1086. foreach my $p (@pref) {
  1087. if ( $s->{id} eq $p->{id} ) {
  1088. my $a = $self->get_server_from_by_id( \@bad, $p->{id} );
  1089. return $s unless ($a);
  1090. }
  1091. }
  1092. }
  1093. $log->info(" Not found.") if ( $#pref >= 0 );
  1094. if ( $latest[0]->{latest_priority} ) {
  1095. $log->info(
  1096. " Searching from all slaves which have received the latest relay log events.."
  1097. );
  1098. foreach my $h (@latest) {
  1099. my $a = $self->get_server_from_by_id( \@bad, $h->{id} );
  1100. return $h unless ($a);
  1101. }
  1102. $log->info(" Not found.");
  1103. }
  1104. # none of latest servers can not be a master
  1105. $log->info(" Searching from all slaves..");
  1106. foreach my $s (@slaves) {
  1107. my $a = $self->get_server_from_by_id( \@bad, $s->{id} );
  1108. return $s unless ($a);
  1109. }
  1110. $log->info(" Not found.");
  1111. return;
  1112. }
  1113. sub get_new_master_binlog_position($$) {
  1114. my $self = shift;
  1115. my $target = shift; # master
  1116. my $dbhelper = $target->{dbhelper};
  1117. my $log = $self->{logger};
  1118. $log->info("Getting new master's binlog name and position..");
  1119. my ( $file, $pos ) = $dbhelper->show_master_status();
  1120. if ( $file && defined($pos) ) {
  1121. $log->info(" $file:$pos");
  1122. $log->info(
  1123. sprintf(
  1124. " All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='%s', MASTER_PORT=%d, MASTER_LOG_FILE='%s', MASTER_LOG_POS=%d, MASTER_USER='%s', MASTER_PASSWORD='xxx';",
  1125. ( $target->{hostname} eq $target->{ip} )
  1126. ? $target->{hostname}
  1127. : ("$target->{hostname} or $target->{ip}"),
  1128. $target->{port},
  1129. $file,
  1130. $pos,
  1131. $target->{repl_user}
  1132. )
  1133. );
  1134. }
  1135. else {
  1136. $log->error("Getting new master's binlog position failed!");
  1137. return;
  1138. }
  1139. return ( $file, $pos );
  1140. }
  1141. sub change_master_and_start_slave {
  1142. my ( $self, $target, $master, $master_log_file, $master_log_pos, $log ) = @_;
  1143. $log = $self->{logger} unless ($log);
  1144. return if ( $target->{id} eq $master->{id} );
  1145. my $dbhelper = $target->{dbhelper};
  1146. $log->info(
  1147. sprintf(
  1148. " Resetting slave %s and starting replication from the new master %s..",
  1149. $target->get_hostinfo(),
  1150. $master->get_hostinfo()
  1151. )
  1152. );
  1153. $target->stop_slave($log) unless ( $target->{not_slave} );
  1154. $dbhelper->reset_slave() unless ( $target->{not_slave} );
  1155. $dbhelper->change_master( $target->{use_ip_for_change_master}
  1156. ? $master->{ip}
  1157. : $master->{hostname},
  1158. $master->{port}, $master_log_file, $master_log_pos, $master->{repl_user},
  1159. $master->{repl_password} );
  1160. $log->info(" Executed CHANGE MASTER.");
  1161. # After executing CHANGE MASTER, relay_log_purge is automatically disabled.
  1162. # If the original value is 0, we should turn to 0 explicitly.
  1163. unless ( $target->{relay_purge} ) {
  1164. $target->disable_relay_log_purge();
  1165. }
  1166. my $ret = $target->start_slave($log);
  1167. unless ($ret) {
  1168. $log->info(" Slave started.");
  1169. }
  1170. return $ret;
  1171. }
  1172. sub get_current_alive_master($) {
  1173. my $self = shift;
  1174. my $log = $self->{logger};
  1175. my $master = $self->get_orig_master();
  1176. unless ($master) {
  1177. $log->error(
  1178. "MySQL master is not correctly configured. Check master/slave settings");
  1179. croak;
  1180. }
  1181. my $m = $self->get_alive_server_by_id( $master->{id} );
  1182. unless ($m) {
  1183. $log->warning("MySQL master is not currently alive!");
  1184. return;
  1185. }
  1186. $log->info( sprintf( "Current Alive Master: %s", $m->get_hostinfo() ) );
  1187. return $master;
  1188. }
  1189. sub stop_io_threads {
  1190. my $self = shift;
  1191. my $log = $self->{logger};
  1192. my @alive_slaves = $self->get_alive_slaves();
  1193. my $pm = new Parallel::ForkManager( $#alive_slaves + 1 );
  1194. foreach my $target (@alive_slaves) {
  1195. $target->stop_io_thread($target);
  1196. exit 0;
  1197. }
  1198. $pm->wait_all_children;
  1199. return 0;
  1200. }
  1201. sub check_repl_priv {
  1202. my $self = shift;
  1203. my @servers = $self->get_alive_servers();
  1204. foreach my $target (@servers) {
  1205. $target->check_repl_priv();
  1206. }
  1207. }
  1208. sub release_failover_advisory_lock {
  1209. my $self = shift;
  1210. my @servers = $self->get_alive_servers();
  1211. foreach my $target (@servers) {
  1212. $target->release_failover_advisory_lock();
  1213. }
  1214. }
  1215. sub get_current_servers_ascii {
  1216. my $self = shift;
  1217. my $orig_master = shift;
  1218. my @alive_slaves = $self->get_alive_slaves();
  1219. my $str = "$orig_master->{hostname} (current master)";
  1220. $str .= " ($orig_master->{node_label})"
  1221. if ( $orig_master->{node_label} );
  1222. $str .= "\n";
  1223. foreach my $slave (@alive_slaves) {
  1224. $str .= " +--" . "$slave->{hostname}";
  1225. $str .= " ($slave->{node_label})" if ( $slave->{node_label} );
  1226. $str .= "\n";
  1227. }
  1228. $str .= "\n";
  1229. return $str;
  1230. }
  1231. sub print_servers_ascii {
  1232. my $self = shift;
  1233. my $orig_master = shift;
  1234. my $log = $self->{logger};
  1235. my @alive_slaves = $self->get_alive_slaves();
  1236. my $str = "\n";
  1237. $str .= $self->get_current_servers_ascii($orig_master);
  1238. $log->info($str);
  1239. }
  1240. sub print_servers_migration_ascii {
  1241. my $self = shift;
  1242. my $orig_master = shift;
  1243. my $new_master = shift;
  1244. my $orig_master_is_new_slave = shift;
  1245. my $log = $self->{logger};
  1246. my @alive_slaves = $self->get_alive_slaves();
  1247. my $str = "\n";
  1248. $str .= "From:\n";
  1249. $str .= $self->get_current_servers_ascii($orig_master);
  1250. $str .= "To:\n";
  1251. $str .= "$new_master->{hostname} (new master)";
  1252. $str .= " ($new_master->{node_label})"
  1253. if ( $new_master->{node_label} );
  1254. $str .= "\n";
  1255. foreach my $slave (@alive_slaves) {
  1256. next if ( $slave->{id} eq $new_master->{id} );
  1257. $str .= " +--" . "$slave->{hostname}";
  1258. $str .= " ($slave->{node_label})" if ( $slave->{node_label} );
  1259. $str .= "\n";
  1260. }
  1261. if ($orig_master_is_new_slave) {
  1262. $str .= " +--" . "$orig_master->{hostname}";
  1263. $str .= " ($orig_master->{node_label})" if ( $orig_master->{node_label} );
  1264. $str .= "\n";
  1265. }
  1266. $log->info($str);
  1267. }
  1268. # for manual failover/switch only
  1269. sub manually_decide_new_master {
  1270. my $self = shift;
  1271. my $orig_master = shift;
  1272. my $new_master = shift;
  1273. my $log = $self->{logger};
  1274. printf(
  1275. "\nStarting master switch from %s to %s? (yes/NO): ",
  1276. $orig_master->get_hostinfo(),
  1277. $new_master->get_hostinfo()
  1278. );
  1279. my $ret = <STDIN>;
  1280. chomp($ret);
  1281. if ( lc($ret) !~ /^y/ ) {
  1282. print "Continue? (yes/NO): ";
  1283. $ret = <STDIN>;
  1284. chomp($ret);
  1285. if ( lc($ret) !~ /^y/ ) {
  1286. $orig_master->{not_error} = 1;
  1287. die "Not typed yes. Stopping.";
  1288. }
  1289. print "Enter new master host name: ";
  1290. $ret = <STDIN>;
  1291. chomp($ret);
  1292. $new_master = $self->get_alive_server_by_hostport( $ret, 3306 );
  1293. if ( !$new_master ) {
  1294. die "New server not found!\n";
  1295. }
  1296. printf "Master switch to %s. OK? (yes/NO): ", $new_master->get_hostinfo();
  1297. $ret = <STDIN>;
  1298. chomp($ret);
  1299. die "Not typed yes. Stopping. \n" if ( lc($ret) !~ /^y/ );
  1300. }
  1301. return $new_master;
  1302. }
  1303. sub check_replication_health {
  1304. my $self = shift;
  1305. my $allow_delay_seconds = shift;
  1306. $allow_delay_seconds = 1 unless ($allow_delay_seconds);
  1307. my $log = $self->{logger};
  1308. my @alive_slaves = $self->get_alive_slaves();
  1309. foreach my $target (@alive_slaves) {
  1310. $log->info("Checking replication health on $target->{hostname}..");
  1311. if ( !$target->current_slave_position() ) {
  1312. $log->error("Getting slave status failed!");
  1313. croak;
  1314. }
  1315. if ( $target->has_replication_problem($allow_delay_seconds) ) {
  1316. $log->error(" failed!");
  1317. croak;
  1318. }
  1319. else {
  1320. $log->info(" ok.");
  1321. }
  1322. }
  1323. }
  1324. 1;