PageRenderTime 59ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/MHA/ServerManager.pm

https://github.com/hirose31/mha4mysql-manager
Perl | 1587 lines | 1414 code | 121 blank | 52 comment | 197 complexity | 88ab98f650aafddec479c2b69ff76d49 MD5 | raw file
Possible License(s): GPL-2.0
  1. #!/usr/bin/env perl
  2. # Copyright (C) 2011 DeNA Co.,Ltd.
  3. #
  4. # This program is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License as published by
  6. # the Free Software Foundation; either version 2 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc.,
  17. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  18. package MHA::ServerManager;
  19. use strict;
  20. use warnings FATAL => 'all';
  21. use Carp qw(croak);
  22. use English qw(-no_match_vars);
  23. use MHA::SlaveUtil;
  24. use MHA::DBHelper;
  25. use MHA::Server;
  26. use MHA::ManagerConst;
  27. use Parallel::ForkManager;
  28. sub new {
  29. my $class = shift;
  30. my $self = {
  31. servers => [],
  32. dead_servers => [],
  33. alive_servers => [],
  34. alive_slaves => [],
  35. failed_slaves => [],
  36. latest_slaves => [],
  37. oldest_slaves => [],
  38. unmanaged_slaves => [],
  39. orig_master => undef,
  40. new_master => undef,
  41. logger => undef,
  42. @_,
  43. };
  44. return bless $self, $class;
  45. }
  46. sub set_servers($$) {
  47. my $self = shift;
  48. my $servers_ref = shift;
  49. $self->{servers} = $servers_ref;
  50. }
  51. sub set_latest_slaves($$) {
  52. my $self = shift;
  53. my $servers_ref = shift;
  54. $self->{latest_slaves} = $servers_ref;
  55. }
  56. sub set_oldest_slaves($$) {
  57. my $self = shift;
  58. my $servers_ref = shift;
  59. $self->{oldest_slaves} = $servers_ref;
  60. }
  61. sub set_unmanaged_slaves($$) {
  62. my $self = shift;
  63. my $servers_ref = shift;
  64. $self->{unmanaged_slaves} = $servers_ref;
  65. }
  66. sub get_servers($) {
  67. my $self = shift;
  68. return @{ $self->{servers} };
  69. }
  70. sub get_dead_servers($) {
  71. my $self = shift;
  72. return @{ $self->{dead_servers} };
  73. }
  74. sub get_alive_servers($) {
  75. my $self = shift;
  76. return @{ $self->{alive_servers} };
  77. }
  78. sub get_alive_slaves($) {
  79. my $self = shift;
  80. return @{ $self->{alive_slaves} };
  81. }
  82. sub get_failed_slaves($) {
  83. my $self = shift;
  84. return @{ $self->{failed_slaves} };
  85. }
  86. sub get_latest_slaves($) {
  87. my $self = shift;
  88. return @{ $self->{latest_slaves} };
  89. }
  90. sub get_oldest_slaves($) {
  91. my $self = shift;
  92. return @{ $self->{oldest_slaves} };
  93. }
  94. sub get_unmanaged_slaves($) {
  95. my $self = shift;
  96. return @{ $self->{unmanaged_slaves} };
  97. }
  98. sub add_dead_server($$) {
  99. my $self = shift;
  100. my $server = shift;
  101. push @{ $self->{dead_servers} }, $server;
  102. }
  103. sub add_alive_server($$) {
  104. my $self = shift;
  105. my $server = shift;
  106. push @{ $self->{alive_servers} }, $server;
  107. }
  108. sub add_alive_slave($$) {
  109. my $self = shift;
  110. my $server = shift;
  111. push @{ $self->{alive_slaves} }, $server;
  112. }
  113. sub add_failed_slave($$) {
  114. my $self = shift;
  115. my $server = shift;
  116. push @{ $self->{failed_slaves} }, $server;
  117. }
  118. sub add_unmanaged_slave($$) {
  119. my $self = shift;
  120. my $server = shift;
  121. push @{ $self->{unmanaged_slaves} }, $server;
  122. }
  123. sub set_orig_master($$) {
  124. my $self = shift;
  125. my $server = shift;
  126. $self->{orig_master} = $server;
  127. $server->{orig_master} = 1;
  128. }
  129. sub get_orig_master($) {
  130. my $self = shift;
  131. return $self->{orig_master};
  132. }
  133. sub init_servers($) {
  134. my $self = shift;
  135. my $log = $self->{logger};
  136. my @servers = $self->get_servers();
  137. $self->{dead_servers} = [];
  138. $self->{alive_servers} = [];
  139. $self->{alive_slaves} = [];
  140. $self->{failed_slaves} = [];
  141. $self->{unmanaged_slaves} = [];
  142. foreach my $server (@servers) {
  143. if ( $server->{dead} ) {
  144. $self->add_dead_server($server);
  145. }
  146. elsif ( $server->{unmanaged} ) {
  147. $self->add_unmanaged_slave($server);
  148. }
  149. else {
  150. $self->add_alive_server($server);
  151. if ( $server->{not_slave} eq '0' && !$server->{orig_master} ) {
  152. if ( !$server->is_sql_thread_error() && !$server->{lack_relay_log} ) {
  153. $self->add_alive_slave($server);
  154. }
  155. else {
  156. $self->add_failed_slave($server);
  157. }
  158. }
  159. }
  160. }
  161. my @alive_servers = $self->get_alive_servers();
  162. if ( $#alive_servers <= -1 ) {
  163. $log->error("There is no alive server. We can't do failover");
  164. croak;
  165. }
  166. my @alive_slaves = $self->get_alive_slaves();
  167. if ( $#alive_slaves <= -1 ) {
  168. $log->error("There is no alive slave. We can't do failover");
  169. croak;
  170. }
  171. }
  172. sub init_binlog_server {
  173. my $binlog_server_ref = shift;
  174. my $log = shift;
  175. my @binlog_servers = @$binlog_server_ref;
  176. my $num_alive_binlog_servers = 0;
  177. foreach my $server (@binlog_servers) {
  178. unless ( $server->{logger} ) {
  179. $server->{logger} = $log;
  180. }
  181. if (
  182. MHA::HealthCheck::ssh_check_simple(
  183. $server->{ssh_user}, $server->{ssh_host},
  184. $server->{ssh_ip}, $server->{ssh_port},
  185. $server->{logger}, $server->{ssh_connection_timeout}
  186. )
  187. )
  188. {
  189. $log->warning("Failed to SSH to binlog server $server->{hostname}");
  190. $server->{ssh_reachable} = 0;
  191. }
  192. else {
  193. if (
  194. MHA::ManagerUtil::get_node_version(
  195. $server->{logger}, $server->{ssh_user}, $server->{ssh_host},
  196. $server->{ssh_ip}, $server->{ssh_port}
  197. )
  198. )
  199. {
  200. $log->info("Binlog server $server->{hostname} is reachable.");
  201. $server->{ssh_reachable} = 1;
  202. $num_alive_binlog_servers++;
  203. }
  204. else {
  205. $log->warning(
  206. "Failed to get MHA Node version from binlog server $server->{hostname}"
  207. );
  208. $server->{ssh_reachable} = 0;
  209. }
  210. }
  211. }
  212. if ( $#binlog_servers >= 0 && $num_alive_binlog_servers <= 0 ) {
  213. $log->error("Binlog Server is defined but there is no alive server.");
  214. croak;
  215. }
  216. }
  217. sub set_logger($$) {
  218. my $self = shift;
  219. my $logger = shift;
  220. $self->{logger} = $logger;
  221. }
  222. sub connect_all_and_read_server_status($$$$) {
  223. my $self = shift;
  224. my $dead_master_host = shift;
  225. my $dead_master_ip = shift;
  226. my $dead_master_port = shift;
  227. my $log = $self->{logger};
  228. my @servers = $self->get_servers();
  229. $log->debug("Connecting to servers..");
  230. my $should_die = 0;
  231. my $connection_checker = new Parallel::ForkManager( $#servers + 1 );
  232. $connection_checker->run_on_start(
  233. sub {
  234. my ( $pid, $target ) = @_;
  235. }
  236. );
  237. $connection_checker->run_on_finish(
  238. sub {
  239. my ( $pid, $exit_code, $target ) = @_;
  240. if ( $exit_code == $MHA::ManagerConst::MYSQL_DEAD_RC ) {
  241. $target->{dead} = 1;
  242. }
  243. elsif ($exit_code) {
  244. $should_die = 1;
  245. }
  246. }
  247. );
  248. foreach my $target (@servers) {
  249. unless ( $target->{logger} ) {
  250. $target->{logger} = $log;
  251. }
  252. $connection_checker->start($target) and next;
  253. eval {
  254. $SIG{INT} = $SIG{HUP} = $SIG{QUIT} = $SIG{TERM} = "DEFAULT";
  255. if ( $dead_master_host
  256. && $dead_master_ip
  257. && $dead_master_port )
  258. {
  259. if (
  260. $target->server_equals(
  261. $dead_master_host, $dead_master_ip, $dead_master_port
  262. )
  263. )
  264. {
  265. $connection_checker->finish($MHA::ManagerConst::MYSQL_DEAD_RC);
  266. }
  267. }
  268. my $rc = $target->connect_check(2);
  269. $connection_checker->finish($rc);
  270. };
  271. if ($@) {
  272. $log->error($@);
  273. undef $@;
  274. $connection_checker->finish(1);
  275. }
  276. $connection_checker->finish(0);
  277. }
  278. $connection_checker->wait_all_children;
  279. if ($should_die) {
  280. $log->error("Got fatal error, stopping operations");
  281. croak;
  282. }
  283. foreach my $target (@servers) {
  284. next if ( $target->{dead} );
  285. $target->connect_and_get_status();
  286. }
  287. $self->init_servers();
  288. $self->compare_slave_version();
  289. $log->debug("Connecting to servers done.");
  290. $self->validate_current_master();
  291. $self->{gtid_failover_mode} = $self->get_gtid_status();
  292. $log->info(
  293. sprintf( "GTID failover mode = %d", $self->{gtid_failover_mode} ) );
  294. }
  295. sub get_oldest_version($) {
  296. my $self = shift;
  297. my @servers = $self->get_alive_servers();
  298. my $oldest_version;
  299. foreach my $server (@servers) {
  300. if ( $server->{oldest_major_version} ) {
  301. $oldest_version = $server->{mysql_version};
  302. last;
  303. }
  304. }
  305. return $oldest_version;
  306. }
  307. sub compare_slave_version($) {
  308. my $self = shift;
  309. my @servers = $self->get_alive_servers();
  310. my $log = $self->{logger};
  311. $log->debug(" Comparing MySQL versions..");
  312. my $min_major_version;
  313. foreach (@servers) {
  314. my $dbhelper = $_->{dbhelper};
  315. next if ( $_->{dead} || $_->{not_slave} );
  316. my $parsed_major_version =
  317. MHA::NodeUtil::parse_mysql_major_version( $_->{mysql_version} );
  318. if (!$min_major_version
  319. || $parsed_major_version < $min_major_version )
  320. {
  321. $min_major_version = $parsed_major_version;
  322. }
  323. }
  324. foreach (@servers) {
  325. my $dbhelper = $_->{dbhelper};
  326. next if ( $_->{dead} || $_->{not_slave} );
  327. my $parsed_major_version =
  328. MHA::NodeUtil::parse_mysql_major_version( $_->{mysql_version} );
  329. if ( $min_major_version == $parsed_major_version ) {
  330. $_->{oldest_major_version} = 1;
  331. }
  332. else {
  333. $_->{oldest_major_version} = 0;
  334. }
  335. }
  336. $log->debug(" Comparing MySQL versions done.");
  337. }
  338. sub print_filter_rules($$) {
  339. my $self = shift;
  340. my $master = shift;
  341. my $log = $self->{logger};
  342. my $msg = "Bad Binlog/Replication filtering rules:\n";
  343. $msg .= $master->print_filter(1) if ( $master && !$master->{dead} );
  344. my @slaves = $self->get_alive_slaves();
  345. foreach my $slave (@slaves) {
  346. $msg .= $slave->print_filter();
  347. }
  348. $log->warning($msg);
  349. }
  350. sub validate_repl_filter($$) {
  351. my $self = shift;
  352. my $master = shift;
  353. my $log = $self->{logger};
  354. $log->info("Checking replication filtering settings..");
  355. my $binlog_do_db;
  356. my $binlog_ignore_db;
  357. # If master is alive
  358. if ( $master && !$master->{dead} ) {
  359. $binlog_do_db = $master->{Binlog_Do_DB};
  360. $binlog_ignore_db = $master->{Binlog_Ignore_DB};
  361. $log->info(
  362. " binlog_do_db= $binlog_do_db, binlog_ignore_db= $binlog_ignore_db");
  363. }
  364. my @slaves = $self->get_alive_slaves();
  365. my $replicate_do_db;
  366. my $replicate_ignore_db;
  367. my $replicate_do_table;
  368. my $replicate_ignore_table;
  369. my $replicate_wild_do_table;
  370. my $replicate_wild_ignore_table;
  371. foreach (@slaves) {
  372. $replicate_do_db = $_->{Replicate_Do_DB} unless ($replicate_do_db);
  373. $replicate_ignore_db = $_->{Replicate_Ignore_DB}
  374. unless ($replicate_ignore_db);
  375. $replicate_do_table = $_->{Replicate_Do_Table} unless ($replicate_do_table);
  376. $replicate_ignore_table = $_->{Replicate_Ignore_Table}
  377. unless ($replicate_ignore_table);
  378. $replicate_wild_do_table = $_->{Replicate_Wild_Do_Table}
  379. unless ($replicate_wild_do_table);
  380. $replicate_wild_ignore_table = $_->{Replicate_Wild_Ignore_Table}
  381. unless ($replicate_wild_ignore_table);
  382. if ( $_->{log_bin} ) {
  383. $binlog_do_db = $_->{Binlog_Do_DB} unless ($binlog_do_db);
  384. $binlog_ignore_db = $_->{Binlog_Ignore_DB} unless ($binlog_ignore_db);
  385. }
  386. if ( $replicate_do_db ne $_->{Replicate_Do_DB}
  387. || $replicate_ignore_db ne $_->{Replicate_Ignore_DB}
  388. || $replicate_do_table ne $_->{Replicate_Do_Table}
  389. || $replicate_ignore_table ne $_->{Replicate_Ignore_Table}
  390. || $replicate_wild_do_table ne $_->{Replicate_Wild_Do_Table}
  391. || $replicate_wild_ignore_table ne $_->{Replicate_Wild_Ignore_Table} )
  392. {
  393. $log->error(
  394. sprintf(
  395. "Replication filtering check failed on %s! All slaves must have same replication filtering rules. Check SHOW SLAVE STATUS output and set my.cnf correctly.",
  396. $_->get_hostinfo() )
  397. );
  398. $self->print_filter_rules($master);
  399. return 1;
  400. }
  401. if ( $_->{log_bin} ) {
  402. if ( $binlog_do_db ne $_->{Binlog_Do_DB}
  403. || $binlog_ignore_db ne $_->{Binlog_Ignore_DB} )
  404. {
  405. $log->error(
  406. sprintf(
  407. "Binlog filtering check failed on %s! All log-bin enabled servers must have same binlog filtering rules (same binlog-do-db and binlog-ignore-db). Check SHOW MASTER STATUS output and set my.cnf correctly.",
  408. $_->get_hostinfo() )
  409. );
  410. $self->print_filter_rules($master);
  411. return 1;
  412. }
  413. }
  414. }
  415. if ( $binlog_do_db && $replicate_do_db ) {
  416. if ( $binlog_do_db ne $replicate_do_db ) {
  417. $log->error(
  418. sprintf(
  419. "binlog_do_db on master(%s) must be the same as replicate_do_db on slaves(%s).",
  420. $binlog_do_db, $replicate_do_db
  421. )
  422. );
  423. $self->print_filter_rules($master);
  424. return 1;
  425. }
  426. }
  427. if ( $binlog_ignore_db && $replicate_ignore_db ) {
  428. if ( $binlog_ignore_db ne $replicate_ignore_db ) {
  429. $log->error(
  430. sprintf(
  431. "binlog_ignore_db on master(%s) must be the same as replicate_ignore_db on slaves(%s).",
  432. $binlog_ignore_db, $replicate_ignore_db
  433. )
  434. );
  435. $self->print_filter_rules($master);
  436. return 1;
  437. }
  438. }
  439. $log->info(" Replication filtering check ok.");
  440. return 0;
  441. }
  442. sub validate_num_alive_servers($$$) {
  443. my $self = shift;
  444. my $current_master = shift;
  445. my $ignore_fail_check = shift;
  446. my $log = $self->{logger};
  447. my @dead_servers = $self->get_dead_servers();
  448. my @failed_slaves = $self->get_failed_slaves();
  449. foreach (@dead_servers) {
  450. next if ( $_->{id} eq $current_master->{id} );
  451. next if ( $ignore_fail_check && $_->{ignore_fail} );
  452. $log->error(
  453. sprintf( " Server %s is dead, but must be alive! Check server settings.",
  454. $_->get_hostinfo() )
  455. );
  456. croak;
  457. }
  458. foreach (@failed_slaves) {
  459. next if ( $ignore_fail_check && $_->{ignore_fail} );
  460. $log->error(
  461. sprintf( " Replication on %s fails! Check server settings.",
  462. $_->get_hostinfo() )
  463. );
  464. croak;
  465. }
  466. return 0;
  467. }
  468. # Check the following
  469. # 1. All slaves are read_only (INFO)
  470. # 2. All slaves see the same master ip/port (ERROR)
  471. # 3. All slaves set relay_log_purge=0 (WARN)
  472. # 4. All slaves have same replication filter rules with a master (ERROR)
  473. # return 0: ok, others: NG
  474. sub validate_slaves($$$) {
  475. my $self = shift;
  476. my $check_repl_filter = shift;
  477. my $master = shift;
  478. my $log = $self->{logger};
  479. my @slaves = $self->get_alive_slaves();
  480. my ( $mip, $mport ) = ();
  481. my $error = 0;
  482. $log->info("Checking slave configurations..");
  483. foreach (@slaves) {
  484. if ( $_->{read_only} ne '1' ) {
  485. $log->info(
  486. sprintf( " read_only=1 is not set on slave %s.\n", $_->get_hostinfo() )
  487. );
  488. }
  489. if ( $_->{relay_purge} ne '0' && !$_->{has_gtid} ) {
  490. $log->warning(
  491. sprintf( " relay_log_purge=0 is not set on slave %s.\n",
  492. $_->get_hostinfo() )
  493. );
  494. }
  495. if ( $_->{log_bin} eq '0' ) {
  496. $log->warning(
  497. sprintf(
  498. " log-bin is not set on slave %s. This host cannot be a master.\n",
  499. $_->get_hostinfo() )
  500. );
  501. }
  502. }
  503. $error = $self->validate_repl_filter($master)
  504. if ($check_repl_filter);
  505. return $error;
  506. }
  507. sub get_alive_server_by_ipport {
  508. my $self = shift;
  509. my $ip = shift;
  510. my $port = shift;
  511. $self->get_server_by_ipport( $ip, $port, 1 );
  512. }
  513. sub get_server_by_ipport {
  514. my $self = shift;
  515. my $ip = shift;
  516. my $port = shift;
  517. my $alive_only = shift;
  518. my @servers;
  519. if ($alive_only) {
  520. @servers = $self->get_alive_servers();
  521. }
  522. else {
  523. @servers = $self->get_servers();
  524. }
  525. foreach (@servers) {
  526. if ( $_->{ip} eq $ip && $_->{port} == $port ) {
  527. return $_;
  528. }
  529. }
  530. return;
  531. }
  532. sub get_alive_server_by_hostport {
  533. my $self = shift;
  534. my $host = shift;
  535. my $port = shift;
  536. my @servers = $self->get_alive_servers();
  537. foreach (@servers) {
  538. if ( $_->{hostname} eq $host && $_->{port} == $port ) {
  539. return $_;
  540. }
  541. }
  542. return;
  543. }
  544. sub get_server_from_by_id {
  545. my $self = shift;
  546. my $servers_ref = shift;
  547. my $id = shift;
  548. my @servers = @$servers_ref;
  549. foreach (@servers) {
  550. if ( $_->{id} eq $id ) {
  551. return $_;
  552. }
  553. }
  554. return;
  555. }
  556. sub get_alive_server_by_id {
  557. my $self = shift;
  558. my $id = shift;
  559. my @alive_servers = $self->get_alive_servers();
  560. foreach (@alive_servers) {
  561. if ( $_->{id} eq $id ) {
  562. return $_;
  563. }
  564. }
  565. return;
  566. }
  567. sub get_alive_slave_by_id {
  568. my $self = shift;
  569. my $id = shift;
  570. my @alive_slaves = $self->get_alive_slaves();
  571. foreach (@alive_slaves) {
  572. if ( $_->{id} eq $id ) {
  573. return $_;
  574. }
  575. }
  576. return;
  577. }
  578. sub get_master_by_slave {
  579. my $self = shift;
  580. my $slave = shift;
  581. return $self->get_server_by_ipport( $slave->{Master_IP},
  582. $slave->{Master_Port} );
  583. }
  584. sub validate_current_master($) {
  585. my $self = shift;
  586. my $log = $self->{logger};
  587. my @alive_servers = $self->get_alive_servers();
  588. my %master_hash;
  589. my $num_slaves = 0;
  590. my $not_slave_servers = 0;
  591. foreach (@alive_servers) {
  592. if ( $_->{not_slave} eq '0' ) {
  593. $master_hash{"$_->{Master_IP}:$_->{Master_Port}"} = $_;
  594. $num_slaves++;
  595. }
  596. else {
  597. $not_slave_servers++;
  598. }
  599. }
  600. if ( $not_slave_servers >= 2 ) {
  601. $log->error(
  602. "There are $not_slave_servers non-slave servers! MHA manages at most one non-slave server. Check configurations."
  603. );
  604. croak;
  605. }
  606. if ( $num_slaves < 1 ) {
  607. $log->error(
  608. "There is not any alive slave! Check slave settings for details.");
  609. croak;
  610. }
  611. # verify masters exist in a config file
  612. my $master;
  613. foreach my $key ( keys(%master_hash) ) {
  614. my $slave = $master_hash{$key};
  615. $master = $self->get_master_by_slave($slave);
  616. unless ($master) {
  617. $log->error(
  618. sprintf(
  619. "Master %s:%d from which slave %s replicates is not defined in the configuration file!",
  620. $slave->{Master_IP}, $slave->{Master_Port},
  621. $slave->get_hostinfo()
  622. )
  623. );
  624. croak;
  625. }
  626. }
  627. my $real_master;
  628. if ( keys(%master_hash) >= 2 ) {
  629. $real_master = $self->get_primary_master( \%master_hash );
  630. }
  631. else {
  632. $real_master = $master;
  633. $self->set_orig_master($real_master);
  634. }
  635. $self->validate_master_ip_port($real_master);
  636. return $real_master;
  637. }
  638. sub validate_master_ip_port {
  639. my $self = shift;
  640. my $real_master = shift;
  641. my $log = $self->{logger};
  642. my $has_unmanaged_slaves = 0;
  643. my @alive_servers = $self->get_alive_servers();
  644. foreach my $slave (@alive_servers) {
  645. next if ( $slave->{id} eq $real_master->{id} );
  646. unless ( $self->get_alive_slave_by_id( $slave->{id} ) ) {
  647. $log->error(
  648. sprintf( "Server %s is alive, but does not work as a slave!",
  649. $slave->get_hostinfo() )
  650. );
  651. croak;
  652. }
  653. if (
  654. !(
  655. ( $slave->{Master_IP} eq $real_master->{ip} )
  656. && ( $slave->{Master_Port} == $real_master->{port} )
  657. )
  658. )
  659. {
  660. if ( $slave->{multi_tier_slave} ) {
  661. $slave->{unmanaged} = 1;
  662. $has_unmanaged_slaves = 1;
  663. }
  664. else {
  665. my $msg = sprintf(
  666. "Slave %s replicates from %s:%d, but real master is %s!",
  667. $slave->get_hostinfo(), $slave->{Master_Host},
  668. $slave->{Master_Port}, $real_master->get_hostinfo()
  669. );
  670. $log->error($msg);
  671. croak;
  672. }
  673. }
  674. }
  675. if ($has_unmanaged_slaves) {
  676. $self->init_servers();
  677. }
  678. }
  679. sub get_multi_master_print_info {
  680. my $self = shift;
  681. my $master_hash_ref = shift;
  682. my %master_hash = %$master_hash_ref;
  683. my $str = "";
  684. foreach my $key ( keys(%master_hash) ) {
  685. my $slave = $master_hash{$key};
  686. my $master = $self->get_master_by_slave($slave);
  687. $str .= "Master " . $master->get_hostinfo();
  688. $str .=
  689. ", replicating from $master->{Master_Host}($master->{Master_IP}:$master->{Master_Port})"
  690. if ( $master->{Master_Host} );
  691. $str .= ", read-only" if ( $master->{read_only} );
  692. $str .= ", dead" if ( $master->{dead} );
  693. $str .= "\n";
  694. }
  695. $str .= "\n";
  696. return $str;
  697. }
  698. sub get_primary_master {
  699. my $self = shift;
  700. my $master_hash_ref = shift;
  701. my $log = $self->{logger};
  702. my @alive_servers = $self->get_alive_servers();
  703. my %master_hash = %$master_hash_ref;
  704. my $num_real_masters = 0;
  705. my $real_master;
  706. foreach my $key ( keys(%master_hash) ) {
  707. my $slave = $master_hash{$key};
  708. my $master = $self->get_master_by_slave($slave);
  709. next if ( !$master->{dead} && $master->{read_only} );
  710. $real_master = $master;
  711. $num_real_masters++;
  712. }
  713. if ( $num_real_masters < 1 ) {
  714. $log->error(
  715. sprintf(
  716. "Multi-master configuration is detected, but all of them are read-only! Check configurations for details. Master configurations are as below: \n%s",
  717. $self->get_multi_master_print_info($master_hash_ref) )
  718. );
  719. croak;
  720. }
  721. elsif ( $num_real_masters >= 2 ) {
  722. $log->error(
  723. sprintf(
  724. "Multi-master configuration is detected, but two or more masters are either writable (read-only is not set) or dead! Check configurations for details. Master configurations are as below: \n%s",
  725. $self->get_multi_master_print_info($master_hash_ref) )
  726. );
  727. croak;
  728. }
  729. else {
  730. $self->set_orig_master($real_master);
  731. $log->info(
  732. sprintf(
  733. "Multi-master configuration is detected. Current primary(writable) master is %s",
  734. $real_master->get_hostinfo() )
  735. );
  736. $log->info(
  737. sprintf( "Master configurations are as below: \n%s",
  738. $self->get_multi_master_print_info($master_hash_ref) )
  739. );
  740. $self->init_servers();
  741. }
  742. return $real_master;
  743. }
  744. sub get_candidate_masters($) {
  745. my $self = shift;
  746. my $log = $self->{logger};
  747. my @servers = $self->get_servers();
  748. my @ret_servers = ();
  749. foreach (@servers) {
  750. next if ( $_->{dead} eq '1' );
  751. if ( $_->{candidate_master} >= 1 ) {
  752. push( @ret_servers, $_ );
  753. }
  754. }
  755. return @ret_servers;
  756. }
  757. sub print_dead_servers {
  758. my $self = shift;
  759. $self->print_servers( $self->{dead_servers} );
  760. }
  761. sub print_alive_servers {
  762. my $self = shift;
  763. my $log = $self->{logger};
  764. my @alive_servers = $self->get_alive_servers();
  765. foreach (@alive_servers) {
  766. $log->info( " " . $_->get_hostinfo() );
  767. }
  768. }
  769. sub print_alive_slaves {
  770. my $self = shift;
  771. $self->print_servers( $self->{alive_slaves} );
  772. }
  773. sub print_latest_slaves {
  774. my $self = shift;
  775. $self->print_servers( $self->{latest_slaves} );
  776. }
  777. sub print_oldest_slaves {
  778. my $self = shift;
  779. $self->print_servers( $self->{oldest_slaves} );
  780. }
  781. sub print_failed_slaves_if {
  782. my $self = shift;
  783. my $log = $self->{logger};
  784. my @failed_slaves = $self->get_failed_slaves();
  785. if ( $#failed_slaves >= 0 ) {
  786. $log->info("Failed Slaves:");
  787. $self->print_servers( $self->{failed_slaves} );
  788. }
  789. }
  790. sub print_unmanaged_slaves_if {
  791. my $self = shift;
  792. my $log = $self->{logger};
  793. my @unmanaged_slaves = $self->get_unmanaged_slaves();
  794. if ( $#unmanaged_slaves >= 0 ) {
  795. $log->info("Unmanaged Servers:");
  796. $self->print_servers( $self->{unmanaged_slaves} );
  797. }
  798. }
  799. sub print_servers {
  800. my ( $self, $servers_ref ) = @_;
  801. my @servers = @$servers_ref;
  802. foreach (@servers) {
  803. $_->print_server();
  804. }
  805. }
  806. sub disconnect_all($) {
  807. my $self = shift;
  808. my $log = $self->{logger};
  809. my @servers = $self->get_alive_servers();
  810. foreach (@servers) {
  811. $_->disconnect();
  812. }
  813. }
  814. # Check master is not reachable from all alive slaves
  815. # prerequisite: all slaves see the same master
  816. # return 0;ok 1: running
  817. sub is_master_reachable_from_slaves($$) {
  818. my $self = shift;
  819. my $slaves_ref = shift;
  820. my $log = $self->{logger};
  821. my @slaves = $self->get_alive_slaves();
  822. $log->info("Checking the current master is not reachable from all slaves..");
  823. foreach (@slaves) {
  824. my $dbhelper = $_->{dbhelper};
  825. $dbhelper->stop_io_thread();
  826. $dbhelper->start_io_thread();
  827. sleep(3);
  828. my %status = $dbhelper->check_slave_status();
  829. if ( $status{Status} ne '0' || !defined( $status{Slave_IO_Running} ) ) {
  830. $log->error(
  831. sprintf( "Got error when stopping/starting io thread on %s",
  832. $_->get_hostinfo() )
  833. );
  834. return 1;
  835. }
  836. if ( $status{Slave_IO_Running} eq "Yes" ) {
  837. $log->warning(
  838. sprintf( "Master is reachable from slave %s", $_->get_hostinfo() ) );
  839. return 1;
  840. }
  841. $dbhelper->stop_io_thread();
  842. $log->info(
  843. sprintf( " Master is not reachable from slave %s", $_->get_hostinfo() ) );
  844. }
  845. $log->info(" done.");
  846. return 0;
  847. }
  848. # checking slave status again before starting main operations.
  849. # alive slaves info was already fetched by connect_all_and_read_server_status,
  850. # so check_slave_status should not fail here. If it fails, we die here.
  851. sub read_slave_status($) {
  852. my $self = shift;
  853. my $log = $self->{logger};
  854. my @slaves = $self->get_alive_slaves();
  855. $log->debug("Fetching current slave status..");
  856. foreach (@slaves) {
  857. my $dbhelper = $_->{dbhelper};
  858. my ($sstatus) = ();
  859. my %status = $dbhelper->check_slave_status();
  860. # This should not happen so die if it happens
  861. if ( $status{Status} ) {
  862. my $msg = "Checking slave status failed.";
  863. $msg .= " err=$status{Errstr}" if ( $status{Errstr} );
  864. $log->error($msg);
  865. croak;
  866. }
  867. $_->{latest} = 0;
  868. $_->{Master_Log_File} = $status{Master_Log_File};
  869. $_->{Read_Master_Log_Pos} = $status{Read_Master_Log_Pos};
  870. $_->{Relay_Master_Log_File} = $status{Relay_Master_Log_File};
  871. $_->{Exec_Master_Log_Pos} = $status{Exec_Master_Log_Pos};
  872. $_->{Relay_Log_File} = $status{Relay_Log_File};
  873. $_->{Relay_Log_Pos} = $status{Relay_Log_Pos};
  874. $_->{Retrieved_Gtid_Set} = $status{Retrieved_Gtid_Set};
  875. $_->{Executed_Gtid_Set} = $status{Executed_Gtid_Set};
  876. }
  877. $log->debug(" Fetching current slave status done.");
  878. }
  879. sub start_sql_threads_if($) {
  880. my $self = shift;
  881. my @slaves = $self->get_alive_slaves();
  882. foreach my $slave (@slaves) {
  883. $slave->start_sql_thread_if();
  884. }
  885. }
  886. sub get_failover_advisory_locks($) {
  887. my $self = shift;
  888. my $log = $self->{logger};
  889. my @slaves = $self->get_alive_slaves();
  890. foreach my $slave (@slaves) {
  891. if ( $slave->get_failover_advisory_lock() ) {
  892. $log->error(
  893. sprintf(
  894. "Getting advisory lock failed on %s. Maybe failover script or purge_relay_logs script is running on the same slave?",
  895. $slave->get_hostinfo() )
  896. );
  897. croak;
  898. }
  899. }
  900. }
  901. sub identify_latest_slaves($$) {
  902. my $self = shift;
  903. my $find_oldest = shift;
  904. $find_oldest = 0 unless ($find_oldest);
  905. my $log = $self->{logger};
  906. my @slaves = $self->get_alive_slaves();
  907. my @latest = ();
  908. foreach (@slaves) {
  909. my $a = $latest[0]{Master_Log_File};
  910. my $b = $latest[0]{Read_Master_Log_Pos};
  911. if (
  912. !$find_oldest
  913. && (
  914. ( !$a && !defined($b) )
  915. || ( $_->{Master_Log_File} gt $latest[0]{Master_Log_File} )
  916. || ( ( $_->{Master_Log_File} ge $latest[0]{Master_Log_File} )
  917. && $_->{Read_Master_Log_Pos} > $latest[0]{Read_Master_Log_Pos} )
  918. )
  919. )
  920. {
  921. @latest = ();
  922. push( @latest, $_ );
  923. }
  924. elsif (
  925. $find_oldest
  926. && (
  927. ( !$a && !defined($b) )
  928. || ( $_->{Master_Log_File} lt $latest[0]{Master_Log_File} )
  929. || ( ( $_->{Master_Log_File} le $latest[0]{Master_Log_File} )
  930. && $_->{Read_Master_Log_Pos} < $latest[0]{Read_Master_Log_Pos} )
  931. )
  932. )
  933. {
  934. @latest = ();
  935. push( @latest, $_ );
  936. }
  937. elsif ( ( $_->{Master_Log_File} eq $latest[0]{Master_Log_File} )
  938. && ( $_->{Read_Master_Log_Pos} == $latest[0]{Read_Master_Log_Pos} ) )
  939. {
  940. push( @latest, $_ );
  941. }
  942. }
  943. foreach (@latest) {
  944. $_->{latest} = 1 if ( !$find_oldest );
  945. $_->{oldest} = 1 if ($find_oldest);
  946. }
  947. $log->info(
  948. sprintf(
  949. "The %s binary log file/position on all slaves is" . " %s:%d\n",
  950. $find_oldest ? "oldest" : "latest", $latest[0]{Master_Log_File},
  951. $latest[0]{Read_Master_Log_Pos}
  952. )
  953. );
  954. if ( $latest[0]{Retrieved_Gtid_Set} ) {
  955. $log->info(
  956. sprintf( "Retrieved Gtid Set: %s", $latest[0]{Retrieved_Gtid_Set} ) );
  957. }
  958. if ($find_oldest) {
  959. $self->set_oldest_slaves( \@latest );
  960. }
  961. else {
  962. $self->set_latest_slaves( \@latest );
  963. }
  964. }
  965. sub identify_oldest_slaves($) {
  966. my $self = shift;
  967. return $self->identify_latest_slaves(1);
  968. }
  969. # 1: higher
  970. # -1: older
  971. # 0: equal
  972. sub pos_cmp {
  973. my ( $self, $a_mlf, $a_mlp, $b_mlf, $b_mlp ) = @_;
  974. return 0 if ( $a_mlf eq $b_mlf && $a_mlp == $b_mlp );
  975. return -1 if ( $a_mlf lt $b_mlf || ( $a_mlf le $b_mlf && $a_mlp < $b_mlp ) );
  976. return 1;
  977. }
  # Sets no_master = 1 on every alive slave whose received coordinate
  # (Master_Log_File:Read_Master_Log_Pos) is older than the given
  # coordinate ($mlf:$mlp) according to pos_cmp().
  sub set_no_master_if_older($$$) {
    my ( $self, $mlf, $mlp ) = @_;
    foreach my $slave ( $self->get_alive_slaves() ) {
      my $order =
        $self->pos_cmp( $slave->{Master_Log_File},
        $slave->{Read_Master_Log_Pos},
        $mlf, $mlp );
      next unless ( $order < 0 );
      $slave->{no_master} = 1;
    }
  }
  # Finds, among the alive slaves that do not have ignore_fail set, the one
  # with the oldest received coordinate and returns it as the list
  # (Master_Log_File, Read_Master_Log_Pos).
  # When several slaves share the oldest coordinate the first one wins.
  sub get_oldest_limit_pos($) {
    my ($self) = @_;
    my $oldest;
    foreach my $slave ( $self->get_alive_slaves() ) {
      next if ( $slave->{ignore_fail} );

      # Coordinate of the current pick; both stay undef until a slave is picked.
      my ( $cur_file, $cur_pos ) =
        $oldest
        ? ( $oldest->{Master_Log_File}, $oldest->{Read_Master_Log_Pos} )
        : ();
      my $nothing_picked = ( !$cur_file && !defined($cur_pos) );
      if (
        $nothing_picked
        || ( $slave->{Master_Log_File} lt $cur_file )
        || ( ( $slave->{Master_Log_File} le $cur_file )
          && $slave->{Read_Master_Log_Pos} < $cur_pos )
        )
      {
        $oldest = $slave;
      }
    }
    return ( $oldest->{Master_Log_File}, $oldest->{Read_Master_Log_Pos} )
      if ($oldest);
  }
  # Among the latest slaves, returns the one that has executed the most
  # events, comparing Relay_Master_Log_File (string) and then
  # Exec_Master_Log_Pos (number). On a tie the earlier slave in the list
  # is kept. Returns undef when there are no latest slaves.
  sub get_most_advanced_latest_slave($) {
    my ($self) = @_;
    my $best;
    foreach my $candidate ( $self->get_latest_slaves() ) {
      $best ||= $candidate;
      my $cand_file = $candidate->{Relay_Master_Log_File};
      my $best_file = $best->{Relay_Master_Log_File};
      my $is_ahead  = ( $cand_file gt $best_file )
        || ( $cand_file eq $best_file
        && $candidate->{Exec_Master_Log_Pos} > $best->{Exec_Master_Log_Pos} );
      $best = $candidate if ($is_ahead);
    }
    return $best;
  }
  # Checks whether the SQL thread of $target is too far behind $latest.
  # The delay is unacceptable when the latest Master_Log_File sorts after
  # the target's Relay_Master_Log_File, or when the latest
  # Read_Master_Log_Pos exceeds the target's Exec_Master_Log_Pos by more
  # than 100000000.
  # 0: no or acceptable delay
  # 1: unacceptable delay (can not be a master)
  sub check_slave_delay($$$) {
    my ( $self, $target, $latest ) = @_;
    my $log = $self->{logger};
    $log->debug(
      sprintf( "Checking replication delay on %s.. ", $target->get_hostinfo() ) );

    my $max_pos_gap = 100000000;
    my $too_far_behind =
      ( $latest->{Master_Log_File} gt $target->{Relay_Master_Log_File} )
      || ( $latest->{Read_Master_Log_Pos} >
      $target->{Exec_Master_Log_Pos} + $max_pos_gap );

    unless ($too_far_behind) {
      $log->debug(" ok.");
      return 0;
    }

    my $message = sprintf(
" Slave %s SQL Thread delays too much. Latest log file:%s:%d, Current log file:%s:%d. This server is not selected as a new master because recovery will take long time.\n",
      $target->get_hostinfo(),        $latest->{Master_Log_File},
      $latest->{Read_Master_Log_Pos}, $target->{Relay_Master_Log_File},
      $target->{Exec_Master_Log_Pos}
    );
    $log->warning($message);
    return 1;
  }
  # Returns the alive slaves that can not become the new master.
  # A slave is rejected when any of the following holds:
  # - no_master is set (>= 1), e.g. in conf files for DR servers
  # - log_bin is '0' (binary logging disabled)
  # - oldest_major_version is '0' (major version is not the oldest)
  # - $latest_slave and $check_replication_delay are both true and
  #   check_slave_delay() reports too much replication delay
  # Dead servers never show up here because only alive slaves are scanned.
  sub get_bad_candidate_masters($$$) {
    my ( $self, $latest_slave, $check_replication_delay ) = @_;
    my @bad_ones = ();
    foreach my $server ( $self->get_alive_slaves() ) {
      my $unfit =
           $server->{no_master} >= 1
        || $server->{log_bin} eq '0'
        || $server->{oldest_major_version} eq '0';

      # The delay check runs only for servers not already rejected.
      if ( !$unfit && $latest_slave && $check_replication_delay ) {
        $unfit = ( $self->check_slave_delay( $server, $latest_slave ) >= 1 );
      }
      push( @bad_ones, $server ) if ($unfit);
    }
    return @bad_ones;
  }
  # Returns 1 when $target (matched by id) is among the servers reported
  # by get_bad_candidate_masters(), otherwise 0.
  # get_bad_candidate_masters() is called without arguments here, so its
  # replication delay check is not applied.
  sub is_target_bad_for_new_master {
    my ( $self, $target ) = @_;
    for my $bad_server ( $self->get_bad_candidate_masters() ) {
      next unless ( $target->{id} eq $bad_server->{id} );
      return 1;
    }
    return 0;
  }
  # Picks the server to promote as the new master.
  #
  # When a priority host and port are given, only that server is considered:
  # it is returned if it is alive and not a bad candidate, otherwise an
  # error is logged and nothing is returned.
  #
  # Otherwise the search goes through these stages and returns the first
  # server that is not listed by get_bad_candidate_masters():
  #   1. candidate_master slaves among the latest slaves
  #   2. all candidate_master slaves
  #   3. all latest slaves
  #   4. all alive slaves
  # Stages 1 and 3 run only when the first latest slave has latest_priority
  # set. If there are neither candidate masters nor bad candidates, the
  # first latest slave is returned directly (again only with
  # latest_priority).
  #
  # If the latest server is too far behind (e.g. its SQL thread was stopped
  # for an online backup) it should not become the new master, although
  # relay logs should still be fetched from it. A configured preferred
  # master is likewise skipped when it is far behind.
  # $check_replication_delay defaults to 1 when not defined.
  sub select_new_master {
    my ( $self, $prio_new_master_host, $prio_new_master_port,
      $check_replication_delay )
      = @_;
    $check_replication_delay = 1 unless ( defined $check_replication_delay );

    my $log    = $self->{logger};
    my @latest = $self->get_latest_slaves();
    my @slaves = $self->get_alive_slaves();
    my @pref   = $self->get_candidate_masters();
    my @bad =
      $self->get_bad_candidate_masters( $latest[0], $check_replication_delay );

    # True when the server with the given id is among the bad candidates.
    my $is_bad = sub {
      my ($id) = @_;
      return scalar( $self->get_server_from_by_id( \@bad, $id ) );
    };

    # An explicitly requested new master is accepted or rejected as is.
    if ( $prio_new_master_host && $prio_new_master_port ) {
      my $new_master =
        $self->get_alive_server_by_hostport( $prio_new_master_host,
        $prio_new_master_port );
      unless ($new_master) {
        $log->error("$prio_new_master_host is not alive!");
        return;
      }
      if ( $is_bad->( $new_master->{id} ) ) {
        $log->error("$prio_new_master_host is bad as a new master!");
        return;
      }
      $log->info("$prio_new_master_host can be new master.");
      return $new_master;
    }

    $log->info("Searching new master from slaves..");
    $log->info(" Candidate masters from the configuration file:");
    $self->print_servers( \@pref );
    $log->info(" Non-candidate masters:");
    $self->print_servers( \@bad );

    my $has_pref            = ( @pref > 0 );
    my $latest_has_priority = $latest[0]->{latest_priority};

    if ( !$has_pref && !@bad && $latest_has_priority ) {
      return $latest[0];
    }

    # Stage 1: candidate masters that are also latest slaves
    if ($latest_has_priority) {
      $log->info(
" Searching from candidate_master slaves which have received the latest relay log events.."
      ) if ($has_pref);
      foreach my $h (@latest) {
        foreach my $p (@pref) {
          next unless ( $h->{id} eq $p->{id} );
          return $h unless ( $is_bad->( $p->{id} ) );
        }
      }
      $log->info(" Not found.") if ($has_pref);
    }

    # Stage 2: any candidate master, even if it is not a latest slave
    $log->info(" Searching from all candidate_master slaves..")
      if ($has_pref);
    foreach my $s (@slaves) {
      foreach my $p (@pref) {
        next unless ( $s->{id} eq $p->{id} );
        return $s unless ( $is_bad->( $p->{id} ) );
      }
    }
    $log->info(" Not found.") if ($has_pref);

    # Stage 3: any latest slave
    if ($latest_has_priority) {
      $log->info(
" Searching from all slaves which have received the latest relay log events.."
      );
      foreach my $h (@latest) {
        return $h unless ( $is_bad->( $h->{id} ) );
      }
      $log->info(" Not found.");
    }

    # Stage 4: none of the latest slaves can be a master, so try every slave
    $log->info(" Searching from all slaves..");
    foreach my $s (@slaves) {
      return $s unless ( $is_bad->( $s->{id} ) );
    }
    $log->info(" Not found.");
    return;
  }
  # Reads the current binary log coordinate of the new master ($target)
  # through its dbhelper and logs the CHANGE MASTER statement the other
  # slaves should use (GTID auto-position form when
  # is_gtid_auto_pos_enabled() is true, file/position form otherwise).
  # Returns ( $file, $pos, $gtid ) as reported by show_master_status(), or
  # nothing (after logging an error) when no file/position was obtained.
  sub get_new_master_binlog_position($$) {
    my ( $self, $target ) = @_;    # $target is the new master
    my $log      = $self->{logger};
    my $dbhelper = $target->{dbhelper};
    $log->info("Getting new master's binlog name and position..");

    # The third and fourth values of show_master_status() are not used here.
    my ( $file, $pos, undef, undef, $gtid ) = $dbhelper->show_master_status();
    unless ( $file && defined($pos) ) {
      $log->error("Getting new master's binlog position failed!");
      return;
    }
    $log->info(" $file:$pos");

    my $use_auto_pos = $self->is_gtid_auto_pos_enabled();

    # Show both names when hostname and ip differ.
    my $host_label =
      ( $target->{hostname} eq $target->{ip} )
      ? $target->{hostname}
      : "$target->{hostname} or $target->{ip}";

    my $statement;
    if ($use_auto_pos) {
      $statement = sprintf(
" All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='%s', MASTER_PORT=%d, MASTER_AUTO_POSITION=1, MASTER_USER='%s', MASTER_PASSWORD='xxx';",
        $host_label, $target->{port}, $target->{repl_user}
      );
    }
    else {
      $statement = sprintf(
" All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='%s', MASTER_PORT=%d, MASTER_LOG_FILE='%s', MASTER_LOG_POS=%d, MASTER_USER='%s', MASTER_PASSWORD='xxx';",
        $host_label, $target->{port}, $file, $pos, $target->{repl_user}
      );
    }
    $log->info($statement);
    return ( $file, $pos, $gtid );
  }
  # Points $target at $master and starts replication on it.
  # Does nothing (returns empty) when $target and $master have the same id.
  # Unless $target is flagged not_slave, replication is stopped and reset
  # first. CHANGE MASTER uses GTID auto-positioning when
  # is_gtid_auto_pos_enabled() is true and the target is not MariaDB;
  # otherwise it uses $master_log_file / $master_log_pos.
  # $log is optional and defaults to the manager's logger.
  # Returns the result of $target->start_slave(); a false result is
  # treated as success ("Slave started." is logged).
  sub change_master_and_start_slave {
    my ( $self, $target, $master, $master_log_file, $master_log_pos, $log ) = @_;
    $log ||= $self->{logger};
    return if ( $target->{id} eq $master->{id} );

    my $dbhelper = $target->{dbhelper};
    my $message  = sprintf(
      " Resetting slave %s and starting replication from the new master %s..",
      $target->get_hostinfo(), $master->get_hostinfo() );
    $log->info($message);

    $target->{not_slave} or $target->stop_slave($log);
    $target->{not_slave} or $dbhelper->reset_slave();

    my $addr;
    if ( $target->{use_ip_for_change_master} ) {
      $addr = $master->{ip};
    }
    else {
      $addr = $master->{hostname};
    }

    my $gtid_change = $self->is_gtid_auto_pos_enabled() && !$target->{is_mariadb};
    if ($gtid_change) {
      $dbhelper->change_master_gtid( $addr, $master->{port},
        $master->{repl_user}, $master->{repl_password} );
    }
    else {
      $dbhelper->change_master(
        $addr,            $master->{port},
        $master_log_file, $master_log_pos,
        $master->{repl_user}, $master->{repl_password}
      );
    }
    $log->info(" Executed CHANGE MASTER.");

    # After executing CHANGE MASTER, relay_log_purge is automatically
    # disabled. If the original value was 0, it has to be set back to 0
    # explicitly (non-GTID targets only).
    if ( !$target->{has_gtid} && !$target->{relay_purge} ) {
      $target->disable_relay_log_purge();
    }

    my $ret = $target->start_slave($log);
    $log->info(" Slave started.") if ( !$ret );
    return $ret;
  }
  # Returns the original master when it is currently alive.
  # Croaks (after logging an error) when no original master is set, and
  # returns nothing (after logging a warning) when the original master is
  # not found among the alive servers.
  sub get_current_alive_master($) {
    my ($self) = @_;
    my $log    = $self->{logger};
    my $master = $self->get_orig_master();
    if ( !$master ) {
      $log->error(
        "MySQL master is not correctly configured. Check master/slave settings");
      croak;
    }

    my $alive = $self->get_alive_server_by_id( $master->{id} );
    if ( !$alive ) {
      $log->warning("MySQL master is not currently alive!");
      return;
    }

    $log->info( sprintf( "Current Alive Master: %s", $alive->get_hostinfo() ) );
    return $master;
  }
  # Stops the replication I/O thread on every alive slave by calling
  # stop_io_thread() on each of them in turn.
  # Always returns 0.
  sub stop_io_threads {
    my ($self) = @_;
    foreach my $target ( $self->get_alive_slaves() ) {

      # Do not exit() here: this loop runs in the calling process, so
      # exiting after the first slave would terminate the whole program,
      # skip the remaining slaves and make the return below unreachable.
      $target->stop_io_thread($target);
    }
    return 0;
  }
  # Calls check_repl_priv() on every alive server, in the order returned
  # by get_alive_servers().
  sub check_repl_priv {
    my ($self) = @_;
    foreach my $server ( $self->get_alive_servers() ) {
      $server->check_repl_priv();
    }
  }
  # Calls release_failover_advisory_lock() on every alive server, in the
  # order returned by get_alive_servers().
  sub release_failover_advisory_lock {
    my ($self) = @_;
    foreach my $server ( $self->get_alive_servers() ) {
      $server->release_failover_advisory_lock();
    }
  }
  # Builds a text picture of the current topology: one line for
  # $orig_master marked "(current master)", then one " +--" line per alive
  # slave, followed by an empty line. A server's node_label, when set, is
  # appended in parentheses. Returns the resulting string.
  sub get_current_servers_ascii {
    my ( $self, $orig_master ) = @_;
    my @alive_slaves = $self->get_alive_slaves();

    my $master_line = $orig_master->get_hostinfo() . " (current master)";
    $master_line .= " ($orig_master->{node_label})"
      if ( $orig_master->{node_label} );
    my @lines = ($master_line);

    foreach my $slave (@alive_slaves) {
      my $line = " +--" . $slave->get_hostinfo();
      $line .= " ($slave->{node_label})" if ( $slave->{node_label} );
      push( @lines, $line );
    }

    # Every line ends with a newline, plus one blank line at the end.
    return join( "\n", @lines ) . "\n\n";
  }
  # Logs (at info level) the current topology as rendered by
  # get_current_servers_ascii() for $orig_master, preceded by a newline.
  # The alive slaves are looked up by get_current_servers_ascii() itself,
  # so no separate lookup is done here.
  sub print_servers_ascii {
    my ( $self, $orig_master ) = @_;
    my $log = $self->{logger};
    $log->info( "\n" . $self->get_current_servers_ascii($orig_master) );
  }
  # Logs (at info level) a "From:" / "To:" picture of a master switch.
  # "From:" is the current topology from get_current_servers_ascii().
  # "To:" lists $new_master marked "(new master)", then every alive slave
  # except the new master, and finally $orig_master when
  # $orig_master_is_new_slave is true. A node_label, when set, is appended
  # in parentheses.
  sub print_servers_migration_ascii {
    my ( $self, $orig_master, $new_master, $orig_master_is_new_slave ) = @_;
    my $log          = $self->{logger};
    my @alive_slaves = $self->get_alive_slaves();

    # Appends " (label)" to $text when the server has a node_label.
    my $with_label = sub {
      my ( $server, $text ) = @_;
      return $server->{node_label}
        ? "$text ($server->{node_label})"
        : $text;
    };

    my $from = $self->get_current_servers_ascii($orig_master);

    my @to_lines =
      ( $with_label->( $new_master, $new_master->get_hostinfo() . " (new master)" ) );
    foreach my $slave ( grep { $_->{id} ne $new_master->{id} } @alive_slaves ) {
      push( @to_lines, $with_label->( $slave, " +--" . $slave->get_hostinfo() ) );
    }
    if ($orig_master_is_new_slave) {
      push( @to_lines,
        $with_label->( $orig_master, " +--" . $orig_master->get_hostinfo() ) );
    }

    my $str = "\n" . "From:\n" . $from . "To:\n" . join( '', map { "$_\n" } @to_lines );
    $log->info($str);
  }
  # for manual failover/switch only
  #
  # Asks the operator on STDIN/STDOUT to confirm the switch from
  # $orig_master to $new_master and returns the server to promote.
  # - An answer starting with "y" (any case) to the first prompt accepts
  #   $new_master as is.
  # - Otherwise the operator is asked whether to continue; anything but
  #   yes sets $orig_master->{not_error} = 1 and dies.
  # - When continuing, a host name is read and looked up among the alive
  #   servers on port 3306; an unknown host dies, and the chosen server
  #   must be confirmed with yes once more.
  # End of input on STDIN is treated as an empty answer, i.e. as "no", so a
  # closed or exhausted STDIN ends in the regular "Not typed yes" path.
  sub manually_decide_new_master {
    my ( $self, $orig_master, $new_master ) = @_;

    # Reads one line from STDIN without its line ending; '' on EOF.
    my $read_answer = sub {
      my $answer = <STDIN>;
      $answer = '' unless ( defined $answer );
      chomp($answer);
      return $answer;
    };

    # True when the answer starts with "y" or "Y".
    my $is_yes = sub {
      my ($answer) = @_;
      return ( lc($answer) =~ /^y/ ) ? 1 : 0;
    };

    printf(
      "\nStarting master switch from %s to %s? (yes/NO): ",
      $orig_master->get_hostinfo(),
      $new_master->get_hostinfo()
    );
    return $new_master if ( $is_yes->( $read_answer->() ) );

    print "Continue? (yes/NO): ";
    unless ( $is_yes->( $read_answer->() ) ) {
      $orig_master->{not_error} = 1;
      die "Not typed yes. Stopping.";
    }

    print "Enter new master host name: ";
    my $host = $read_answer->();
    $new_master = $self->get_alive_server_by_hostport( $host, 3306 );
    if ( !$new_master ) {
      die "New server not found!\n";
    }

    printf "Master switch to %s. OK? (yes/NO): ", $new_master->get_hostinfo();
    die "Not typed yes. Stopping. \n" unless ( $is_yes->( $read_answer->() ) );
    return $new_master;
  }
  # Verifies replication on every alive slave and croaks on the first
  # problem: either the slave position can not be read
  # (current_slave_position() is false) or has_replication_problem()
  # reports a problem for the allowed delay.
  # $allow_delay_seconds falls back to 1 when it is not given or false
  # (so 0 is also replaced by 1).
  sub check_replication_health {
    my ( $self, $allow_delay_seconds ) = @_;
    $allow_delay_seconds ||= 1;
    my $log = $self->{logger};
    foreach my $target ( $self->get_alive_slaves() ) {
      $log->info("Checking replication health on $target->{hostname}..");
      unless ( $target->current_slave_position() ) {
        $log->error("Getting slave status failed!");
        croak;
      }
      unless ( $target->has_replication_problem($allow_delay_seconds) ) {
        $log->info(" ok.");
        next;
      }
      $log->error(" failed!");
      croak;
    }
  }
  # Classifies the GTID state of the replication group.
  # 0: there are no alive servers, or some alive server lacks has_gtid, or
  #    some alive slave has no Executed_Gtid_Set
  # 1: all of the above pass and at least one alive slave has
  #    Auto_Position == 1 or use_gtid_auto_pos set
  # 2: all of the above pass but no alive slave uses auto positioning
  sub get_gtid_status($) {
    my ($self) = @_;
    my @servers = $self->get_alive_servers();
    my @slaves  = $self->get_alive_slaves();

    return 0 unless (@servers);
    return 0 if ( grep { !$_->{has_gtid} } @servers );
    return 0 if ( grep { !$_->{Executed_Gtid_Set} } @slaves );

    foreach my $slave (@slaves) {
      my $auto_position = $slave->{Auto_Position};
      return 1 if ( defined($auto_position) && $auto_position == 1 );
      return 1 if ( $slave->{use_gtid_auto_pos} );
    }
    return 2;
  }
  # Returns 1 when the manager's gtid_failover_mode is 1, otherwise 0.
  sub is_gtid_auto_pos_enabled($) {
    my ($self) = @_;
    return ( $self->{gtid_failover_mode} == 1 ) ? 1 : 0;
  }
  # When gtid_failover_mode is 2, logs the fact and sets
  # disable_log_bin = 1 on every alive slave. Does nothing for any other
  # mode.
  sub force_disable_log_bin_if_auto_pos_disabled($) {
    my ($self) = @_;
    my $log = $self->{logger};
    if ( $self->{gtid_failover_mode} == 2 ) {
      my @slaves = $self->get_alive_slaves();
      $log->info("Forcing disable_log_bin since GTID auto pos is disabled");
      $_->{disable_log_bin} = 1 foreach (@slaves);
    }
  }
  # Makes $waiter wait (via master_pos_wait) until it reaches the binlog
  # position currently reported by $advanced->get_binlog_position().
  # Returns the result of master_pos_wait(); a true result is treated as
  # an error and logged.
  sub wait_until_in_sync($$$) {
    my ( $self, $waiter, $advanced ) = @_;
    my ( $file, $pos ) = $advanced->get_binlog_position();
    my $ret = $waiter->master_pos_wait( $file, $pos );
    $self->{logger}->error("Get error on waiting slave") if ($ret);
    return $ret;
  }
  1465. 1;