1. Installation and Deployment
Install the build dependencies:
[root@mysql-node10 ~]# dnf install cmake gcc-c++ openssl-devel
ncurses-devel.x86_64 libtirpc-devel-1.3.3-8.el7_4.x86_64.rpm rpcgen.x86_64
Download and extract the source package
[root@mysql-node10 ~]# tar zxf mysql-boost-5.7.44.tar.gz
[root@mysql-node10 ~]# cd /root/mysql-5.7.44
Compile and install MySQL from source
[root@mysql-node10 mysql-5.7.44]# cmake \
-DCMAKE_INSTALL_PREFIX=/usr/local/mysql \            #installation prefix
-DMYSQL_DATADIR=/data/mysql \                        #data directory
-DMYSQL_UNIX_ADDR=/data/mysql/mysql.sock \           #socket file
-DWITH_INNOBASE_STORAGE_ENGINE=1 \                   #enable the InnoDB storage engine (otherwise MyISAM is used)
-DWITH_EXTRA_CHARSETS=all \                          #extra character sets
-DDEFAULT_CHARSET=utf8mb4 \                          #default character set
-DDEFAULT_COLLATION=utf8mb4_unicode_ci \             #default collation
-DWITH_BOOST=/root/mysql-5.7.44/boost/boost_1_59_0/  #Boost C++ library dependency
[root@mysql-node10 mysql-5.7.44]# make -j2           #-j2 runs two parallel jobs (use as many as you have CPU cores)
[root@mysql-node10 mysql-5.7.44]# make install
Deploy MySQL
#create the data directory
[root@mysql-node1 ~]# useradd -s /sbin/nologin -M mysql
[root@mysql-node1 ~]# mkdir -p /data/mysql
[root@mysql-node1 ~]# chown mysql.mysql /data/mysql/
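The steps below use the mysqld/mysql commands and the /etc/init.d/mysqld init script directly; make install does not set these up by itself. A sketch of the usual extra steps for a 5.7 source build, assuming the install prefix used above:
[root@mysql-node1 ~]# cp /usr/local/mysql/support-files/mysql.server /etc/init.d/mysqld   #bundled init script
[root@mysql-node1 ~]# echo 'export PATH=$PATH:/usr/local/mysql/bin' > /etc/profile.d/mysql.sh
[root@mysql-node1 ~]# source /etc/profile.d/mysql.sh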
#edit the configuration file
[root@node10 my.cnf.d]# vim /etc/my.cnf
[mysqld]
datadir=/data/mysql              #data directory
socket=/data/mysql/mysql.sock    #socket file
symbolic-links=0                 #data may only live in the data directory; symlinks into it are forbidden
#initialize the database and create MySQL's base data
[root@node10 ~]# mysqld --initialize --user=mysql
[root@node10 ~]# /etc/init.d/mysqld start
[root@node10 ~]# chkconfig mysqld on
#secure the database installation
[root@node10 ~]# mysql_secure_installation
Securing the MySQL server deployment.
2. Master-Slave Replication
How it works
The three threads
Master-slave synchronization is based on the binlog. Replication involves three threads: one on the master and two on each slave.
- The binlog dump thread is a master-side thread. When a slave connects, the master sends it the binary log; while reading an event, the master takes a lock on the binlog and releases it once the read is complete.
- The slave I/O thread connects to the master and requests binlog updates. It reads the updates sent by the master's binlog dump thread and copies them into the local relay log.
- The slave SQL thread reads the relay log and executes the events in it, keeping the slave's data in sync with the master.
Replication in three steps
Step 1: the master records write operations in its binary log (binlog).
Step 2: the slave copies the master's binary log events into its relay log.
Step 3: the slave replays the events in the relay log, applying the changes to its own data. MySQL replication is asynchronous and serialized, and after a restart it resumes from the recorded position.
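Once replication is running (set up later in this section), these threads can be inspected from the client; a minimal sketch, the exact output depends on the environment:
#on the master: the Binlog Dump thread appears in the process list
mysql> show processlist;
#on the slave: Slave_IO_Running and Slave_SQL_Running report the two slave threads
mysql> show slave status\G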
In practice
1. The slave is configured with the master's IP, replication user, log file and log position, and uses this information to authenticate against the master and obtain its data.
2. Once the binlog is enabled and the master is started, the master runs a binlog dump thread.
3. The master's binlog dump thread sends the binary log updates to the slave.
4. The slave runs two threads, an I/O thread and a SQL thread: the I/O thread receives the master's binary log and saves it to the local relay log on disk; the SQL thread reads the local relay log and replays it.
5. When do we need multiple slaves?
When reads far outnumber writes, we use a one-master, many-slaves architecture,
with a load-balancing layer in front of the databases combined with a high-availability mechanism.
Limitations of this architecture
The master-slave architecture is asynchronous:
after committing an update, the master simply sends the binary log to the slaves; it writes the binlog to its own disk without checking whether the slaves actually stored the data.
If the network between master and slave fails, or the master crashes outright, the binary log may never reach the slave.
If the master then fails and a slave takes over as master, that data is lost.
Because of this, the architecture cannot provide strong consistency or zero data loss.
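This behaviour is easy to observe in a test environment; a sketch (not for production), assuming the replication setup configured below:
#on the slave: simulate a network outage by stopping the I/O thread
mysql> stop slave io_thread;
#keep writing on the master - it commits without noticing the slave is gone; then compare positions:
mysql> show master status;       #on the master: current binlog file and position
mysql> show slave status\G       #on the slave: Read_Master_Log_Pos lags behind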
Configure the master
[root@mysql1 mysql]# cat /etc/my.cnf
[mysqld]
datadir=/data/mysql
socket=/data/mysql/mysql.sock
symbolic-links=0
log-bin=mysql-bin    #enable the binary log
server-id=1
[root@mysql1 mysql]# /etc/init.d/mysqld restart
[root@mysql1 mysql]# mysql -uroot -proot    #log in and set up the replication user's privileges
mysql> create user 'haha'@'%' identified by 'haha';
Query OK, 0 rows affected (0.00 sec)
mysql> grant replication slave on *.* to haha@'%';
Query OK, 0 rows affected (0.00 sec)
mysql> show master status;
+------------------+----------+--------------+------------------+-------------------+
| File | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set |
+------------------+----------+--------------+------------------+-------------------+
| mysql-bin.000001 | 1217| | | |
+------------------+----------+--------------+------------------+-------------------+
1 row in set (0.00 sec)
[root@mysql1 mysql]# cd /data/mysql/
[root@mysql1 mysql]# ls
auto.cnf ibdata1 mysql1.org.pid private_key.pem
ca-key.pem ib_logfile0 mysql-bin.000001 public_key.pem
ca.pem ib_logfile1 mysql-bin.index server-cert.pem
client-cert.pem ibtmp1 mysql.sock server-key.pem
client-key.pem mysql mysql.sock.lock sys
ib_buffer_pool mysql1.org.err performance_schema
[root@mysql1 mysql]# mysqlbinlog mysql-bin.000001    #view the binary log file
/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=1*/;
/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/;
DELIMITER /*!*/;
# at 4
#240902 10:59:13 server id 1 end_log_pos 123 CRC32 0xb1754848 Start: binlog v 4, server v 5.7.44-log created 240902 10:59:13 at startup
# Warning: this binlog is either in use or was not closed properly.
ROLLBACK/*!*/;
BINLOG '
ASrVZg8BAAAAdwAAAHsAAAABAAQANS43LjQ0LWxvZwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAABKtVmEzgNAAgAEgAEBAQEEgAAXwAEGggAAAAICAgCAAAACgoKKioAEjQA
AUhIdbE=
'/*!*/;
# at 123
#240902 10:59:13 server id 1 end_log_pos 154 CRC32 0xa95637df Previous-GTIDs
# [empty]
# at 154
#240902 11:11:29 server id 1 end_log_pos 219 CRC32 0xd537d8cc Anonymous_GTID last_committed=0 sequence_number=1 rbr_only=no
SET @@SESSION.GTID_NEXT= 'ANONYMOUS'/*!*/;
# at 219
#240902 11:11:29 server id 1 end_log_pos 399 CRC32 0xad3281e4 Query thread_id=2 exec_time=0 error_code=0
SET TIMESTAMP=1725246689/*!*/;
SET @@session.pseudo_thread_id=2/*!*/;
SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=0, @@session.unique_checks=1, @@session.autocommit=1/*!*/;
SET @@session.sql_mode=1436549152/*!*/;
SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/;
/*!\C utf8 *//*!*/;
SET @@session.character_set_client=33,@@session.collation_connection=33,@@session.collation_server=224/*!*/;
SET @@session.lc_time_names=0/*!*/;
SET @@session.collation_database=DEFAULT/*!*/;
CREATE USER 'haha'@'%' IDENTIFIED WITH 'mysql_native_password' AS '*85D0F19E5598AC04AC7B3FCF5383247D28FB59EF'
/*!*/;
# at 399
#240902 11:12:30 server id 1 end_log_pos 464 CRC32 0xfd8c223e Anonymous_GTID last_committed=1 sequence_number=2 rbr_only=no
SET @@SESSION.GTID_NEXT= 'ANONYMOUS'/*!*/;
# at 464
#240902 11:12:30 server id 1 end_log_pos 595 CRC32 0xa59e0988 Query thread_id=2 exec_time=0 error_code=0
SET TIMESTAMP=1725246750/*!*/;
GRANT REPLICATION SLAVE ON *.* TO 'haha'@'%'
/*!*/;
SET @@SESSION.GTID_NEXT= 'AUTOMATIC' /* added by mysqlbinlog */ /*!*/;
DELIMITER ;
# End of log file
/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/;
/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=0*/;
Configure the slave
[root@mysql2 ~]# cat /etc/my.cnf
[mysqld]
datadir=/data/mysql
socket=/data/mysql/mysql.sock
symbolic-links=0
server-id=2
[root@mysql2 ~]# /etc/init.d/mysqld restart
[root@mysql2 ~]# mysql -uroot -proot
mysql> change master to
master_host='172.25.254.130',master_user='haha',master_password='haha',master_log_file='mysql-bin.000001',master_log_pos=1217;
Query OK, 0 rows affected, 2 warnings (0.00 sec)
mysql> show slave status\G
*************************** 1. row ***************************
               Slave_IO_State: Waiting for master to send event
                  Master_Host: 172.25.254.130
                  Master_User: haha
                  Master_Port: 3306
                Connect_Retry: 60
              Master_Log_File: mysql-bin.000001
          Read_Master_Log_Pos: 1217
               Relay_Log_File: mysql2-relay-bin.000003
                Relay_Log_Pos: 320
        Relay_Master_Log_File: mysql-bin.000001
             Slave_IO_Running: Yes    #is the transfer (I/O thread) working
            Slave_SQL_Running: Yes    #is the replay (SQL thread) working
              Replicate_Do_DB:
          Replicate_Ignore_DB:
           Replicate_Do_Table:
       Replicate_Ignore_Table:
      Replicate_Wild_Do_Table:
  Replicate_Wild_Ignore_Table:
                   Last_Errno: 0
                   Last_Error:
                 Skip_Counter: 0
          Exec_Master_Log_Pos: 1217
              Relay_Log_Space: 528
              Until_Condition: None
               Until_Log_File:
                Until_Log_Pos: 0
           Master_SSL_Allowed: No
           Master_SSL_CA_File:
           Master_SSL_CA_Path:
              Master_SSL_Cert:
            Master_SSL_Cipher:
               Master_SSL_Key:
        Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
                Last_IO_Errno: 0
                Last_IO_Error:
               Last_SQL_Errno: 0
               Last_SQL_Error:
  Replicate_Ignore_Server_Ids:
             Master_Server_Id: 1
                  Master_UUID: 84bc7655-68c7-11ef-aa09-000c29d16b0b
             Master_Info_File: /data/mysql/master.info
                    SQL_Delay: 0
          SQL_Remaining_Delay: NULL
      Slave_SQL_Running_State: Slave has read all relay log; waiting for more updates
           Master_Retry_Count: 86400
                  Master_Bind:
      Last_IO_Error_Timestamp:
     Last_SQL_Error_Timestamp:
               Master_SSL_Crl:
           Master_SSL_Crlpath:
           Retrieved_Gtid_Set:
            Executed_Gtid_Set:
                Auto_Position: 0
         Replicate_Rewrite_DB:
                 Channel_Name:
           Master_TLS_Version:
1 row in set (0.00 sec)
Test
#on the master
mysql> create database haha;
Query OK, 1 row affected (0.00 sec)
mysql> use haha;
Database changed
mysql> create table student (id int not null,name varchar(10));
Query OK, 0 rows affected (0.00 sec)
mysql> insert into student values(1,'zhangsan');
Query OK, 1 row affected (0.00 sec)
mysql> insert into student values(2,'lisi');
Query OK, 1 row affected (0.00 sec)
mysql> insert into student values(3,'wangwu');
Query OK, 1 row affected (0.00 sec)
mysql> select * from student;
+----+----------+
| id | name |
+----+----------+
| 1 | zhangsan |
| 2 | lisi |
| 3 | wangwu |
+----+----------+
3 rows in set (0.00 sec)
#on the slave
mysql> select * from student;
+----+----------+
| id | name |
+----+----------+
| 1 | zhangsan |
| 2 | lisi |
| 3 | wangwu |
+----+----------+
3 rows in set (0.00 sec)
Delayed Replication
- Delayed replication controls only the SQL thread; it has nothing to do with the I/O thread.
- The I/O thread is not delayed; it keeps copying normally.
- The logs are already stored on the slave; the delay determines how long the SQL thread waits before replaying them.
mysql> stop slave sql_thread;
Query OK, 0 rows affected (0.00 sec)
mysql> change master to master_delay=60;
Query OK, 0 rows affected (0.00 sec)
mysql> start slave sql_thread;
mysql> show slave status\G
...output omitted...
Master_UUID: 84bc7655-68c7-11ef-aa09-000c29d16b0b
Master_Info_File: /data/mysql/master.info
SQL_Delay: 60
SQL_Remaining_Delay: NULL
Slave_SQL_Running_State:
Master_Retry_Count: 86400
...output omitted...
Test: data written on the master only becomes visible on the slave after the delay has elapsed.
Slow Query Log
- A slow query is, as the name suggests, a query that takes a long time to execute.
- Any SQL statement that runs longer than the threshold set by the long_query_time parameter (default 10s) is considered a slow query and is a candidate for optimization.
- Slow queries are recorded in the slow query log.
- The slow query log is disabled by default.
- If you need to optimize SQL statements, enabling it makes it easy to see which statements need work.
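Note that SET GLOBAL (used below) only lasts until the server restarts; to keep the slow log enabled permanently, the same settings would normally also go into my.cnf - a sketch, reusing the paths from this setup:
[mysqld]
slow_query_log=ON
slow_query_log_file=/data/mysql/mysql1-slow.log
long_query_time=4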
mysql> show variables like 'slow%';
+---------------------+-----------------------------+
| Variable_name | Value |
+---------------------+-----------------------------+
| slow_launch_time | 2 |
| slow_query_log | OFF |
| slow_query_log_file | /data/mysql/mysql1-slow.log |
+---------------------+-----------------------------+
3 rows in set (0.00 sec)
mysql> show variables like 'long%';
+-----------------+-----------+
| Variable_name | Value |
+-----------------+-----------+
| long_query_time | 10.000000 |
+-----------------+-----------+
1 row in set (0.01 sec)
mysql> set global slow_query_log=on;
Query OK, 0 rows affected (0.00 sec)
mysql> set long_query_time=4;
Query OK, 0 rows affected (0.00 sec)
mysql> show variables like 'long%';
+-----------------+----------+
| Variable_name | Value |
+-----------------+----------+
| long_query_time | 4.000000 |
+-----------------+----------+
1 row in set (0.00 sec)
mysql> show variables like 'slow%';
+---------------------+-----------------------------+
| Variable_name | Value |
+---------------------+-----------------------------+
| slow_launch_time | 2 |
| slow_query_log | ON |
| slow_query_log_file | /data/mysql/mysql1-slow.log |
+---------------------+-----------------------------+
3 rows in set (0.00 sec)
mysql> \q
Bye
#the slow query log file has been generated
[root@mysql1 mysql]# cat /data/mysql/mysql1-slow.log
/usr/local/mysql/bin/mysqld, Version: 5.7.44-log (Source distribution). started with:
Tcp port: 3306 Unix socket: /data/mysql/mysql.sock
Time Id Command Argument
Test
mysql> select sleep(10);
+-----------+
| sleep(10) |
+-----------+
| 0 |
+-----------+
1 row in set (10.00 sec)
mysql> \q
Bye
[root@mysql1 mysql]# cat /data/mysql/mysql1-slow.log
/usr/local/mysql/bin/mysqld, Version: 5.7.44-log (Source distribution). started with:
Tcp port: 3306 Unix socket: /data/mysql/mysql.sock
Time Id Command Argument
# Time: 2024-09-02T07:54:20.121606Z
# User@Host: root[root] @ localhost [] Id: 8
# Query_time: 10.000301 Lock_time: 0.000000 Rows_sent: 1 Rows_examined: 0
SET timestamp=1725263660;
select sleep(10);
Parallel Replication in MySQL
View the thread information on the slave
mysql> show processlist;
+----+-------------+-----------+------+---------+------+--------------------------------------------------------+------------------+
| Id | User | Host | db | Command | Time | State | Info |
+----+-------------+-----------+------+---------+------+--------------------------------------------------------+------------------+
| 5 | root | localhost | haha | Query | 0 | starting | show processlist |
| 16 | system user | | NULL | Connect | 3207 | Waiting for master to send event | NULL |
| 17 | system user | | NULL | Connect | 3166 | Slave has read all relay log; waiting for more updates | NULL |
+----+-------------+-----------+------+---------+------+--------------------------------------------------------+------------------+
3 rows in set (0.00 sec)
By default the slave replays the relay log with a single SQL thread.
The master, however, handles reads and writes from many users concurrently, so single-threaded replay causes serious master-slave lag.
Enabling MySQL's multi-threaded replay solves this problem (a dynamic variant of the same settings is sketched after the process list below).
[root@mysql2 mysql]# cat /etc/my.cnf
[mysqld]
datadir=/data/mysql
socket=/data/mysql/mysql.sock
symbolic-links=0
server-id=2
gtid_mode=ON
enforce-gtid-consistency=ON
slave-parallel-type=LOGICAL_CLOCK    #based on group commit
slave-parallel-workers=16            #number of worker threads
master_info_repository=TABLE         #record master info in a table (the default is the file /data/mysql/master.info)
relay_log_info_repository=TABLE      #record relay log info in a table (the default is the file /data/mysql/relay-log.info)
relay_log_recovery=ON                #enable relay log recovery
[root@mysql2 mysql]# /etc/init.d/mysqld restart
mysql> show processlist;
+----+-------------+-----------+------+---------+------+--------------------------------------------------------+------------------+
| Id | User | Host | db | Command | Time | State | Info |
+----+-------------+-----------+------+---------+------+--------------------------------------------------------+------------------+
| 2 | system user | | NULL | Connect | 19 | Slave has read all relay log; waiting for more updates | NULL |
| 3 | system user | | NULL | Connect | 19 | Waiting for an event from Coordinator | NULL |
| 4 | system user | | NULL | Connect | 19 | Waiting for an event from Coordinator | NULL |
| 5 | system user | | NULL | Connect | 19 | Waiting for an event from Coordinator | NULL |
| 6 | system user | | NULL | Connect | 19 | Waiting for an event from Coordinator | NULL |
| 8 | system user | | NULL | Connect | 19 | Waiting for an event from Coordinator | NULL |
| 9 | system user | | NULL | Connect | 19 | Waiting for an event from Coordinator | NULL |
| 10 | system user | | NULL | Connect | 19 | Waiting for an event from Coordinator | NULL |
| 11 | system user | | NULL | Connect | 19 | Waiting for an event from Coordinator | NULL |
| 12 | system user | | NULL | Connect | 19 | Waiting for an event from Coordinator | NULL |
| 13 | system user | | NULL | Connect | 19 | Waiting for an event from Coordinator | NULL |
| 14 | system user | | NULL | Connect | 19 | Waiting for an event from Coordinator | NULL |
| 15 | system user | | NULL | Connect | 19 | Waiting for an event from Coordinator | NULL |
| 16 | system user | | NULL | Connect | 19 | Waiting for an event from Coordinator | NULL |
| 17 | system user | | NULL | Connect | 19 | Waiting for an event from Coordinator | NULL |
| 18 | system user | | NULL | Connect | 19 | Waiting for an event from Coordinator | NULL |
| 19 | system user | | NULL | Connect | 19 | Waiting for an event from Coordinator | NULL |
| 20 | root | localhost | NULL | Query | 0 | starting | show processlist |
+----+-------------+-----------+------+---------+------+--------------------------------------------------------+------------------+
18 rows in set (0.00 sec)
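The same settings can also be tried dynamically before committing them to my.cnf; a sketch (the SQL thread must be stopped first, and the values are lost on restart):
mysql> stop slave sql_thread;
mysql> set global slave_parallel_type='LOGICAL_CLOCK';
mysql> set global slave_parallel_workers=16;
mysql> start slave sql_thread;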
3. Semi-Synchronous Replication
How semi-sync works
1. After a user thread completes a write, the master's dump thread pushes the binlog to the slave.
2. The slave's I/O thread receives it and saves it to the relay log.
3. Once the relay log is written, the slave returns an ACK to the master.
4. Until it receives the slave's ACK, the master does not complete the commit; it keeps waiting, and only commits to the storage engine once the ACK arrives.
5. MySQL 5.6 used the after_commit mode: the transaction is committed first, and OK is returned to the client only after the ACK comes back.
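The wait point that distinguishes the two behaviours is exposed as a variable once the semisync plugin is installed (shown below); a quick check:
mysql> show variables like 'rpl_semi_sync_master_wait_point';   #AFTER_SYNC (the 5.7 default) or AFTER_COMMIT
mysql> show variables like 'rpl_semi_sync_master_timeout';      #milliseconds to wait for an ACK before falling back to async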
GTID mode
Issues to consider when GTID is not enabled:
Writes on the master come from many concurrent users, while replication on the slave is single-threaded replay, so the slave always lags behind the master.
This lag can differ from slave to slave. When the master dies, a slave takes over - usually the one whose logs are closest to the master's.
The slaves that did not take over continue to act as slaves, now pointing at the new master.
With the position-based configuration used so far, we would need to know the pos id on the new master, but we cannot easily tell how far apart the new master and each slave are.
Once GTID is enabled:
When the master fails, slave2, whose data is closest to the master's, becomes the new master.
slave1 then points at the new master without having to look up the new master's pos id; it simply continues reading from its own gtid_next.
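A few quick ways to inspect GTID state once it is enabled; a sketch, runnable on any node:
mysql> show variables like 'gtid_mode';
mysql> select @@global.gtid_executed;    #all transactions already applied on this server
mysql> show master status;               #the Executed_Gtid_Set column shows the same information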
Configure GTID
#enable GTID mode on the master
[root@mysql1 ~]# cat /etc/my.cnf
[mysqld]
datadir=/data/mysql
socket=/data/mysql/mysql.sock
log-bin=mysql-bin
server-id=1
gtid_mode=ON
enforce-gtid-consistency=ON
symbolic-links=0
mysql> show master status;
+------------------+----------+--------------+------------------+-------------------+
| File | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set |
+------------------+----------+--------------+------------------+-------------------+
| mysql-bin.000004 | 154 | | | |
+------------------+----------+--------------+------------------+-------------------+
1 row in set (0.00 sec)
#enable GTID mode on the slave
[root@mysql2 ~]# cat /etc/my.cnf
[mysqld]
datadir=/data/mysql
socket=/data/mysql/mysql.sock
server-id=2
log-bin=mysql-bin
gtid_mode=ON
enforce-gtid-consistency=ON
symbolic-links=0
mysql> stop slave;
Query OK, 0 rows affected (0.00 sec)
mysql> change master to master_host='172.25.254.130',master_user='haha',master_password='haha',master_auto_position=1;
Query OK, 0 rows affected, 2 warnings (0.00 sec)
mysql> start slave;
Query OK, 0 rows affected (0.01 sec)
mysql> show slave status\G
*************************** 1. row ***************************
               Slave_IO_State: Waiting for master to send event
                  Master_Host: 172.25.254.130
                  Master_User: haha
                  Master_Port: 3306
                Connect_Retry: 60
              Master_Log_File: mysql-bin.000004
          Read_Master_Log_Pos: 154
               Relay_Log_File: mysql2-relay-bin.000002
                Relay_Log_Pos: 367
        Relay_Master_Log_File: mysql-bin.000004
             Slave_IO_Running: Yes
            Slave_SQL_Running: Yes
              Replicate_Do_DB:
          Replicate_Ignore_DB:
           Replicate_Do_Table:
       Replicate_Ignore_Table:
      Replicate_Wild_Do_Table:
  Replicate_Wild_Ignore_Table:
                   Last_Errno: 0
                   Last_Error:
                 Skip_Counter: 0
          Exec_Master_Log_Pos: 154
              Relay_Log_Space: 575
              Until_Condition: None
               Until_Log_File:
                Until_Log_Pos: 0
           Master_SSL_Allowed: No
           Master_SSL_CA_File:
           Master_SSL_CA_Path:
              Master_SSL_Cert:
            Master_SSL_Cipher:
               Master_SSL_Key:
        Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
                Last_IO_Errno: 0
                Last_IO_Error:
               Last_SQL_Errno: 0
               Last_SQL_Error:
  Replicate_Ignore_Server_Ids:
             Master_Server_Id: 1
                  Master_UUID: 84bc7655-68c7-11ef-aa09-000c29d16b0b
             Master_Info_File: /data/mysql/master.info
                    SQL_Delay: 60
          SQL_Remaining_Delay: NULL
      Slave_SQL_Running_State: Slave has read all relay log; waiting for more updates
           Master_Retry_Count: 86400
                  Master_Bind:
      Last_IO_Error_Timestamp:
     Last_SQL_Error_Timestamp:
               Master_SSL_Crl:
           Master_SSL_Crlpath:
           Retrieved_Gtid_Set:
            Executed_Gtid_Set:
                Auto_Position: 1    #auto-positioning is enabled
         Replicate_Rewrite_DB:
                 Channel_Name:
           Master_TLS_Version:
1 row in set (0.00 sec)
[root@mysql3 ~]# cat /etc/my.cnf
[mysqld]
datadir=/data/mysql
socket=/data/mysql/mysql.sock
server-id=3
log-bin=mysql-bin
gtid_mode=ON
enforce-gtid-consistency=ON
symbolic-links=0
mysql> stop slave;
Query OK, 0 rows affected, 1 warning (0.00 sec)
mysql> change master to master_host='172.25.254.130',master_user='haha',master_password='haha',master_auto_position=1;
Query OK, 0 rows affected, 2 warnings (0.01 sec)
mysql> start slave;
Query OK, 0 rows affected (0.00 sec)
mysql> show slave status\G
*************************** 1. row ***************************
               Slave_IO_State: Waiting for master to send event
                  Master_Host: 172.25.254.130
                  Master_User: haha
                  Master_Port: 3306
                Connect_Retry: 60
              Master_Log_File: mysql-bin.000004
          Read_Master_Log_Pos: 154
               Relay_Log_File: mysql3-relay-bin.000002
                Relay_Log_Pos: 367
        Relay_Master_Log_File: mysql-bin.000004
             Slave_IO_Running: Yes
            Slave_SQL_Running: Yes
              Replicate_Do_DB:
          Replicate_Ignore_DB:
           Replicate_Do_Table:
       Replicate_Ignore_Table:
      Replicate_Wild_Do_Table:
  Replicate_Wild_Ignore_Table:
                   Last_Errno: 0
                   Last_Error:
                 Skip_Counter: 0
          Exec_Master_Log_Pos: 154
              Relay_Log_Space: 575
              Until_Condition: None
               Until_Log_File:
                Until_Log_Pos: 0
           Master_SSL_Allowed: No
           Master_SSL_CA_File:
           Master_SSL_CA_Path:
              Master_SSL_Cert:
            Master_SSL_Cipher:
               Master_SSL_Key:
        Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
                Last_IO_Errno: 0
                Last_IO_Error:
               Last_SQL_Errno: 0
               Last_SQL_Error:
  Replicate_Ignore_Server_Ids:
             Master_Server_Id: 1
                  Master_UUID: 84bc7655-68c7-11ef-aa09-000c29d16b0b
             Master_Info_File: /data/mysql/master.info
                    SQL_Delay: 0
          SQL_Remaining_Delay: NULL
      Slave_SQL_Running_State: Slave has read all relay log; waiting for more updates
           Master_Retry_Count: 86400
                  Master_Bind:
      Last_IO_Error_Timestamp:
     Last_SQL_Error_Timestamp:
               Master_SSL_Crl:
           Master_SSL_Crlpath:
           Retrieved_Gtid_Set:
            Executed_Gtid_Set:
                Auto_Position: 1    #auto-positioning is enabled
         Replicate_Rewrite_DB:
                 Channel_Name:
           Master_TLS_Version:
1 row in set (0.00 sec)
Enable semi-synchronous replication
Enable semi-sync on the master:
mysql> install plugin rpl_semi_sync_master soname 'semisync_master.so';    #install the semisync master plugin
Query OK, 0 rows affected (0.00 sec)
mysql> select plugin_name,plugin_status from information_schema.plugins where plugin_name like '%semi%';
+----------------------+---------------+
| plugin_name | plugin_status |
+----------------------+---------------+
| rpl_semi_sync_master | ACTIVE |
+----------------------+---------------+
1 row in set (0.00 sec)
mysql> show variables like 'rpl_semi_sync%';
+-------------------------------------------+------------+
| Variable_name | Value |
+-------------------------------------------+------------+
| rpl_semi_sync_master_enabled | OFF |
| rpl_semi_sync_master_timeout | 10000 |
| rpl_semi_sync_master_trace_level | 32 |
| rpl_semi_sync_master_wait_for_slave_count | 1 |
| rpl_semi_sync_master_wait_no_slave | ON |
| rpl_semi_sync_master_wait_point | AFTER_SYNC |
+-------------------------------------------+------------+
6 rows in set (0.00 sec)
mysql> show status like 'rpl_semi_sync%';
+--------------------------------------------+-------+
| Variable_name | Value |
+--------------------------------------------+-------+
| Rpl_semi_sync_master_clients | 0 |
| Rpl_semi_sync_master_net_avg_wait_time | 0 |
| Rpl_semi_sync_master_net_wait_time | 0 |
| Rpl_semi_sync_master_net_waits | 0 |
| Rpl_semi_sync_master_no_times | 0 |
| Rpl_semi_sync_master_no_tx | 0 |
| Rpl_semi_sync_master_status | OFF |
| Rpl_semi_sync_master_timefunc_failures | 0 |
| Rpl_semi_sync_master_tx_avg_wait_time | 0 |
| Rpl_semi_sync_master_tx_wait_time | 0 |
| Rpl_semi_sync_master_tx_waits | 0 |
| Rpl_semi_sync_master_wait_pos_backtraverse | 0 |
| Rpl_semi_sync_master_wait_sessions | 0 |
| Rpl_semi_sync_master_yes_tx | 0 |
+--------------------------------------------+-------+
14 rows in set (0.00 sec)
mysql> set global rpl_semi_sync_master_enabled=1;    #turn on semi-sync
Query OK, 0 rows affected (0.00 sec)
mysql> select plugin_name,plugin_status from information_schema.plugins where plugin_name like '%semi%';
+----------------------+---------------+
| plugin_name | plugin_status |
+----------------------+---------------+
| rpl_semi_sync_master | ACTIVE |
+----------------------+---------------+
1 row in set (0.00 sec)
mysql> show variables like 'rpl_semi_sync%';    #check the semi-sync status
+-------------------------------------------+------------+
| Variable_name | Value |
+-------------------------------------------+------------+
| rpl_semi_sync_master_enabled | ON |
| rpl_semi_sync_master_timeout | 10000 |
| rpl_semi_sync_master_trace_level | 32 |
| rpl_semi_sync_master_wait_for_slave_count | 1 |
| rpl_semi_sync_master_wait_no_slave | ON |
| rpl_semi_sync_master_wait_point | AFTER_SYNC |
+-------------------------------------------+------------+
6 rows in set (0.00 sec)
mysql> show status like 'rpl_semi_sync%';
+--------------------------------------------+-------+
| Variable_name | Value |
+--------------------------------------------+-------+
| Rpl_semi_sync_master_clients | 0 |
| Rpl_semi_sync_master_net_avg_wait_time | 0 |
| Rpl_semi_sync_master_net_wait_time | 0 |
| Rpl_semi_sync_master_net_waits | 0 |
| Rpl_semi_sync_master_no_times | 0 |
| Rpl_semi_sync_master_no_tx | 0 |
| Rpl_semi_sync_master_status | ON |
| Rpl_semi_sync_master_timefunc_failures | 0 |
| Rpl_semi_sync_master_tx_avg_wait_time | 0 |
| Rpl_semi_sync_master_tx_wait_time | 0 |
| Rpl_semi_sync_master_tx_waits | 0 |
| Rpl_semi_sync_master_wait_pos_backtraverse | 0 |
| Rpl_semi_sync_master_wait_sessions | 0 |
| Rpl_semi_sync_master_yes_tx | 0 |
+--------------------------------------------+-------+
14 rows in set (0.00 sec)
mysql> show variables like 'rpl_semi_sync%';
+-------------------------------------------+------------+
| Variable_name | Value |
+-------------------------------------------+------------+
| rpl_semi_sync_master_enabled | ON |
| rpl_semi_sync_master_timeout | 10000 |
| rpl_semi_sync_master_trace_level | 32 |
| rpl_semi_sync_master_wait_for_slave_count | 1 |
| rpl_semi_sync_master_wait_no_slave | ON |
| rpl_semi_sync_master_wait_point | AFTER_SYNC |
+-------------------------------------------+------------+
6 rows in set (0.01 sec)
mysql> show status like 'rpl_semi_sync%';
+--------------------------------------------+-------+
| Variable_name | Value |
+--------------------------------------------+-------+
| Rpl_semi_sync_master_clients | 0 |
| Rpl_semi_sync_master_net_avg_wait_time | 0 |
| Rpl_semi_sync_master_net_wait_time | 0 |
| Rpl_semi_sync_master_net_waits | 0 |
| Rpl_semi_sync_master_no_times | 0 |
| Rpl_semi_sync_master_no_tx | 0 |
| Rpl_semi_sync_master_status | ON |
| Rpl_semi_sync_master_timefunc_failures | 0 |
| Rpl_semi_sync_master_tx_avg_wait_time | 0 |
| Rpl_semi_sync_master_tx_wait_time | 0 |
| Rpl_semi_sync_master_tx_waits | 0 |
| Rpl_semi_sync_master_wait_pos_backtraverse | 0 |
| Rpl_semi_sync_master_wait_sessions | 0 |
| Rpl_semi_sync_master_yes_tx | 0 |
+--------------------------------------------+-------+
14 rows in set (0.00 sec)
[root@mysql1 ~]# cat /etc/my.cnf
[mysqld]
datadir=/data/mysql
socket=/data/mysql/mysql.sock
log-bin=mysql-bin
server-id=1
gtid_mode=ON
enforce-gtid-consistency=ON
rpl_semi_sync_master_enabled=1    #enable semi-sync (1 = on)
symbolic-links=0
[root@mysql1 ~]# /etc/init.d/mysqld restart
Enable semi-sync on the slave:
mysql> install plugin rpl_semi_sync_master soname 'semisync_master.so';
Query OK, 0 rows affected (0.01 sec)
mysql> select plugin_name,plugin_status from information_schema.plugins where plugin_name like '%semi%';
+----------------------+---------------+
| plugin_name | plugin_status |
+----------------------+---------------+
| rpl_semi_sync_master | ACTIVE |
+----------------------+---------------+
1 row in set (0.00 sec)
mysql> show variables like 'rpl_semi_sync%';
+-------------------------------------------+------------+
| Variable_name | Value |
+-------------------------------------------+------------+
| rpl_semi_sync_master_enabled | OFF |
| rpl_semi_sync_master_timeout | 10000 |
| rpl_semi_sync_master_trace_level | 32 |
| rpl_semi_sync_master_wait_for_slave_count | 1 |
| rpl_semi_sync_master_wait_no_slave | ON |
| rpl_semi_sync_master_wait_point | AFTER_SYNC |
+-------------------------------------------+------------+
6 rows in set (0.00 sec)
mysql> show status like 'rpl_semi_sync%';
+--------------------------------------------+-------+
| Variable_name | Value |
+--------------------------------------------+-------+
| Rpl_semi_sync_master_clients | 0 |
| Rpl_semi_sync_master_net_avg_wait_time | 0 |
| Rpl_semi_sync_master_net_wait_time | 0 |
| Rpl_semi_sync_master_net_waits | 0 |
| Rpl_semi_sync_master_no_times | 0 |
| Rpl_semi_sync_master_no_tx | 0 |
| Rpl_semi_sync_master_status | OFF |
| Rpl_semi_sync_master_timefunc_failures | 0 |
| Rpl_semi_sync_master_tx_avg_wait_time | 0 |
| Rpl_semi_sync_master_tx_wait_time | 0 |
| Rpl_semi_sync_master_tx_waits | 0 |
| Rpl_semi_sync_master_wait_pos_backtraverse | 0 |
| Rpl_semi_sync_master_wait_sessions | 0 |
| Rpl_semi_sync_master_yes_tx | 0 |
+--------------------------------------------+-------+
14 rows in set (0.00 sec)
mysql> set global rpl_semi_sync_master_enabled=1;
Query OK, 0 rows affected (0.00 sec)
mysql> stop slave io_thread;     #restart the I/O thread so that semi-sync takes effect
Query OK, 0 rows affected (0.00 sec)
mysql> start slave io_thread;    #restart the I/O thread so that semi-sync takes effect
Query OK, 0 rows affected (0.00 sec)
mysql> select plugin_name,plugin_status from information_schema.plugins where plugin_name like '%semi%';
+----------------------+---------------+
| plugin_name | plugin_status |
+----------------------+---------------+
| rpl_semi_sync_master | ACTIVE |
+----------------------+---------------+
1 row in set (0.00 sec)
mysql> show variables like 'rpl_semi_sync%';
+-------------------------------------------+------------+
| Variable_name | Value |
+-------------------------------------------+------------+
| rpl_semi_sync_master_enabled | ON |
| rpl_semi_sync_master_timeout | 10000 |
| rpl_semi_sync_master_trace_level | 32 |
| rpl_semi_sync_master_wait_for_slave_count | 1 |
| rpl_semi_sync_master_wait_no_slave | ON |
| rpl_semi_sync_master_wait_point | AFTER_SYNC |
+-------------------------------------------+------------+
6 rows in set (0.00 sec)
mysql> show status like 'rpl_semi_sync%';
+--------------------------------------------+-------+
| Variable_name | Value |
+--------------------------------------------+-------+
| Rpl_semi_sync_master_clients | 0 |
| Rpl_semi_sync_master_net_avg_wait_time | 0 |
| Rpl_semi_sync_master_net_wait_time | 0 |
| Rpl_semi_sync_master_net_waits | 0 |
| Rpl_semi_sync_master_no_times | 0 |
| Rpl_semi_sync_master_no_tx | 0 |
| Rpl_semi_sync_master_status | ON |
| Rpl_semi_sync_master_timefunc_failures | 0 |
| Rpl_semi_sync_master_tx_avg_wait_time | 0 |
| Rpl_semi_sync_master_tx_wait_time | 0 |
| Rpl_semi_sync_master_tx_waits | 0 |
| Rpl_semi_sync_master_wait_pos_backtraverse | 0 |
| Rpl_semi_sync_master_wait_sessions | 0 |
| Rpl_semi_sync_master_yes_tx | 0 |
+--------------------------------------------+-------+
14 rows in set (0.00 sec)
[root@mysql2 ~]# cat /etc/my.cnf
[mysqld]
datadir=/data/mysql
socket=/data/mysql/mysql.sock
server-id=2
log-bin=mysql-bin
gtid_mode=ON
enforce-gtid-consistency=ON
rpl_semi_sync_master_enabled=1
symbolic-links=0
[root@mysql2 ~]# /etc/init.d/mysqld restart
#mysql3 uses exactly the same configuration
4. MySQL High Availability with Group Replication (MGR)
MySQL Group Replication (MGR) is a high-availability and high-scalability solution officially released by MySQL in December 2016.
Group replication first appeared in MySQL 5.7.17; it provides a highly available, highly scalable and highly reliable MySQL cluster service.
MySQL group replication offers a single-primary mode and a multi-primary mode; traditional MySQL replication only solves the data-synchronization problem.
MGR automatically coordinates the servers that belong to the same group. Before a transaction can commit, the group members must agree on its position in the global transaction sequence.
Commit or rollback is performed by each server individually, but all servers must reach the same decision.
If a network partition prevents the members from forming the required majority, the system does not make progress until the problem is resolved;
this is a built-in automatic split-brain protection mechanism.
MGR is built on a Group Communication System (GCS) protocol, which provides failure detection, group membership services, and safe, ordered message delivery.
Group replication workflow
Multiple nodes together form a replication group. When a read-write (RW) transaction wants to commit, it must be approved through the consensus layer: a majority of the group's nodes (more than half of the N members) has to agree to the commit, rather than the originating node deciding on its own. Read-only (RO) transactions do not need the group's agreement and commit directly.
Note: a group may contain at most 9 nodes.
Single-primary and multi-primary modes
single-primary mode
In single-primary mode only one node in the group can both read and write; the other nodes are read-only. When the primary fails, a new primary is elected automatically.
multi-primary mode
In multi-primary mode every node in the group is a primary: all of them accept reads and writes, and the data is eventually consistent.
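Once the group is running (set up below), the mode and the current primary can be checked from any member; a sketch - on 5.7 the primary member is exposed as a status variable:
mysql> show variables like 'group_replication_single_primary_mode';
mysql> select variable_value from performance_schema.global_status
    -> where variable_name='group_replication_primary_member';    #empty in multi-primary mode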
Set up MySQL group replication
To avoid problems, re-initialize the database data on all nodes.
Configuration on the master (mysql1)
[root@mysql1 ~]# rm -rf /data/mysql/*
[root@mysql1 ~]# vim /etc/my.cnf
[mysqld]
datadir=/data/mysql
socket=/data/mysql/mysql.sock
symbolic-links=0
server-id=1
disabled_storage_engines="MyISAM,BLACKHOLE,FEDERATED,ARCHIVE,MEMORY"
gtid_mode=ON
enforce_gtid_consistency=ON
master_info_repository=TABLE
relay_log_info_repository=TABLE
binlog_checksum=NONE
log_slave_updates=ON
log_bin=binlog
binlog_format=ROW
plugin_load_add='group_replication.so'
transaction_write_set_extraction=XXHASH64
group_replication_group_name="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
group_replication_start_on_boot=off
group_replication_local_address="172.25.254.130:33061"
group_replication_group_seeds="172.25.254.130:33061,172.25.254.150:33061,172.25.254.160:33061"
group_replication_ip_whitelist="172.25.254.0/24,127.0.0.1/8"
group_replication_bootstrap_group=off
group_replication_single_primary_mode=OFF
group_replication_enforce_update_everywhere_checks=ON
group_replication_allow_local_disjoint_gtids_join=1
[root@mysql1 ~]# /etc/init.d/mysqld start
[root@mysql1 ~]# mysql -uroot
mysql> alter user root@localhost identified by 'root';
Query OK, 0 rows affected (0.00 sec)
mysql> set sql_log_bin=0;
Query OK, 0 rows affected (0.00 sec)
mysql> create user hehe@'%' identified by 'hehe';
Query OK, 0 rows affected (0.00 sec)
mysql> grant replication slave on *.* to hehe@'%';
Query OK, 0 rows affected (0.00 sec)
mysql> flush privileges;
Query OK, 0 rows affected (0.01 sec)
mysql> set sql_log_bin=1;
Query OK, 0 rows affected (0.00 sec)
mysql> change master to master_user='hehe',master_password='hehe' for channel 'group_replication_recovery';
Query OK, 0 rows affected, 2 warnings (0.00 sec)
mysql> set global group_replication_bootstrap_group=ON;
Query OK, 0 rows affected (0.00 sec)
mysql> start group_replication;
Query OK, 0 rows affected, 1 warning (6.60 sec)
mysql> set global group_replication_bootstrap_group=OFF;
Query OK, 0 rows affected (0.00 sec)
mysql> select * from performance_schema.replication_group_members;
+---------------------------+--------------------------------------+-------------+-------------+--------------+
| CHANNEL_NAME | MEMBER_ID | MEMBER_HOST | MEMBER_PORT | MEMBER_STATE |
+---------------------------+--------------------------------------+-------------+-------------+--------------+
| group_replication_applier | 986d5406-69cd-11ef-a05f-000c29d16b0b | mysql1.org | 3306 | ONLINE |
+---------------------------+--------------------------------------+-------------+-------------+--------------+
1 row in set (0.01 sec)
[root@mysql1 ~]# scp /etc/my.cnf root@172.25.254.150:/etc/my.cnf
[root@mysql1 ~]# scp /etc/my.cnf root@172.25.254.160:/etc/my.cnf
Configuration on the slave nodes
[root@mysql2 ~]# rm -rf /data/mysql/*
[root@mysql2 ~]# vim /etc/my.cnf
[mysqld]
datadir=/data/mysql
socket=/data/mysql/mysql.sock
symbolic-links=0
server-id=2
disabled_storage_engines="MyISAM,BLACKHOLE,FEDERATED,ARCHIVE,MEMORY"
gtid_mode=ON
enforce_gtid_consistency=ON
master_info_repository=TABLE
relay_log_info_repository=TABLE
binlog_checksum=NONE
log_slave_updates=ON
log_bin=binlog
binlog_format=ROW
plugin_load_add='group_replication.so'
transaction_write_set_extraction=XXHASH64
group_replication_group_name="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
group_replication_start_on_boot=off
group_replication_local_address="172.25.254.150:33061"
group_replication_group_seeds="172.25.254.130:33061,172.25.254.150:33061,172.25.254.160:33061"
group_replication_ip_whitelist="172.25.254.0/24,127.0.0.1/8"
group_replication_bootstrap_group=off
group_replication_single_primary_mode=OFF
group_replication_enforce_update_everywhere_checks=ON
group_replication_allow_local_disjoint_gtids_join=1
[root@mysql2 ~]# mysqld --user=mysql --initialize
[root@mysql2 ~]# /etc/init.d/mysqld start
[root@mysql2 ~]# mysql -uroot
mysql> alter user root@localhost identified by 'root';
Query OK, 0 rows affected (0.00 sec)
mysql> set sql_log_bin=0;
Query OK, 0 rows affected (0.00 sec)
mysql> create user hehe@'%' identified by 'hehe';
Query OK, 0 rows affected (0.00 sec)
mysql> grant replication slave on *.* to hehe@'%';
Query OK, 0 rows affected (0.00 sec)
mysql> flush privileges;
Query OK, 0 rows affected (0.01 sec)
mysql> set sql_log_bin=1;
Query OK, 0 rows affected (0.00 sec)
mysql> change master to master_user='hehe',master_password='hehe' for channel 'group_replication_recovery';
Query OK, 0 rows affected, 2 warnings (0.00 sec)
mysql> start group_replication;
Query OK, 0 rows affected, 1 warning (6.60 sec)
mysql> select * from performance_schema.replication_group_members;
+---------------------------+--------------------------------------+-------------+-------------+--------------+
| CHANNEL_NAME | MEMBER_ID | MEMBER_HOST | MEMBER_PORT | MEMBER_STATE |
+---------------------------+--------------------------------------+-------------+-------------+--------------+
| group_replication_applier | 5bc11f89-69e6-11ef-b7cd-000c297763e2 | mysql2.org | 3306 | ONLINE |
| group_replication_applier | 986d5406-69cd-11ef-a05f-000c29d16b0b | mysql1.org | 3306 | ONLINE |
| group_replication_applier | d52163db-69e7-11ef-bb44-000c294d5dec | mysql3.org | 3306 | ONLINE |
+---------------------------+--------------------------------------+-------------+-------------+--------------+
3 rows in set (0.00 sec)
Test
Reads and writes can be performed on every node, and the changes are synchronized to the other members (output omitted).
5. MySQL Router
MySQL Router
- is an InnoDB Cluster connection-routing service that is transparent to the application, providing load balancing, connection failover and client routing.
- Using its connection-routing feature, applications connect to the Router, and the Router applies the configured routing policy to forward each connection to the correct MySQL server.
Deploying MySQL Router
We add one more host, mysql-router, alongside the database hosts.
##install mysql-router
[root@mysql1 ~]# rpm -ivh mysql-router-community-8.4.0-1.el7.x86_64.rpm
#configure mysql-router
[root@mysql1 ~]# vim /etc/mysqlrouter/mysqlrouter.conf
[routing:ro]
bind_address = 0.0.0.0
bind_port = 7001
destinations =172.25.254.150:3306,172.25.254.160:3306
routing_strategy = round-robin
[root@mysql1 ~]# systemctl start mysqlrouter.service
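The file above only defines a read-only round-robin pool; a read-write section pointing at the master could be added alongside it - a sketch, with the port and strategy chosen for illustration:
[routing:rw]
bind_address = 0.0.0.0
bind_port = 7002
destinations = 172.25.254.130:3306
routing_strategy = first-available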
Test:
#the port is listening
[root@mysql1 ~]# netstat -antlupe | grep 7001
tcp 0 0 0.0.0.0:7001 0.0.0.0:* LISTEN 997 24641 2136/mysqlrouter
tcp        0      0 172.25.254.130:52704    172.25.254.130:7001     TIME_WAIT   0          0          -
#Note: before testing, make sure the local MySQL instance on the mysql-router host is stopped, that the other two MySQL servers are running, and that both have a user allowed to log in remotely
[root@mysql1 ~]# mysql -uhehe -phehe -h 172.25.254.130 -P 7001
mysql: [Warning] Using a password on the command line interface can be insecure.
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 2
Server version: 5.7.44-log Source distribution

Copyright (c) 2000, 2023, Oracle and/or its affiliates.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

mysql> select @@server_id;
+-------------+
| @@server_id |
+-------------+
| 3 |
+-------------+
1 row in set (0.00 sec)
mysql> \q
Bye
[root@mysql1 ~]# mysql -uhehe -phehe -h 172.25.254.130 -P 7001
mysql: [Warning] Using a password on the command line interface can be insecure.
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 4
Server version: 5.7.44-log Source distribution

Copyright (c) 2000, 2023, Oracle and/or its affiliates.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

mysql> select @@server_id;
+-------------+
| @@server_id |
+-------------+
| 2 |
+-------------+
1 row in set (0.01 sec)
6. MySQL High Availability with MHA
Why use MHA?
Because the master is a single point of failure.
What is MHA?
- MHA (Master High Availability) is a mature software suite for failover and master-slave replication in highly available MySQL environments.
- MHA exists to solve the MySQL single-point-of-failure problem.
- During a MySQL failover, MHA can complete the switchover automatically within 0-30 seconds.
- During failover, MHA preserves data consistency as far as possible, achieving true high availability.
MHA components
- MHA consists of two parts: MHA Manager (the management node) and MHA Node (the database nodes).
- MHA Manager can be deployed on a dedicated machine to manage several master-slave clusters, or on one of the slave nodes.
- MHA Manager periodically probes the master node in the cluster.
- When the master fails, it automatically promotes the slave with the most recent data to the new master and then re-points all other slaves at the new master.
MHA characteristics
- During automatic failover, MHA saves the binary logs from the crashed master, minimizing data loss.
- Combined with semi-synchronous replication, the risk of data loss drops sharply: if at least one slave has received the latest binary log, MHA can apply it to all other slaves, keeping every node's data consistent.
- MHA currently supports one-master-many-slaves topologies and requires at least three servers: one master and two slaves.
How the candidate master is chosen during failover
1. Slaves are normally ranked by position/GTID: when the data differs, the slave closest to the master becomes the candidate master.
2. When the data is identical, the candidate is chosen by the order of hosts in the configuration file.
3. A weight can be set (candidate_master=1) to force a particular host to be the candidate master.
(1) By default, if a slave lags the master by more than 100MB of relay logs, it will not be chosen even if it carries the weight.
(2) With check_repl_delay=0, the host is forced to be the candidate even if it is far behind in the logs.
How MHA works
- MHA currently supports a one-master-many-slaves architecture. An MHA cluster needs at least three database servers - one master and two slaves: one acts as the master, one as a standby master, and one as a pure slave.
- MHA Node runs on every MySQL server.
- MHA Manager periodically probes the master node in the cluster.
- When the master fails, the slave with the most recent data is automatically promoted to the new master.
- All other slaves are then re-pointed at the new master, and the VIP automatically floats to the new master.
- The entire failover is completely transparent to the application.
Deploying MHA
Build a one-master, two-slave topology
#on the master node
[root@mysql1 ~]# /etc/init.d/mysqld stop
Shutting down MySQL............ SUCCESS!
[root@mysql1 ~]# rm -rf /data/mysql/*
[root@mysql1 ~]# vim /etc/my.cnf
[mysqld]
datadir=/data/mysql
socket=/data/mysql/mysql.sock
server-id=1
log-bin=mysql-bin
gtid_mode=ON
log_slave_updates=ON
enforce-gtid-consistency=ON
symbolic-links=0
[root@mysql1 ~]# mysqld --user mysql --initialize
[root@mysql1 ~]# /etc/init.d/mysqld start
Starting MySQL.Logging to '/data/mysql/mysql1.org.err'.SUCCESS!
[root@mysql1 ~]# mysql_secure_installation
[root@mysql1 ~]# mysql -uroot
mysql> create user 'test'@'%' identified by 'test';
Query OK, 0 rows affected (0.01 sec)
mysql> grant replication slave on *.* to test@'%';
Query OK, 0 rows affected (0.00 sec)
mysql> install plugin rpl_semi_sync_master soname 'semisync_master.so';
Query OK, 0 rows affected (0.01 sec)
mysql> set global rpl_semi_sync_master_enabled=1;
Query OK, 0 rows affected (0.00 sec)
mysql> \q
Bye
#on slave1 and slave2
[root@mysql2 ~]# /etc/init.d/mysqld stop
[root@mysql2 ~]# rm -rf /data/mysql/*
[root@mysql2 ~]# vim /etc/my.cnf
[mysqld]
datadir=/data/mysql
socket=/data/mysql/mysql.sock
server-id=2
log-bin=mysql-bin
gtid_mode=ON
log_slave_updates=ON
enforce-gtid-consistency=ON
symbolic-links=0
[root@mysql2 ~]# mysqld --user mysql --initialize
[root@mysql2 ~]# /etc/init.d/mysqld start
[root@mysql2 ~]# mysql -uroot
mysql> alter user root@localhost identified by 'root';
Query OK, 0 rows affected (0.00 sec)
mysql> change master to master_host='172.25.254.130',master_user='test',master_password='test',master_auto_position=1;
Query OK, 0 rows affected, 2 warnings (0.01 sec)
mysql> start slave;
Query OK, 0 rows affected (0.00 sec)
mysql> install plugin rpl_semi_sync_slave soname 'semisync_slave.so';
Query OK, 0 rows affected (0.00 sec)
mysql> set global rpl_semi_sync_slave_enabled=1;
Query OK, 0 rows affected (0.00 sec)
mysql> stop slave io_thread;
Query OK, 0 rows affected (0.00 sec)
mysql> start slave io_thread;
Query OK, 0 rows affected (0.00 sec)
mysql> show status like 'Rpl_semi_sync%';
+----------------------------+-------+
| Variable_name | Value |
+----------------------------+-------+
| Rpl_semi_sync_slave_status | ON |
+----------------------------+-------+
1 row in set (0.00 sec)
Install the software required by MHA
[root@mysql-mha ~]# ls
anaconda-ks.cfg master_ip_failover master_ip_online_change MHA-7.zip
[root@mysql-mha ~]# unzip MHA-7.zip
Archive:  MHA-7.zip
   creating: MHA-7/
  inflating: MHA-7/mha4mysql-manager-0.58-0.el7.centos.noarch.rpm
  inflating: MHA-7/mha4mysql-manager-0.58.tar.gz
  inflating: MHA-7/mha4mysql-node-0.58-0.el7.centos.noarch.rpm
  inflating: MHA-7/perl-Config-Tiny-2.14-7.el7.noarch.rpm
  inflating: MHA-7/perl-Email-Date-Format-1.002-15.el7.noarch.rpm
  inflating: MHA-7/perl-Log-Dispatch-2.41-1.el7.1.noarch.rpm
  inflating: MHA-7/perl-Mail-Sender-0.8.23-1.el7.noarch.rpm
  inflating: MHA-7/perl-Mail-Sendmail-0.79-21.el7.noarch.rpm
  inflating: MHA-7/perl-MIME-Lite-3.030-1.el7.noarch.rpm
  inflating: MHA-7/perl-MIME-Types-1.38-2.el7.noarch.rpm
  inflating: MHA-7/perl-Net-Telnet-3.03-19.el7.noarch.rpm
  inflating: MHA-7/perl-Parallel-ForkManager-1.18-2.el7.noarch.rpm
[root@mysql-mha ~]# ls
anaconda-ks.cfg master_ip_failover master_ip_online_change MHA-7 MHA-7.zip
[root@mysql-mha ~]# cd MHA-7
[root@mysql-mha MHA-7]# ls
mha4mysql-manager-0.58-0.el7.centos.noarch.rpm
mha4mysql-manager-0.58.tar.gz
mha4mysql-node-0.58-0.el7.centos.noarch.rpm
perl-Config-Tiny-2.14-7.el7.noarch.rpm
perl-Email-Date-Format-1.002-15.el7.noarch.rpm
perl-Log-Dispatch-2.41-1.el7.1.noarch.rpm
perl-Mail-Sender-0.8.23-1.el7.noarch.rpm
perl-Mail-Sendmail-0.79-21.el7.noarch.rpm
perl-MIME-Lite-3.030-1.el7.noarch.rpm
perl-MIME-Types-1.38-2.el7.noarch.rpm
perl-Net-Telnet-3.03-19.el7.noarch.rpm
perl-Parallel-ForkManager-1.18-2.el7.noarch.rpm
[root@mysql-mha MHA-7]# ssh-keygen
[root@mysql-mha MHA-7]# ssh-copy-id -i /root/.ssh/id_rsa.pub root@172.25.254.130
[root@mysql-mha MHA-7]# ssh-copy-id -i /root/.ssh/id_rsa.pub root@172.25.254.150
[root@mysql-mha MHA-7]# ssh-copy-id -i /root/.ssh/id_rsa.pub root@172.25.254.160
[root@mysql-mha MHA-7]# yum install *.rpm -y
[root@mysql-mha MHA-7]# ls
mha4mysql-manager-0.58-0.el7.centos.noarch.rpm
mha4mysql-manager-0.58.tar.gz
mha4mysql-node-0.58-0.el7.centos.noarch.rpm
perl-Config-Tiny-2.14-7.el7.noarch.rpm
perl-Email-Date-Format-1.002-15.el7.noarch.rpm
perl-Log-Dispatch-2.41-1.el7.1.noarch.rpm
perl-Mail-Sender-0.8.23-1.el7.noarch.rpm
perl-Mail-Sendmail-0.79-21.el7.noarch.rpm
perl-MIME-Lite-3.030-1.el7.noarch.rpm
perl-MIME-Types-1.38-2.el7.noarch.rpm
perl-Net-Telnet-3.03-19.el7.noarch.rpm
perl-Parallel-ForkManager-1.18-2.el7.noarch.rpm
[root@mysql-mha MHA-7]# scp mha4mysql-node-0.58-0.el7.centos.noarch.rpm root@172.25.254.130:/root
[root@mysql-mha MHA-7]# scp mha4mysql-node-0.58-0.el7.centos.noarch.rpm root@172.25.254.150:/root
[root@mysql-mha MHA-7]# scp mha4mysql-node-0.58-0.el7.centos.noarch.rpm root@172.25.254.160:/root
#install mha4mysql-node on each database node
[root@mysql1 ~]# yum install mha4mysql-node-0.58-0.el7.centos.noarch.rpm -y
[root@mysql2 ~]# yum install mha4mysql-node-0.58-0.el7.centos.noarch.rpm -y
[root@mysql3 ~]# yum install mha4mysql-node-0.58-0.el7.centos.noarch.rpm -y
Configure the MHA management environment
Since we currently have only one replication cluster, a single configuration file is enough. The RPM packages do not ship a configuration template, but one can be found under samples/ after extracting the source tarball.
[root@mysql-mha MHA-7]# mkdir /etc/masterha
[root@mysql-mha MHA-7]# ls /etc/masterha/
[root@mysql-mha MHA-7]# tar zxf mha4mysql-manager-0.58.tar.gz
[root@mysql-mha MHA-7]# cd mha4mysql-manager-0.58
[root@mysql-mha mha4mysql-manager-0.58]# ll
total 44
-rw-rw-r-- 1 root root    78 Mar 23  2018 AUTHORS
drwxrwxr-x 2 root root   252 Mar 23  2018 bin
-rw-rw-r-- 1 root root 17987 Mar 23  2018 COPYING
drwxrwxr-x 2 root root    94 Mar 23  2018 debian
drwxrwxr-x 3 root root    17 Mar 23  2018 lib
-rwxrwxr-x 1 root root   846 Mar 23  2018 Makefile.PL
-rw-rw-r-- 1 root root  4875 Mar 23  2018 MANIFEST
-rw-rw-r-- 1 root root   233 Mar 23  2018 MANIFEST.SKIP
-rw-rw-r-- 1 root root   244 Mar 23  2018 README
drwxrwxr-x 2 root root    35 Mar 23  2018 rpm
drwxrwxr-x 4 root root    33 Mar 23  2018 samples
drwxrwxr-x 2 root root    49 Mar 23  2018 t
drwxrwxr-x 3 root root    53 Mar 23  2018 tests
[root@mysql-mha mha4mysql-manager-0.58]# ls
AUTHORS COPYING lib MANIFEST README samples tests
bin debian Makefile.PL MANIFEST.SKIP rpm t
[root@mysql-mha mha4mysql-manager-0.58]# cd samples/conf/
[root@mysql-mha conf]# ls
app1.cnf masterha_default.cnf
[root@mysql-mha conf]# cat masterha_default.cnf app1.cnf > /etc/masterha/app1.cnf
[root@mysql-mha conf]# cd /etc/masterha/
[root@mysql-mha masterha]# ls
app1.cnf
[root@mysql-mha masterha]# vim app1.cnf
[server default]
user=root                       #MySQL administrative user, needed because MHA performs automated operations
password=root                   #MySQL password
ssh_user=root                   #user for SSH remote login
master_binlog_dir= /data/mysql  #binary log directory
remote_workdir=/tmp             #remote working directory
#This parameter provides a redundant check: if the MHA host's own network problems prevent it from reaching
#the database nodes, the check is repeated through hosts outside the cluster
secondary_check_script= masterha_secondary_check -s 172.25.254.130 -s 172.25.254.131
ping_interval=3                 #check every 3 seconds
# master_ip_failover_script= /script/masterha/master_ip_failover   #script called after a failure, used to migrate the VIP
# shutdown_script= /script/masterha/power_manager                  #power management script
# report_script= /script/masterha/send_report                      #script used to send mail or alerts after a failure
# master_ip_online_change_script= /script/masterha/master_ip_online_change   #VIP migration script called during a manual online switch
[server default]
manager_workdir=/etc/masterha            #MHA working directory
manager_log=/etc/masterha/manager.log    #MHA log
[server1]
hostname=172.25.254.130
candidate_master=1              #this host may become the new master
check_repl_delay=0
##By default, if a slave is more than 100MB of relay logs behind the master,
#MHA will not choose that slave as the new master,
#because recovering it would take a long time.
#Setting check_repl_delay=0 makes MHA ignore replication delay
#when choosing a new master during a switchover.
#This parameter is very useful for hosts with candidate_master=1,
#because such a candidate must become the new master during the switch.
[server2]
hostname=172.25.254.150
candidate_master=1              #this host may become the new master
check_repl_delay=0
[server3]
hostname=172.25.254.160
no_master=1                     #this host will never become master
Verify the configuration: check the network and passwordless SSH
#on the master data node
mysql> GRANT ALL ON *.* TO root@'%' identified by 'root';    #allow root to log in remotely
[root@mysql-mha .ssh]# masterha_check_ssh --conf=/etc/masterha/app1.cnf
Wed Sep 4 21:44:43 2024 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Wed Sep 4 21:44:43 2024 - [info] Reading application default configuration from /etc/masterha/app1.cnf..
Wed Sep 4 21:44:43 2024 - [info] Reading server configuration from /etc/masterha/app1.cnf..
Wed Sep 4 21:44:43 2024 - [info] Starting SSH connection tests..
Wed Sep 4 21:44:44 2024 - [debug]
Wed Sep 4 21:44:43 2024 - [debug] Connecting via SSH from root@172.25.254.130(172.25.254.130:22) to root@172.25.254.150(172.25.254.150:22)..
Wed Sep 4 21:44:43 2024 - [debug] ok.
Wed Sep 4 21:44:43 2024 - [debug] Connecting via SSH from root@172.25.254.130(172.25.254.130:22) to root@172.25.254.160(172.25.254.160:22)..
Wed Sep 4 21:44:43 2024 - [debug] ok.
Wed Sep 4 21:44:44 2024 - [debug]
Wed Sep 4 21:44:43 2024 - [debug] Connecting via SSH from root@172.25.254.150(172.25.254.150:22) to root@172.25.254.130(172.25.254.130:22)..
Warning: Permanently added '172.25.254.130' (ECDSA) to the list of known hosts.
Wed Sep 4 21:44:43 2024 - [debug] ok.
Wed Sep 4 21:44:43 2024 - [debug] Connecting via SSH from root@172.25.254.150(172.25.254.150:22) to root@172.25.254.160(172.25.254.160:22)..
Warning: Permanently added '172.25.254.160' (ECDSA) to the list of known hosts.
Wed Sep 4 21:44:44 2024 - [debug] ok.
Wed Sep 4 21:44:45 2024 - [debug]
Wed Sep 4 21:44:44 2024 - [debug] Connecting via SSH from root@172.25.254.160(172.25.254.160:22) to root@172.25.254.130(172.25.254.130:22)..
Warning: Permanently added '172.25.254.130' (ECDSA) to the list of known hosts.
Wed Sep 4 21:44:44 2024 - [debug] ok.
Wed Sep 4 21:44:44 2024 - [debug] Connecting via SSH from root@172.25.254.160(172.25.254.160:22) to root@172.25.254.150(172.25.254.150:22)..
Warning: Permanently added '172.25.254.150' (ECDSA) to the list of known hosts.
Wed Sep 4 21:44:44 2024 - [debug] ok.
Wed Sep 4 21:44:45 2024 - [info] All SSH connection tests passed successfully.
[root@mysql-mha .ssh]# masterha_check_repl --conf=/etc/masterha/app1.cnf
Wed Sep 4 21:45:16 2024 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Wed Sep 4 21:45:16 2024 - [info] Reading application default configuration from /etc/masterha/app1.cnf..
Wed Sep 4 21:45:16 2024 - [info] Reading server configuration from /etc/masterha/app1.cnf..
Wed Sep 4 21:45:16 2024 - [info] MHA::MasterMonitor version 0.58.
Wed Sep 4 21:45:17 2024 - [info] GTID failover mode = 1
Wed Sep 4 21:45:17 2024 - [info] Dead Servers:
Wed Sep 4 21:45:17 2024 - [info] Alive Servers:
Wed Sep 4 21:45:17 2024 - [info] 172.25.254.130(172.25.254.130:3306)
Wed Sep 4 21:45:17 2024 - [info] 172.25.254.150(172.25.254.150:3306)
Wed Sep 4 21:45:17 2024 - [info] 172.25.254.160(172.25.254.160:3306)
Wed Sep 4 21:45:17 2024 - [info] Alive Slaves:
Wed Sep 4 21:45:17 2024 - [info] 172.25.254.150(172.25.254.150:3306) Version=5.7.44-log (oldest major version between slaves) log-bin:enabled
Wed Sep 4 21:45:17 2024 - [info] GTID ON
Wed Sep 4 21:45:17 2024 - [info] Replicating from 172.25.254.130(172.25.254.130:3306)
Wed Sep 4 21:45:17 2024 - [info] Primary candidate for the new Master (candidate_master is set)
Wed Sep 4 21:45:17 2024 - [info] 172.25.254.160(172.25.254.160:3306) Version=5.7.44-log (oldest major version between slaves) log-bin:enabled
Wed Sep 4 21:45:17 2024 - [info] GTID ON
Wed Sep 4 21:45:17 2024 - [info] Replicating from 172.25.254.130(172.25.254.130:3306)
Wed Sep 4 21:45:17 2024 - [info] Not candidate for the new Master (no_master is set)
Wed Sep 4 21:45:17 2024 - [info] Current Alive Master: 172.25.254.130(172.25.254.130:3306)
Wed Sep 4 21:45:17 2024 - [info] Checking slave configurations..
Wed Sep 4 21:45:17 2024 - [info] read_only=1 is not set on slave 172.25.254.150(172.25.254.150:3306).
Wed Sep 4 21:45:17 2024 - [info] read_only=1 is not set on slave 172.25.254.160(172.25.254.160:3306).
Wed Sep 4 21:45:17 2024 - [info] Checking replication filtering settings..
Wed Sep 4 21:45:17 2024 - [info] binlog_do_db= , binlog_ignore_db=
Wed Sep 4 21:45:17 2024 - [info] Replication filtering check ok.
Wed Sep 4 21:45:17 2024 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking.
Wed Sep 4 21:45:17 2024 - [info] Checking SSH publickey authentication settings on the current master..
Wed Sep 4 21:45:17 2024 - [info] HealthCheck: SSH to 172.25.254.130 is reachable.
Wed Sep 4 21:45:17 2024 - [info]
172.25.254.130(172.25.254.130:3306) (current master)
 +--172.25.254.150(172.25.254.150:3306)
 +--172.25.254.160(172.25.254.160:3306)

Wed Sep 4 21:45:17 2024 - [info] Checking replication health on 172.25.254.150..
Wed Sep 4 21:45:17 2024 - [info] ok.
Wed Sep 4 21:45:17 2024 - [info] Checking replication health on 172.25.254.160..
Wed Sep 4 21:45:17 2024 - [info] ok.
Wed Sep 4 21:45:17 2024 - [warning] master_ip_failover_script is not defined.
Wed Sep 4 21:45:17 2024 - [warning] shutdown_script is not defined.
Wed Sep 4 21:45:17 2024 - [info] Got exit code 0 (Not master dead).

MySQL Replication Health is OK.
MHA failover
The MHA failover process
consists of the following steps:
1. Configuration check phase: the configuration of the whole cluster is verified.
2. Handling of the dead master: this includes removing the virtual IP and, if configured, powering off the host.
3. Copying the relay logs that differ between the dead master and the most up-to-date slave, and saving them in the MHA Manager's working directory.
4. Identifying the slave that holds the most recent updates.
5. Applying the binary log events saved from the master.
6. Promoting one slave to be the new master.
7. Re-pointing all other slaves at the new master so replication continues.
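For comparison, when the master has actually died the switch is driven by the same tool with --master_state=dead; a sketch using the addresses from this lab (exact options may vary with the MHA version):
[root@mysql-mha ~]# masterha_master_switch --conf=/etc/masterha/app1.cnf \
--master_state=dead --dead_master_host=172.25.254.130 --dead_master_port=3306 \
--new_master_host=172.25.254.150 --new_master_port=3306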
Manual switchover while the master is still healthy
[root@mysql-mha ~]# masterha_master_switch \
--conf=/etc/masterha/app1.cnf \          #configuration file
--master_state=alive \                   #state of the current master node
--new_master_host=172.25.254.150 \       #the new master
--new_master_port=3306 \                 #port of the new master
--orig_master_is_new_slave \             #the original master becomes a new slave
--running_updates_limit=10000            #switchover timeout
#the switchover process
[root@mysql-mha ~]# masterha_master_switch --conf=/etc/masterha/app1.cnf --master_state=alive --new_master_host=172.25.254.150 --new_master_port=3306 --orig_master_is_new_slave --running_updates_limit=10000
Sat Sep 7 08:51:32 2024 - [info] MHA::MasterRotate version 0.58.
Sat Sep 7 08:51:32 2024 - [info] Starting online master switch..
Sat Sep 7 08:51:32 2024 - [info]
Sat Sep 7 08:51:32 2024 - [info] * Phase 1: Configuration Check Phase..
Sat Sep 7 08:51:32 2024 - [info]
Sat Sep 7 08:51:32 2024 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Sat Sep 7 08:51:32 2024 - [info] Reading application default configuration from /etc/masterha/app1.cnf..
Sat Sep 7 08:51:32 2024 - [info] Reading server configuration from /etc/masterha/app1.cnf..
Sat Sep 7 08:51:33 2024 - [info] GTID failover mode = 1
Sat Sep 7 08:51:33 2024 - [info] Current Alive Master: 172.25.254.130(172.25.254.130:3306)
Sat Sep 7 08:51:33 2024 - [info] Alive Slaves:
Sat Sep 7 08:51:33 2024 - [info] 172.25.254.150(172.25.254.150:3306) Version=5.7.44-log (oldest major version between slaves) log-bin:enabled
Sat Sep 7 08:51:33 2024 - [info] GTID ON
Sat Sep 7 08:51:33 2024 - [info] Replicating from 172.25.254.130(172.25.254.130:3306)
Sat Sep 7 08:51:33 2024 - [info] Primary candidate for the new Master (candidate_master is set)
Sat Sep 7 08:51:33 2024 - [info] 172.25.254.160(172.25.254.160:3306) Version=5.7.44-log (oldest major version between slaves) log-bin:enabled
Sat Sep 7 08:51:33 2024 - [info] GTID ON
Sat Sep 7 08:51:33 2024 - [info] Replicating from 172.25.254.130(172.25.254.130:3306)
Sat Sep 7 08:51:33 2024 - [info]     Not candidate for the new Master (no_master is set)
It is better to execute FLUSH NO_WRITE_TO_BINLOG TABLES on the master before switching. Is it ok to execute on 172.25.254.130(172.25.254.130:3306)? (YES/no): yes
Sat Sep 7 08:51:36 2024 - [info] Executing FLUSH NO_WRITE_TO_BINLOG TABLES. This may take long time..
Sat Sep 7 08:51:36 2024 - [info] ok.
Sat Sep 7 08:51:36 2024 - [info] Checking MHA is not monitoring or doing failover..
Sat Sep 7 08:51:36 2024 - [info] Checking replication health on 172.25.254.150..
Sat Sep 7 08:51:36 2024 - [info] ok.
Sat Sep 7 08:51:36 2024 - [info] Checking replication health on 172.25.254.160..
Sat Sep 7 08:51:36 2024 - [info] ok.
Sat Sep 7 08:51:36 2024 - [info] 172.25.254.150 can be new master.
Sat Sep 7 08:51:36 2024 - [info]
From:
172.25.254.130(172.25.254.130:3306) (current master)
 +--172.25.254.150(172.25.254.150:3306)
 +--172.25.254.160(172.25.254.160:3306)

To:
172.25.254.150(172.25.254.150:3306) (new master)
 +--172.25.254.160(172.25.254.160:3306)
 +--172.25.254.130(172.25.254.130:3306)

Starting master switch from 172.25.254.130(172.25.254.130:3306) to 172.25.254.150(172.25.254.150:3306)? (yes/NO): yes
Sat Sep 7 08:51:40 2024 - [info] Checking whether 172.25.254.150(172.25.254.150:3306) is ok for the new master..
Sat Sep 7 08:51:40 2024 - [info] ok.
Sat Sep 7 08:51:40 2024 - [info] 172.25.254.130(172.25.254.130:3306): SHOW SLAVE STATUS returned empty result. To check replication filtering rules, temporarily executing CHANGE MASTER to a dummy host.
Sat Sep 7 08:51:40 2024 - [info] 172.25.254.130(172.25.254.130:3306): Resetting slave pointing to the dummy host.
Sat Sep 7 08:51:40 2024 - [info] ** Phase 1: Configuration Check Phase completed.
Sat Sep 7 08:51:40 2024 - [info]
Sat Sep 7 08:51:40 2024 - [info] * Phase 2: Rejecting updates Phase..
Sat Sep 7 08:51:40 2024 - [info]
master_ip_online_change_script is not defined. If you do not disable writes on the current master manually, applications keep writing on the current master. Is it ok to proceed? (yes/NO): yes
Sat Sep 7 08:51:57 2024 - [info] Locking all tables on the orig master to reject updates from everybody (including root):
Sat Sep 7 08:51:57 2024 - [info] Executing FLUSH TABLES WITH READ LOCK..
Sat Sep 7 08:51:57 2024 - [info] ok.
Sat Sep 7 08:51:57 2024 - [info] Orig master binlog:pos is mysql-bin.000002:1119.
Sat Sep 7 08:51:57 2024 - [info] Waiting to execute all relay logs on 172.25.254.150(172.25.254.150:3306)..
Sat Sep 7 08:51:57 2024 - [info] master_pos_wait(mysql-bin.000002:1119) completed on 172.25.254.150(172.25.254.150:3306). Executed 0 events.
Sat Sep 7 08:51:57 2024 - [info] done.
Sat Sep 7 08:51:57 2024 - [info] Getting new master's binlog name and position..
Sat Sep 7 08:51:57 2024 - [info] mysql-bin.000002:1363
Sat Sep 7 08:51:57 2024 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='172.25.254.150', MASTER_PORT=3306, MASTER_AUTO_POSITION=1, MASTER_USER='test', MASTER_PASSWORD='xxx';
Sat Sep 7 08:51:57 2024 - [info]
Sat Sep 7 08:51:57 2024 - [info] * Switching slaves in parallel..
Sat Sep 7 08:51:57 2024 - [info]
Sat Sep 7 08:51:57 2024 - [info] -- Slave switch on host 172.25.254.160(172.25.254.160:3306) started, pid: 1770
Sat Sep 7 08:51:57 2024 - [info]
Sat Sep 7 08:51:59 2024 - [info] Log messages from 172.25.254.160 ...
Sat Sep 7 08:51:59 2024 - [info]
Sat Sep 7 08:51:57 2024 - [info] Waiting to execute all relay logs on 172.25.254.160(172.25.254.160:3306)..
Sat Sep 7 08:51:57 2024 - [info] master_pos_wait(mysql-bin.000002:1119) completed on 172.25.254.160(172.25.254.160:3306). Executed 0 events.
Sat Sep 7 08:51:57 2024 - [info] done.
Sat Sep 7 08:51:57 2024 - [info] Resetting slave 172.25.254.160(172.25.254.160:3306) and starting replication from the new master 172.25.254.150(172.25.254.150:3306)..
Sat Sep 7 08:51:57 2024 - [info] Executed CHANGE MASTER.
Sat Sep 7 08:51:58 2024 - [info] Slave started.
Sat Sep 7 08:51:59 2024 - [info] End of log messages from 172.25.254.160 ...
Sat Sep 7 08:51:59 2024 - [info]
Sat Sep 7 08:51:59 2024 - [info] -- Slave switch on host 172.25.254.160(172.25.254.160:3306) succeeded.
Sat Sep 7 08:51:59 2024 - [info] Unlocking all tables on the orig master:
Sat Sep 7 08:51:59 2024 - [info] Executing UNLOCK TABLES..
Sat Sep 7 08:51:59 2024 - [info] ok.
Sat Sep 7 08:51:59 2024 - [info] Starting orig master as a new slave..
Sat Sep 7 08:51:59 2024 - [info] Resetting slave 172.25.254.130(172.25.254.130:3306) and starting replication from the new master 172.25.254.150(172.25.254.150:3306)..
Sat Sep 7 08:51:59 2024 - [info] Executed CHANGE MASTER.
Sat Sep 7 08:52:00 2024 - [info] Slave started.
Sat Sep 7 08:52:00 2024 - [info] All new slave servers switched successfully.
Sat Sep 7 08:52:00 2024 - [info]
Sat Sep 7 08:52:00 2024 - [info] * Phase 5: New master cleanup phase..
Sat Sep 7 08:52:00 2024 - [info]
Sat Sep 7 08:52:00 2024 - [info] 172.25.254.150: Resetting slave info succeeded.
Sat Sep 7 08:52:00 2024 - [info] Switching master to 172.25.254.150(172.25.254.150:3306) completed successfully.
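At this point the online switchover is complete. As a quick sanity check (not part of the original capture), replication can be verified directly on the old master, which is now a slave; for example on mysql1 (172.25.254.130):

[root@mysql1 ~]# mysql -uroot -p -e "show slave status\G" | grep -E 'Master_Host|Slave_IO_Running|Slave_SQL_Running'

Master_Host should now report 172.25.254.150, and both Slave_IO_Running and Slave_SQL_Running should be Yes.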
Checks
[root@mysql-mha ~]# masterha_check_ssh --conf=/etc/masterha/app1.cnf
Sat Sep 7 09:01:28 2024 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Sat Sep 7 09:01:28 2024 - [info] Reading application default configuration from /etc/masterha/app1.cnf..
Sat Sep 7 09:01:28 2024 - [info] Reading server configuration from /etc/masterha/app1.cnf..
Sat Sep 7 09:01:28 2024 - [info] Starting SSH connection tests..
Sat Sep 7 09:01:29 2024 - [debug]
Sat Sep 7 09:01:28 2024 - [debug] Connecting via SSH from root@172.25.254.130(172.25.254.130:22) to root@172.25.254.150(172.25.254.150:22)..
Sat Sep 7 09:01:28 2024 - [debug] ok.
Sat Sep 7 09:01:28 2024 - [debug] Connecting via SSH from root@172.25.254.130(172.25.254.130:22) to root@172.25.254.160(172.25.254.160:22)..
Sat Sep 7 09:01:28 2024 - [debug] ok.
Sat Sep 7 09:01:29 2024 - [debug]
Sat Sep 7 09:01:28 2024 - [debug] Connecting via SSH from root@172.25.254.150(172.25.254.150:22) to root@172.25.254.130(172.25.254.130:22)..
Sat Sep 7 09:01:29 2024 - [debug] ok.
Sat Sep 7 09:01:29 2024 - [debug] Connecting via SSH from root@172.25.254.150(172.25.254.150:22) to root@172.25.254.160(172.25.254.160:22)..
Sat Sep 7 09:01:29 2024 - [debug] ok.
Sat Sep 7 09:01:30 2024 - [debug]
Sat Sep 7 09:01:29 2024 - [debug] Connecting via SSH from root@172.25.254.160(172.25.254.160:22) to root@172.25.254.130(172.25.254.130:22)..
Sat Sep 7 09:01:29 2024 - [debug] ok.
Sat Sep 7 09:01:29 2024 - [debug] Connecting via SSH from root@172.25.254.160(172.25.254.160:22) to root@172.25.254.150(172.25.254.150:22)..
Sat Sep 7 09:01:29 2024 - [debug] ok.
Sat Sep 7 09:01:30 2024 - [info] All SSH connection tests passed successfully.
[root@mysql-mha ~]# masterha_check_repl --conf=/etc/masterha/app1.cnf
Sat Sep 7 09:01:40 2024 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Sat Sep 7 09:01:40 2024 - [info] Reading application default configuration from /etc/masterha/app1.cnf..
Sat Sep 7 09:01:40 2024 - [info] Reading server configuration from /etc/masterha/app1.cnf..
Sat Sep 7 09:01:40 2024 - [info] MHA::MasterMonitor version 0.58.
Sat Sep 7 09:01:41 2024 - [info] GTID failover mode = 1
Sat Sep 7 09:01:41 2024 - [info] Dead Servers:
Sat Sep 7 09:01:41 2024 - [info] Alive Servers:
Sat Sep 7 09:01:41 2024 - [info] 172.25.254.130(172.25.254.130:3306)
Sat Sep 7 09:01:41 2024 - [info] 172.25.254.150(172.25.254.150:3306)
Sat Sep 7 09:01:41 2024 - [info] 172.25.254.160(172.25.254.160:3306)
Sat Sep 7 09:01:41 2024 - [info] Alive Slaves:
Sat Sep 7 09:01:41 2024 - [info] 172.25.254.130(172.25.254.130:3306) Version=5.7.44-log (oldest major version between slaves) log-bin:enabled
Sat Sep 7 09:01:41 2024 - [info] GTID ON
Sat Sep 7 09:01:41 2024 - [info] Replicating from 172.25.254.150(172.25.254.150:3306)
Sat Sep 7 09:01:41 2024 - [info] Primary candidate for the new Master (candidate_master is set)
Sat Sep 7 09:01:41 2024 - [info] 172.25.254.160(172.25.254.160:3306) Version=5.7.44-log (oldest major version between slaves) log-bin:enabled
Sat Sep 7 09:01:41 2024 - [info] GTID ON
Sat Sep 7 09:01:41 2024 - [info] Replicating from 172.25.254.150(172.25.254.150:3306)
Sat Sep 7 09:01:41 2024 - [info] Not candidate for the new Master (no_master is set)
Sat Sep 7 09:01:41 2024 - [info] Current Alive Master: 172.25.254.150(172.25.254.150:3306)
Sat Sep 7 09:01:41 2024 - [info] Checking slave configurations..
Sat Sep 7 09:01:41 2024 - [info] read_only=1 is not set on slave 172.25.254.160(172.25.254.160:3306).
Sat Sep 7 09:01:41 2024 - [info] Checking replication filtering settings..
Sat Sep 7 09:01:41 2024 - [info] binlog_do_db= , binlog_ignore_db=
Sat Sep 7 09:01:41 2024 - [info] Replication filtering check ok.
Sat Sep 7 09:01:41 2024 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking.
Sat Sep 7 09:01:41 2024 - [info] Checking SSH publickey authentication settings on the current master..
Sat Sep 7 09:01:42 2024 - [info] HealthCheck: SSH to 172.25.254.150 is reachable.
Sat Sep 7 09:01:42 2024 - [info]
172.25.254.150(172.25.254.150:3306) (current master)
 +--172.25.254.130(172.25.254.130:3306)
 +--172.25.254.160(172.25.254.160:3306)

Sat Sep 7 09:01:42 2024 - [info] Checking replication health on 172.25.254.130..
Sat Sep 7 09:01:42 2024 - [info] ok.
Sat Sep 7 09:01:42 2024 - [info] Checking replication health on 172.25.254.160..
Sat Sep 7 09:01:42 2024 - [info] ok.
Sat Sep 7 09:01:42 2024 - [warning] master_ip_failover_script is not defined.
Sat Sep 7 09:01:42 2024 - [warning] shutdown_script is not defined.
Sat Sep 7 09:01:42 2024 - [info] Got exit code 0 (Not master dead).

MySQL Replication Health is OK.
Manual switchover after a master failure
# Simulate a master failure
[root@mysql2 ~]# /etc/init.d/mysqld stop
Shutting down MySQL........... SUCCESS!
# --ignore_last_failover ignores the lock file that a previous switchover left in the /etc/masterha/ directory
[root@mysql-mha ~]# masterha_master_switch --master_state=dead --conf=/etc/masterha/app1.cnf --dead_master_host=172.25.254.150 --dead_master_port=3306 --new_master_host=172.25.254.130 --new_master_port=3306 --ignore_last_failover
--dead_master_ip=<dead_master_ip> is not set. Using 172.25.254.150.
Sat Sep 7 09:06:39 2024 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Sat Sep 7 09:06:39 2024 - [info] Reading application default configuration from /etc/masterha/app1.cnf..
Sat Sep 7 09:06:39 2024 - [info] Reading server configuration from /etc/masterha/app1.cnf..
Sat Sep 7 09:06:39 2024 - [info] MHA::MasterFailover version 0.58.
Sat Sep 7 09:06:39 2024 - [info] Starting master failover.
Sat Sep 7 09:06:39 2024 - [info]
Sat Sep 7 09:06:39 2024 - [info] * Phase 1: Configuration Check Phase..
Sat Sep 7 09:06:39 2024 - [info]
Sat Sep 7 09:06:40 2024 - [info] GTID failover mode = 1
Sat Sep 7 09:06:40 2024 - [info] Dead Servers:
Sat Sep 7 09:06:40 2024 - [info] 172.25.254.150(172.25.254.150:3306)
Sat Sep 7 09:06:40 2024 - [info] Checking master reachability via MySQL(double check)...
Sat Sep 7 09:06:40 2024 - [info] ok.
Sat Sep 7 09:06:40 2024 - [info] Alive Servers:
Sat Sep 7 09:06:40 2024 - [info] 172.25.254.130(172.25.254.130:3306)
Sat Sep 7 09:06:40 2024 - [info] 172.25.254.160(172.25.254.160:3306)
Sat Sep 7 09:06:40 2024 - [info] Alive Slaves:
Sat Sep 7 09:06:40 2024 - [info] 172.25.254.130(172.25.254.130:3306) Version=5.7.44-log (oldest major version between slaves) log-bin:enabled
Sat Sep 7 09:06:40 2024 - [info] GTID ON
Sat Sep 7 09:06:40 2024 - [info] Replicating from 172.25.254.150(172.25.254.150:3306)
Sat Sep 7 09:06:40 2024 - [info] Primary candidate for the new Master (candidate_master is set)
Sat Sep 7 09:06:40 2024 - [info] 172.25.254.160(172.25.254.160:3306) Version=5.7.44-log (oldest major version between slaves) log-bin:enabled
Sat Sep 7 09:06:40 2024 - [info] GTID ON
Sat Sep 7 09:06:40 2024 - [info] Replicating from 172.25.254.150(172.25.254.150:3306)
Sat Sep 7 09:06:40 2024 - [info] Not candidate for the new Master (no_master is set)
Master 172.25.254.150(172.25.254.150:3306) is dead. Proceed? (yes/NO): yes
Sat Sep 7 09:07:06 2024 - [info] Starting GTID based failover.
Sat Sep 7 09:07:06 2024 - [info]
Sat Sep 7 09:07:06 2024 - [info] ** Phase 1: Configuration Check Phase completed.
Sat Sep 7 09:07:06 2024 - [info]
Sat Sep 7 09:07:06 2024 - [info] * Phase 2: Dead Master Shutdown Phase..
Sat Sep 7 09:07:06 2024 - [info]
Sat Sep 7 09:07:11 2024 - [warning] HealthCheck: Got timeout on checking SSH connection to 172.25.254.150! at /usr/share/perl5/vendor_perl/MHA/HealthCheck.pm line 343, <STDIN> line 1.
Sat Sep 7 09:07:11 2024 - [info] Forcing shutdown so that applications never connect to the current master..
Sat Sep 7 09:07:11 2024 - [warning] master_ip_failover_script is not set. Skipping invalidating dead master IP address.
Sat Sep 7 09:07:11 2024 - [warning] shutdown_script is not set. Skipping explicit shutting down of the dead master.
Sat Sep 7 09:07:11 2024 - [info] * Phase 2: Dead Master Shutdown Phase completed.
Sat Sep 7 09:07:11 2024 - [info]
Sat Sep 7 09:07:11 2024 - [info] * Phase 3: Master Recovery Phase..
Sat Sep 7 09:07:11 2024 - [info]
Sat Sep 7 09:07:11 2024 - [info] * Phase 3.1: Getting Latest Slaves Phase..
Sat Sep 7 09:07:11 2024 - [info]
Sat Sep 7 09:07:11 2024 - [info] The latest binary log file/position on all slaves is mysql-bin.000002:1363
Sat Sep 7 09:07:11 2024 - [info] Retrieved Gtid Set: 6408ca0c-6cad-11ef-9898-000c297763e2:1
Sat Sep 7 09:07:11 2024 - [info] Latest slaves (Slaves that received relay log files to the latest):
Sat Sep 7 09:07:11 2024 - [info] 172.25.254.130(172.25.254.130:3306) Version=5.7.44-log (oldest major version between slaves) log-bin:enabled
Sat Sep 7 09:07:11 2024 - [info] GTID ON
Sat Sep 7 09:07:11 2024 - [info] Replicating from 172.25.254.150(172.25.254.150:3306)
Sat Sep 7 09:07:11 2024 - [info] Primary candidate for the new Master (candidate_master is set)
Sat Sep 7 09:07:11 2024 - [info] 172.25.254.160(172.25.254.160:3306) Version=5.7.44-log (oldest major version between slaves) log-bin:enabled
Sat Sep 7 09:07:11 2024 - [info] GTID ON
Sat Sep 7 09:07:11 2024 - [info] Replicating from 172.25.254.150(172.25.254.150:3306)
Sat Sep 7 09:07:11 2024 - [info] Not candidate for the new Master (no_master is set)
Sat Sep 7 09:07:11 2024 - [info] The oldest binary log file/position on all slaves is mysql-bin.000002:1363
Sat Sep 7 09:07:11 2024 - [info] Retrieved Gtid Set: 6408ca0c-6cad-11ef-9898-000c297763e2:1
Sat Sep 7 09:07:11 2024 - [info] Oldest slaves:
Sat Sep 7 09:07:11 2024 - [info] 172.25.254.130(172.25.254.130:3306) Version=5.7.44-log (oldest major version between slaves) log-bin:enabled
Sat Sep 7 09:07:11 2024 - [info] GTID ON
Sat Sep 7 09:07:11 2024 - [info] Replicating from 172.25.254.150(172.25.254.150:3306)
Sat Sep 7 09:07:11 2024 - [info] Primary candidate for the new Master (candidate_master is set)
Sat Sep 7 09:07:11 2024 - [info] 172.25.254.160(172.25.254.160:3306) Version=5.7.44-log (oldest major version between slaves) log-bin:enabled
Sat Sep 7 09:07:11 2024 - [info] GTID ON
Sat Sep 7 09:07:11 2024 - [info] Replicating from 172.25.254.150(172.25.254.150:3306)
Sat Sep 7 09:07:11 2024 - [info] Not candidate for the new Master (no_master is set)
Sat Sep 7 09:07:11 2024 - [info]
Sat Sep 7 09:07:11 2024 - [info] * Phase 3.3: Determining New Master Phase..
Sat Sep 7 09:07:11 2024 - [info]
Sat Sep 7 09:07:11 2024 - [info] 172.25.254.130 can be new master.
Sat Sep 7 09:07:11 2024 - [info] New master is 172.25.254.130(172.25.254.130:3306)
Sat Sep 7 09:07:11 2024 - [info] Starting master failover..
Sat Sep 7 09:07:11 2024 - [info]
From:
172.25.254.150(172.25.254.150:3306) (current master)
 +--172.25.254.130(172.25.254.130:3306)
 +--172.25.254.160(172.25.254.160:3306)

To:
172.25.254.130(172.25.254.130:3306) (new master)
 +--172.25.254.160(172.25.254.160:3306)

Starting master switch from 172.25.254.150(172.25.254.150:3306) to 172.25.254.130(172.25.254.130:3306)? (yes/NO): yes
Sat Sep 7 09:07:16 2024 - [info] New master decided manually is 172.25.254.130(172.25.254.130:3306)
Sat Sep 7 09:07:16 2024 - [info]
Sat Sep 7 09:07:16 2024 - [info] * Phase 3.3: New Master Recovery Phase..
Sat Sep 7 09:07:16 2024 - [info]
Sat Sep 7 09:07:16 2024 - [info] Waiting all logs to be applied..
Sat Sep 7 09:07:16 2024 - [info] done.
Sat Sep 7 09:07:16 2024 - [info] Getting new master's binlog name and position..
Sat Sep 7 09:07:16 2024 - [info] mysql-bin.000002:1363
Sat Sep 7 09:07:16 2024 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='172.25.254.130', MASTER_PORT=3306, MASTER_AUTO_POSITION=1, MASTER_USER='test', MASTER_PASSWORD='xxx';
Sat Sep 7 09:07:16 2024 - [info] Master Recovery succeeded. File:Pos:Exec_Gtid_Set: mysql-bin.000002, 1363, 6408ca0c-6cad-11ef-9898-000c297763e2:1,
c67cab0f-6cac-11ef-9eae-000c29d16b0b:1-4
Sat Sep 7 09:07:16 2024 - [warning] master_ip_failover_script is not set. Skipping taking over new master IP address.
Sat Sep 7 09:07:16 2024 - [info] Setting read_only=0 on 172.25.254.130(172.25.254.130:3306)..
Sat Sep 7 09:07:16 2024 - [info] ok.
Sat Sep 7 09:07:16 2024 - [info] ** Finished master recovery successfully.
Sat Sep 7 09:07:16 2024 - [info] * Phase 3: Master Recovery Phase completed.
Sat Sep 7 09:07:16 2024 - [info]
Sat Sep 7 09:07:16 2024 - [info] * Phase 4: Slaves Recovery Phase..
Sat Sep 7 09:07:16 2024 - [info]
Sat Sep 7 09:07:16 2024 - [info]
Sat Sep 7 09:07:16 2024 - [info] * Phase 4.1: Starting Slaves in parallel..
Sat Sep 7 09:07:16 2024 - [info]
Sat Sep 7 09:07:16 2024 - [info] -- Slave recovery on host 172.25.254.160(172.25.254.160:3306) started, pid: 1837. Check tmp log /etc/masterha/172.25.254.160_3306_20240907090638.log if it takes time..
Sat Sep 7 09:07:17 2024 - [info]
Sat Sep 7 09:07:17 2024 - [info] Log messages from 172.25.254.160 ...
Sat Sep 7 09:07:17 2024 - [info]
Sat Sep 7 09:07:16 2024 - [info] Resetting slave 172.25.254.160(172.25.254.160:3306) and starting replication from the new master 172.25.254.130(172.25.254.130:3306)..
Sat Sep 7 09:07:16 2024 - [info] Executed CHANGE MASTER.
Sat Sep 7 09:07:16 2024 - [info] Slave started.
Sat Sep 7 09:07:16 2024 - [info] gtid_wait(6408ca0c-6cad-11ef-9898-000c297763e2:1,
c67cab0f-6cac-11ef-9eae-000c29d16b0b:1-4) completed on 172.25.254.160(172.25.254.160:3306). Executed 0 events.
Sat Sep 7 09:07:17 2024 - [info] End of log messages from 172.25.254.160.
Sat Sep 7 09:07:17 2024 - [info] -- Slave on host 172.25.254.160(172.25.254.160:3306) started.
Sat Sep 7 09:07:17 2024 - [info] All new slave servers recovered successfully.
Sat Sep 7 09:07:17 2024 - [info]
Sat Sep 7 09:07:17 2024 - [info] * Phase 5: New master cleanup phase..
Sat Sep 7 09:07:17 2024 - [info]
Sat Sep 7 09:07:17 2024 - [info] Resetting slave info on the new master..
Sat Sep 7 09:07:17 2024 - [info] 172.25.254.130: Resetting slave info succeeded.
Sat Sep 7 09:07:17 2024 - [info] Master failover to 172.25.254.130(172.25.254.130:3306) completed successfully.
Sat Sep 7 09:07:17 2024 - [info]

----- Failover Report -----

app1: MySQL Master failover 172.25.254.150(172.25.254.150:3306) to 172.25.254.130(172.25.254.130:3306) succeeded

Master 172.25.254.150(172.25.254.150:3306) is down!

Check MHA Manager logs at mysql-mha for details.

Started manual(interactive) failover.
Selected 172.25.254.130(172.25.254.130:3306) as a new master.
172.25.254.130(172.25.254.130:3306): OK: Applying all logs succeeded.
172.25.254.160(172.25.254.160:3306): OK: Slave started, replicating from 172.25.254.130(172.25.254.130:3306)
172.25.254.130(172.25.254.130:3306): Resetting slave info succeeded.
Master failover to 172.25.254.130(172.25.254.130:3306) completed successfully.
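MHA set read_only=0 on the new master during Phase 3.3 above; before reattaching the failed node it can be worth confirming this directly (a quick check, not part of the original capture), for example on mysql1 (172.25.254.130):

[root@mysql1 ~]# mysql -uroot -p -e "select @@server_id, @@read_only;"

The query should return read_only = 0, i.e. the new master accepts writes.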
Recovering the failed MySQL node
[root@mysql2 ~]# /etc/init.d/mysqld start
[root@mysql2 ~]# mysql -uroot -p
mysql> change master to master_host='172.25.254.130',master_user='test',master_password='test',master_auto_position=1;
mysql> start slave;
# Test the replication status
[root@mysql-mha ~]# masterha_check_repl --conf=/etc/masterha/app1.cnf
Sat Sep 7 09:26:21 2024 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Sat Sep 7 09:26:21 2024 - [info] Reading application default configuration from /etc/masterha/app1.cnf..
Sat Sep 7 09:26:21 2024 - [info] Reading server configuration from /etc/masterha/app1.cnf..
Sat Sep 7 09:26:21 2024 - [info] MHA::MasterMonitor version 0.58.
Sat Sep 7 09:26:22 2024 - [info] GTID failover mode = 1
Sat Sep 7 09:26:22 2024 - [info] Dead Servers:
Sat Sep 7 09:26:22 2024 - [info] Alive Servers:
Sat Sep 7 09:26:22 2024 - [info] 172.25.254.130(172.25.254.130:3306)
Sat Sep 7 09:26:22 2024 - [info] 172.25.254.150(172.25.254.150:3306)
Sat Sep 7 09:26:22 2024 - [info] 172.25.254.160(172.25.254.160:3306)
Sat Sep 7 09:26:22 2024 - [info] Alive Slaves:
Sat Sep 7 09:26:22 2024 - [info] 172.25.254.150(172.25.254.150:3306) Version=5.7.44-log (oldest major version between slaves) log-bin:enabled
Sat Sep 7 09:26:22 2024 - [info] GTID ON
Sat Sep 7 09:26:22 2024 - [info] Replicating from 172.25.254.130(172.25.254.130:3306)
Sat Sep 7 09:26:22 2024 - [info] Primary candidate for the new Master (candidate_master is set)
Sat Sep 7 09:26:22 2024 - [info] 172.25.254.160(172.25.254.160:3306) Version=5.7.44-log (oldest major version between slaves) log-bin:enabled
Sat Sep 7 09:26:22 2024 - [info] GTID ON
Sat Sep 7 09:26:22 2024 - [info] Replicating from 172.25.254.130(172.25.254.130:3306)
Sat Sep 7 09:26:22 2024 - [info] Not candidate for the new Master (no_master is set)
Sat Sep 7 09:26:22 2024 - [info] Current Alive Master: 172.25.254.130(172.25.254.130:3306)
Sat Sep 7 09:26:22 2024 - [info] Checking slave configurations..
Sat Sep 7 09:26:22 2024 - [info] read_only=1 is not set on slave 172.25.254.150(172.25.254.150:3306).
Sat Sep 7 09:26:22 2024 - [info] read_only=1 is not set on slave 172.25.254.160(172.25.254.160:3306).
Sat Sep 7 09:26:22 2024 - [info] Checking replication filtering settings..
Sat Sep 7 09:26:22 2024 - [info] binlog_do_db= , binlog_ignore_db=
Sat Sep 7 09:26:22 2024 - [info] Replication filtering check ok.
Sat Sep 7 09:26:22 2024 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking.
Sat Sep 7 09:26:22 2024 - [info] Checking SSH publickey authentication settings on the current master..
Sat Sep 7 09:26:22 2024 - [info] HealthCheck: SSH to 172.25.254.130 is reachable.
Sat Sep 7 09:26:22 2024 - [info]
172.25.254.130(172.25.254.130:3306) (current master)
 +--172.25.254.150(172.25.254.150:3306)
 +--172.25.254.160(172.25.254.160:3306)

Sat Sep 7 09:26:22 2024 - [info] Checking replication health on 172.25.254.150..
Sat Sep 7 09:26:22 2024 - [info] ok.
Sat Sep 7 09:26:22 2024 - [info] Checking replication health on 172.25.254.160..
Sat Sep 7 09:26:22 2024 - [info] ok.
Sat Sep 7 09:26:22 2024 - [warning] master_ip_failover_script is not defined.
Sat Sep 7 09:26:22 2024 - [warning] shutdown_script is not defined.
Sat Sep 7 09:26:22 2024 - [info] Got exit code 0 (Not master dead).

MySQL Replication Health is OK.
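In addition to masterha_check_repl, the recovered node itself can be checked for replication errors (a sketch, not part of the original capture), for example on mysql2 (172.25.254.150):

[root@mysql2 ~]# mysql -uroot -p -e "show slave status\G" | grep -E 'Master_Host|Last_IO_Error|Last_SQL_Error|Seconds_Behind_Master'

Master_Host should point at 172.25.254.130 and both error fields should be empty.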
Automatic failover
[root@mysql-mha masterha]# rm -fr app1.failover.complete    # remove the failover lock file
# The monitor watches the master through the given configuration file; when the master fails it
# performs the failover automatically and then exits, so the failover is not repeated
[root@mysql-mha masterha]# masterha_manager --conf=/etc/masterha/app1.cnf
[root@mysql-mha ~]# cat /etc/masterha/manager.log
# Simulate a failure of the current master (mysql1), then bring it back as a slave of the new master
[root@mysql1 ~]# /etc/init.d/mysqld stop
Shutting down MySQL........... SUCCESS!
[root@mysql1 ~]# /etc/init.d/mysqld start
mysql> change master to master_host='172.25.254.150',master_user='test',master_password='test',master_auto_position=1;
mysql> start slave;
[root@mysql-mha masterha]# masterha_check_repl --conf=/etc/masterha/app1.cnf
Sat Sep 7 10:10:21 2024 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Sat Sep 7 10:10:21 2024 - [info] Reading application default configuration from /etc/masterha/app1.cnf..
Sat Sep 7 10:10:21 2024 - [info] Reading server configuration from /etc/masterha/app1.cnf..
Sat Sep 7 10:10:21 2024 - [info] MHA::MasterMonitor version 0.58.
Sat Sep 7 10:10:22 2024 - [info] GTID failover mode = 1
Sat Sep 7 10:10:22 2024 - [info] Dead Servers:
Sat Sep 7 10:10:22 2024 - [info] Alive Servers:
Sat Sep 7 10:10:22 2024 - [info] 172.25.254.130(172.25.254.130:3306)
Sat Sep 7 10:10:22 2024 - [info] 172.25.254.150(172.25.254.150:3306)
Sat Sep 7 10:10:22 2024 - [info] 172.25.254.160(172.25.254.160:3306)
Sat Sep 7 10:10:22 2024 - [info] Alive Slaves:
Sat Sep 7 10:10:22 2024 - [info] 172.25.254.130(172.25.254.130:3306) Version=5.7.44-log (oldest major version between slaves) log-bin:enabled
Sat Sep 7 10:10:22 2024 - [info] GTID ON
Sat Sep 7 10:10:22 2024 - [info] Replicating from 172.25.254.150(172.25.254.150:3306)
Sat Sep 7 10:10:22 2024 - [info] Primary candidate for the new Master (candidate_master is set)
Sat Sep 7 10:10:22 2024 - [info] 172.25.254.160(172.25.254.160:3306) Version=5.7.44-log (oldest major version between slaves) log-bin:enabled
Sat Sep 7 10:10:22 2024 - [info] GTID ON
Sat Sep 7 10:10:22 2024 - [info] Replicating from 172.25.254.150(172.25.254.150:3306)
Sat Sep 7 10:10:22 2024 - [info] Not candidate for the new Master (no_master is set)
Sat Sep 7 10:10:22 2024 - [info] Current Alive Master: 172.25.254.150(172.25.254.150:3306)
Sat Sep 7 10:10:22 2024 - [info] Checking slave configurations..
Sat Sep 7 10:10:22 2024 - [info] read_only=1 is not set on slave 172.25.254.130(172.25.254.130:3306).
Sat Sep 7 10:10:22 2024 - [info] read_only=1 is not set on slave 172.25.254.160(172.25.254.160:3306).
Sat Sep 7 10:10:22 2024 - [info] Checking replication filtering settings..
Sat Sep 7 10:10:22 2024 - [info] binlog_do_db= , binlog_ignore_db=
Sat Sep 7 10:10:22 2024 - [info] Replication filtering check ok.
Sat Sep 7 10:10:22 2024 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking.
Sat Sep 7 10:10:22 2024 - [info] Checking SSH publickey authentication settings on the current master..
Sat Sep 7 10:10:27 2024 - [warning] HealthCheck: Got timeout on checking SSH connection to 172.25.254.150! at /usr/share/perl5/vendor_perl/MHA/HealthCheck.pm line 343.
Sat Sep 7 10:10:27 2024 - [info]
172.25.254.150(172.25.254.150:3306) (current master)
 +--172.25.254.130(172.25.254.130:3306)
 +--172.25.254.160(172.25.254.160:3306)

Sat Sep 7 10:10:27 2024 - [info] Checking replication health on 172.25.254.130..
Sat Sep 7 10:10:27 2024 - [info] ok.
Sat Sep 7 10:10:27 2024 - [info] Checking replication health on 172.25.254.160..
Sat Sep 7 10:10:27 2024 - [info] ok.
Sat Sep 7 10:10:27 2024 - [warning] master_ip_failover_script is not defined.
Sat Sep 7 10:10:27 2024 - [warning] shutdown_script is not defined.
Sat Sep 7 10:10:27 2024 - [info] Got exit code 0 (Not master dead).

MySQL Replication Health is OK.
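In the run above, masterha_manager was started in the foreground and exits after performing a single failover. For unattended operation it is commonly run in the background and queried with masterha_check_status; a minimal sketch (not from the original capture):

[root@mysql-mha ~]# nohup masterha_manager --conf=/etc/masterha/app1.cnf --ignore_last_failover &> /etc/masterha/manager.log &
[root@mysql-mha ~]# masterha_check_status --conf=/etc/masterha/app1.cnf

While the monitor is running, masterha_check_status reports a PING_OK status together with the current master; once the monitor has exited it reports that app1 is not running.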
Adding VIP support to MHA
[root@mysql-mha ~]# ls
anaconda-ks.cfg master_ip_failover master_ip_online_change MHA-7 MHA-7.zip
[root@mysql-mha ~]# cp master_ip_failover master_ip_online_change /usr/local/bin/
[root@mysql-mha ~]# chmod +x /usr/local/bin/master_ip_*
[root@mysql-mha ~]# vim /usr/local/bin/master_ip_failover
my $vip = '172.25.254.100/24';
my $ssh_start_vip = "/sbin/ip addr add $vip dev eth0";
my $ssh_stop_vip = "/sbin/ip addr del $vip dev eth0";
[root@mysql-mha ~]# vim /usr/local/bin/master_ip_online_change
my $vip = '172.25.254.100/24';
my $ssh_start_vip = "/sbin/ip addr add $vip dev eth0";
my $ssh_stop_vip = "/sbin/ip addr del $vip dev eth0";
[root@mysql-mha masterha]# masterha_manager --conf=/etc/masterha/app1.cnf    # start the monitor
[root@mysql1 ~]# ip a a 172.25.254.100 dev eth0    # add the VIP on the master node
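Before simulating a failure it may be worth confirming that the VIP is actually bound on the master (a quick check, assuming eth0 as in the scripts above):

[root@mysql1 ~]# ip a show dev eth0 | grep 172.25.254.100

The address should appear in the output.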
Simulating a failure
[root@mysql1 ~]# /etc/init.d/mysqld stop    # stop the service on the master node
[root@mysql2 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host
       valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 00:0c:29:77:63:e2 brd ff:ff:ff:ff:ff:ff
    inet 172.25.254.150/24 brd 172.25.254.255 scope global noprefixroute eth0
       valid_lft forever preferred_lft forever
    inet 172.25.254.131/32 scope global eth0
       valid_lft forever preferred_lft forever
    inet 172.25.254.100/24 scope global secondary eth0    # the VIP has moved to the new master
       valid_lft forever preferred_lft forever
    inet6 fe80::20c:29ff:fe77:63e2/64 scope link
       valid_lft forever preferred_lft forever
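Because masterha_manager exits after a single automatic failover, it leaves a lock file behind that has to be removed before the monitor is started again (the recovery step below removes it). A quick way to confirm this (not part of the original capture):

[root@mysql-mha masterha]# ls /etc/masterha/ | grep failover

The listing should include app1.failover.complete.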
Recovering from the failure
[root@mysql1 ~]# /etc/init.d/mysqld start
[root@mysql-mha masterha]# rm app1.failover.complete
[root@mysql1 ~]# mysql -uroot -p
mysql> change master to master_host='172.25.254.150',master_user='test',master_password='test',master_auto_position=1;
mysql> start slave;
Checking how the VIP moves after a manual switchover
[root@mysql-mha masterha]# masterha_master_switch --conf=/etc/masterha/app1.cnf --master_state=alive --new_master_host=172.25.254.130 --new_master_port=3306 --orig_master_is_new_slave --running_updates_limit=10000
[root@mysql1 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host
       valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 00:0c:29:d1:6b:0b brd ff:ff:ff:ff:ff:ff
    inet 172.25.254.130/24 brd 172.25.254.255 scope global noprefixroute eth0
       valid_lft forever preferred_lft forever
    inet 172.25.254.100/24 scope global secondary eth0    # the VIP has drifted back to this node
       valid_lft forever preferred_lft forever
    inet6 fe80::20c:29ff:fed1:6b0b/64 scope link
       valid_lft forever preferred_lft forever
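Correspondingly, the VIP should no longer be present on the previous master after the switch back (a quick check, not part of the original capture):

[root@mysql2 ~]# ip a show dev eth0 | grep 172.25.254.100

On mysql2 this should now return no output.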