Could someone review this patch for MDEV-5509? I am in particular worried that this might change some subtle behaviour of Seconds_Behind_Master in unexpected ways. Of course I tried my best to make sure such could not occur, but the semantics is really tricky and poorly documented, and I do not think we have good coverage in the test suite.
So I would like to have one other developer check it carefully as well and see if I've missed something. - Kristian. kniel...@knielsen-hq.org writes: > At http://bazaar.launchpad.net/~maria-captains/maria/10.0 > > ------------------------------------------------------------ > revno: 3959 > revision-id: kniel...@knielsen-hq.org-20140108100044-iszrbhddvxkfne71 > parent: kniel...@knielsen-hq.org-20140107105703-sn31yvf80batk1yx > committer: kniel...@knielsen-hq.org > branch nick: mariadb-10.0-base > timestamp: Wed 2014-01-08 11:00:44 +0100 > message: > MDEV-5509: Seconds_behind_master incorrect in parallel replication > > The problem was a race between the SQL driver thread and the worker threads. > The SQL driver thread would set rli->last_master_timestamp to zero to > mark that it has caught up with the master, while the worker threads would > set it to the timestamp of the executed event. This can happen out-of-order > in parallel replication, causing the "caught up" status to be overwritten > and Seconds_Behind_Master to wrongly grow when the slave is idle. > > To fix, introduce a separate flag rli->sql_thread_caught_up to mark that the > SQL driver thread is caught up. This avoids issues with worker threads > overwriting the SQL driver thread status. In parallel replication, we then > make SHOW SLAVE STATUS check in addition that all worker threads are idle > before showing Seconds_Behind_Master as 0 due to slave idle. > === added file 'mysql-test/suite/rpl/r/rpl_parallel2.result' > --- a/mysql-test/suite/rpl/r/rpl_parallel2.result 1970-01-01 00:00:00 > +0000 > +++ b/mysql-test/suite/rpl/r/rpl_parallel2.result 2014-01-08 10:00:44 > +0000 > @@ -0,0 +1,18 @@ > +include/rpl_init.inc [topology=1->2] > +*** MDEV-5509: Incorrect value for Seconds_Behind_Master if parallel > replication *** > +SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; > +include/stop_slave.inc > +SET GLOBAL slave_parallel_threads=5; > +include/start_slave.inc > +CREATE TABLE t1 (a INT PRIMARY KEY, b INT); > +CALL mtr.add_suppression("Unsafe statement written to the binary log using > statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because > it uses a system function that may return a different value on the slave"); > +INSERT INTO t1 VALUES (1,sleep(2)); > +Warnings: > +Note 1592 Unsafe statement written to the binary log using statement > format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a > system function that may return a different value on the slave. > +Seconds_Behind_Master should be zero here because the slave is fully caught > up and idle. > +Seconds_Behind_Master = '0' > +include/stop_slave.inc > +SET GLOBAL slave_parallel_threads=@old_parallel_threads; > +include/start_slave.inc > +DROP TABLE t1; > +include/rpl_end.inc > > === added file 'mysql-test/suite/rpl/t/rpl_parallel2.test' > --- a/mysql-test/suite/rpl/t/rpl_parallel2.test 1970-01-01 00:00:00 > +0000 > +++ b/mysql-test/suite/rpl/t/rpl_parallel2.test 2014-01-08 10:00:44 > +0000 > @@ -0,0 +1,41 @@ > +--source include/have_binlog_format_statement.inc > +--let $rpl_topology=1->2 > +--source include/rpl_init.inc > + > +--echo *** MDEV-5509: Incorrect value for Seconds_Behind_Master if parallel > replication *** > + > +--connection server_2 > +SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; > +--source include/stop_slave.inc > +SET GLOBAL slave_parallel_threads=5; > +--source include/start_slave.inc > + > +--connection server_1 > +CREATE TABLE t1 (a INT PRIMARY KEY, b INT); > +CALL mtr.add_suppression("Unsafe statement written to the binary log using > statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because > it uses a system function that may return a different value on the slave"); > +--save_master_pos > + > +--connection server_2 > +--sync_with_master > + > +--connection server_1 > +INSERT INTO t1 VALUES (1,sleep(2)); > +--save_master_pos > + > +--connection server_2 > +--sync_with_master > + > +--echo Seconds_Behind_Master should be zero here because the slave is fully > caught up and idle. > +--let $status_items= Seconds_Behind_Master > +--source include/show_slave_status.inc > + > + > +--connection server_2 > +--source include/stop_slave.inc > +SET GLOBAL slave_parallel_threads=@old_parallel_threads; > +--source include/start_slave.inc > + > +--connection server_1 > +DROP TABLE t1; > + > +--source include/rpl_end.inc > > === modified file 'sql/rpl_parallel.cc' > --- a/sql/rpl_parallel.cc 2014-01-03 11:20:53 +0000 > +++ b/sql/rpl_parallel.cc 2014-01-08 10:00:44 +0000 > @@ -766,6 +766,27 @@ rpl_parallel::wait_for_done() > } > > > +bool > +rpl_parallel::workers_idle() > +{ > + struct rpl_parallel_entry *e; > + uint32 i, max_i; > + > + max_i= domain_hash.records; > + for (i= 0; i < max_i; ++i) > + { > + bool active; > + e= (struct rpl_parallel_entry *)my_hash_element(&domain_hash, i); > + mysql_mutex_lock(&e->LOCK_parallel_entry); > + active= e->current_sub_id > e->last_committed_sub_id; > + mysql_mutex_unlock(&e->LOCK_parallel_entry); > + if (active) > + break; > + } > + return (i == max_i); > +} > + > + > /* > do_event() is executed by the sql_driver_thd thread. > It's main purpose is to find a thread that can execute the query. > > === modified file 'sql/rpl_parallel.h' > --- a/sql/rpl_parallel.h 2013-11-05 11:01:26 +0000 > +++ b/sql/rpl_parallel.h 2014-01-08 10:00:44 +0000 > @@ -117,6 +117,7 @@ struct rpl_parallel { > void reset(); > rpl_parallel_entry *find(uint32 domain_id); > void wait_for_done(); > + bool workers_idle(); > bool do_event(rpl_group_info *serial_rgi, Log_event *ev, > ulonglong event_size); > }; > > === modified file 'sql/rpl_rli.cc' > --- a/sql/rpl_rli.cc 2013-12-17 09:50:34 +0000 > +++ b/sql/rpl_rli.cc 2014-01-08 10:00:44 +0000 > @@ -56,7 +56,7 @@ Relay_log_info::Relay_log_info(bool is_s > is_fake(FALSE), > #endif > group_master_log_pos(0), log_space_total(0), ignore_log_space_limit(0), > - last_master_timestamp(0), slave_skip_counter(0), > + last_master_timestamp(0), sql_thread_caught_up(true), > slave_skip_counter(0), > abort_pos_wait(0), slave_run_id(0), sql_driver_thd(), > inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE), > until_log_pos(0), retried_trans(0), executed_entries(0), > @@ -1287,9 +1287,14 @@ void Relay_log_info::stmt_done(my_off_t > (probably ok - except in some very rare cases, only consequence > is that value may take some time to display in > Seconds_Behind_Master - not critical). > + > + In parallel replication, we take care to not set last_master_timestamp > + backwards, in case of out-of-order calls here. > */ > if (!(event_creation_time == 0 && > - IF_DBUG(debug_not_change_ts_if_art_event > 0, 1))) > + IF_DBUG(debug_not_change_ts_if_art_event > 0, 1)) && > + !(rgi->is_parallel_exec && event_creation_time <= > last_master_timestamp) > + ) > last_master_timestamp= event_creation_time; > } > DBUG_VOID_RETURN; > > === modified file 'sql/rpl_rli.h' > --- a/sql/rpl_rli.h 2013-11-08 14:14:18 +0000 > +++ b/sql/rpl_rli.h 2014-01-08 10:00:44 +0000 > @@ -221,6 +221,12 @@ class Relay_log_info : public Slave_repo > bool sql_force_rotate_relay; > > time_t last_master_timestamp; > + /* > + The SQL driver thread sets this true while it is waiting at the end of > the > + relay log for more events to arrive. SHOW SLAVE STATUS uses this to > report > + Seconds_Behind_Master as zero while the SQL thread is so waiting. > + */ > + bool sql_thread_caught_up; > > void clear_until_condition(); > > > === modified file 'sql/slave.cc' > --- a/sql/slave.cc 2013-12-16 12:48:32 +0000 > +++ b/sql/slave.cc 2014-01-08 10:00:44 +0000 > @@ -2615,8 +2615,24 @@ static bool send_show_master_info_data(T > if ((mi->slave_running == MYSQL_SLAVE_RUN_CONNECT) && > mi->rli.slave_running) > { > - long time_diff= ((long)(time(0) - mi->rli.last_master_timestamp) > - - mi->clock_diff_with_master); > + long time_diff; > + bool idle; > + time_t stamp= mi->rli.last_master_timestamp; > + > + if (!stamp) > + idle= true; > + else > + { > + idle= mi->rli.sql_thread_caught_up; > + if (opt_slave_parallel_threads > 0 && idle && > + !mi->rli.parallel.workers_idle()) > + idle= false; > + } > + if (idle) > + time_diff= 0; > + else > + { > + time_diff= ((long)(time(0) - stamp) - mi->clock_diff_with_master); > /* > Apparently on some systems time_diff can be <0. Here are possible > reasons related to MySQL: > @@ -2632,13 +2648,15 @@ static bool send_show_master_info_data(T > slave is 2. At SHOW SLAVE STATUS time, assume that the difference > between timestamp of slave and rli->last_master_timestamp is 0 > (i.e. they are in the same second), then we get 0-(2-1)=-1 as a > result. > - This confuses users, so we don't go below 0: hence the max(). > + This confuses users, so we don't go below 0. > > last_master_timestamp == 0 (an "impossible" timestamp 1970) is a > special marker to say "consider we have caught up". > */ > - protocol->store((longlong)(mi->rli.last_master_timestamp ? > - max(0, time_diff) : 0)); > + if (time_diff < 0) > + time_diff= 0; > + } > + protocol->store((longlong)time_diff); > } > else > { > @@ -6096,6 +6114,7 @@ static Log_event* next_event(rpl_group_i > > if (hot_log) > mysql_mutex_unlock(log_lock); > + rli->sql_thread_caught_up= false; > DBUG_RETURN(ev); > } > if (opt_reckless_slave) // For mysql-test > @@ -6133,12 +6152,10 @@ static Log_event* next_event(rpl_group_i > Seconds_Behind_Master would be zero only when master has no > more updates in binlog for slave. The heartbeat can be sent > in a (small) fraction of slave_net_timeout. Until it's done > - rli->last_master_timestamp is temporarely (for time of > - waiting for the following event) reset whenever EOF is > - reached. > + rli->sql_thread_caught_up is temporarely (for time of waiting for > + the following event) set whenever EOF is reached. > */ > - time_t save_timestamp= rli->last_master_timestamp; > - rli->last_master_timestamp= 0; > + rli->sql_thread_caught_up= true; > > DBUG_ASSERT(rli->relay_log.get_open_count() == > rli->cur_log_old_open_count); > @@ -6264,7 +6281,7 @@ static Log_event* next_event(rpl_group_i > rli->relay_log.wait_for_update_relay_log(rli->sql_driver_thd); > // re-acquire data lock since we released it earlier > mysql_mutex_lock(&rli->data_lock); > - rli->last_master_timestamp= save_timestamp; > + rli->sql_thread_caught_up= false; > continue; > } > /* > > _______________________________________________ > commits mailing list > comm...@mariadb.org > https://lists.askmonty.org/cgi-bin/mailman/listinfo/commits _______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : maria-developers@lists.launchpad.net Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp