Hello Sergei,

Thank you for the review comments. Please find my replies inline.

On 01/04/21 12:38 am, Sergei Golubchik wrote:
Hi, Sujatha!

Note, it's a review of the combined `git diff e5fc78 21a84e`

On Mar 31, Sujatha wrote:
revision-id: 21a84e23cf3 (mariadb-10.5.2-307-g21a84e23cf3)
parent(s): 4e6de585ff7
author: Sujatha<sujatha.sivaku...@mariadb.com>
committer: Sujatha<sujatha.sivaku...@mariadb.com>
timestamp: 2020-11-30 13:22:32 +0530
message:

MDEV-16437: merge 5.7 P_S replication instrumentation and tables
That was pretty good, thanks!
Just few small comments below, no big changes will be needed.

diff --git a/mysql-test/suite/multi_source/simple.result 
b/mysql-test/suite/multi_source/simple.result
index a66d49e88cb..b57e146feb5 100644
--- a/mysql-test/suite/multi_source/simple.result
+++ b/mysql-test/suite/multi_source/simple.result
@@ -21,7 +21,21 @@ show all slaves status;
  Connection_name       Slave_SQL_State Slave_IO_State  Master_Host     
Master_User     Master_Port     Connect_Retry   Master_Log_File 
Read_Master_Log_Pos     Relay_Log_File  Relay_Log_Pos   Relay_Master_Log_File   
Slave_IO_Running        Slave_SQL_Running       Replicate_Do_DB 
Replicate_Ignore_DB     Replicate_Do_Table      Replicate_Ignore_Table  
Replicate_Wild_Do_Table Replicate_Wild_Ignore_Table     Last_Errno      
Last_Error      Skip_Counter    Exec_Master_Log_Pos     Relay_Log_Space 
Until_Condition Until_Log_File  Until_Log_Pos   Master_SSL_Allowed      
Master_SSL_CA_File      Master_SSL_CA_Path      Master_SSL_Cert 
Master_SSL_Cipher       Master_SSL_Key  Seconds_Behind_Master   
Master_SSL_Verify_Server_Cert   Last_IO_Errno   Last_IO_Error   Last_SQL_Errno  
Last_SQL_Error  Replicate_Ignore_Server_Ids     Master_Server_Id        
Master_SSL_Crl  Master_SSL_Crlpath      Using_Gtid      Gtid_IO_Pos     
Replicate_Do_Domain_Ids Replicate_Ignore_Domain_Ids     Parallel_Mode   
SQL_Delay       SQL_Remaining_Delay     Slave_SQL_Running_State 
Slave_DDL_Groups        Slave_Non_Transactional_Groups  
Slave_Transactional_Groups      Retried_transactions    Max_relay_log_size      
Executed_log_entries    Slave_received_heartbeats       Slave_heartbeat_period  
Gtid_Slave_Pos
  slave1        Slave has read all relay log; waiting for more updates  Waiting for master to send 
event        127.0.0.1       root    MYPORT_1        60      master-bin.000001       
<read_master_log_pos>     mysqld-relay-bin-slave1.000002  <relay_log_pos>   
master-bin.000001       Yes     Yes                                                     0               
0       <read_master_log_pos>     <relay_log_space1>        None            0       No      
                                        0       No      0               0                       1       
                No                              optimistic      0       NULL    Slave has read all 
relay log; waiting for more updates  0       0       0       0       1073741824      7       0       
60.000  
  slave2        Slave has read all relay log; waiting for more updates  Waiting for master to send 
event        127.0.0.1       root    MYPORT_2        60      master-bin.000001       
<read_master_log_pos>     mysqld-relay-bin-slave2.000002  <relay_log_pos>   
master-bin.000001       Yes     Yes                                                     0               
0       <read_master_log_pos>     <relay_log_space1>        None            0       No      
                                        0       No      0               0                       2       
                No                              optimistic      0       NULL    Slave has read all 
relay log; waiting for more updates  0       0       0       0       1073741824      7       0       
60.000  
+#
+# MDEV:16437: merge 5.7 P_S replication instrumentation and tables
+#
+select * from performance_schema.replication_connection_configuration;
+CONNECTION_NAME        HOST    PORT    USER    USING_GTID      SSL_ALLOWED     
SSL_CA_FILE     SSL_CA_PATH     SSL_CERTIFICATE SSL_CIPHER      SSL_KEY 
SSL_VERIFY_SERVER_CERTIFICATE   SSL_CRL_FILE    SSL_CRL_PATH    
CONNECTION_RETRY_INTERVAL       CONNECTION_RETRY_COUNT  HEARTBEAT_INTERVAL      
IGNORE_SERVER_IDS       REPL_DO_DOMAIN_IDS      REPL_IGNORE_DOMAIN_IDS
+slave2 #       #       root    NO      NO                                      
        NO                      60      86400   60.000                  
+slave1 #       #       root    NO      NO                                      
        NO                      60      86400   60.000                  
Isn't the host always 127.0.0.1 ? Why to mask it?


Sure.

Also, may be it'd be easier to read the result, if you print it vertically?
It looks that CONNECTION_RETRY_COUNT is 86400.
And 86400 is clearly a timeout, not a retry count.

Will use "--query_vertical". Thank you.


Regarding the timeout, actually the above table just displays the user specified connection configuration.

In case, user has not provided any value for 'CONNECTION_RETRY_COUNT' it will hold the default value(86400).

Please refer the following link.

https://mariadb.com/kb/en/mysqld-options/#-master-retry-count


  start all slaves;
+#
+# MDEV:16437: merge 5.7 P_S replication instrumentation and tables
+#
+select * from performance_schema.replication_applier_status_by_coordinator;
+CONNECTION_NAME        THREAD_ID       SERVICE_STATE   LAST_SEEN_TRANSACTION   
LAST_ERROR_NUMBER       LAST_ERROR_MESSAGE      LAST_ERROR_TIMESTAMP    
LAST_TRANS_RETRY_COUNT
+slave2 #       ON              0               0000-00-00 00:00:00     0
+slave1 #       ON              0               0000-00-00 00:00:00     0
  stop slave 'slave1';
  show slave 'slave1' status;
  Slave_IO_State        
diff --git a/mysql-test/suite/rpl/include/rpl_deadlock.test 
b/mysql-test/suite/rpl/include/rpl_deadlock.test
index e9191d5fcd8..bccbe044a36 100644
--- a/mysql-test/suite/rpl/include/rpl_deadlock.test
+++ b/mysql-test/suite/rpl/include/rpl_deadlock.test
@@ -59,6 +59,16 @@ let $status_var_comparsion= >;
  connection slave;
  SELECT COUNT(*) FROM t2;
  COMMIT;
+
+--echo
+--echo # Test that the performance schema coulumn shows > 0 values.
+--echo
+
+--let $assert_text= current number of retries should be more than the value 
saved before deadlock.
+--let $assert_cond= [SELECT COUNT_TRANSACTIONS_RETRIES FROM 
performance_schema.replication_applier_status, COUNT_TRANSACTIONS_RETRIES, 1] > 
"$slave_retried_transactions"
+--source include/assert.inc
what's wrong with simple

   SELECT COUNT_TRANSACTIONS_RETRIES > $slave_retried_transactions FROM 
performance_schema.replication_applier_status

?


'asserts' are preferred by upstream team and they find asserts to be more readable rather than comparing SELECT OUTPUTS from result files.

Hence you will find this pattern. Should I replace them with SELECTS? Please let me know.

+
+source include/check_slave_is_running.inc;
  sync_with_master;
# Check the data
diff --git a/mysql-test/suite/rpl/r/rpl_perfschema_connect_config.result 
b/mysql-test/suite/rpl/r/rpl_perfschema_connect_config.result
new file mode 100644
index 00000000000..4ace84ffac4
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_perfschema_connect_config.result
@@ -0,0 +1,124 @@
+include/master-slave.inc
+[connection master]
+# Asserted this: On master, the table should return an empty set.
+connection slave;
+
+# Verify that SELECT works for every field and produces an output
+# similar to the corresponding field in SHOW SLAVE STATUS(SSS).
+
+include/assert.inc [Value returned by SSS and PS table for Host should be 
same.]
+include/assert.inc [Value returned by SSS and PS table for Port should be 
same.]
+include/assert.inc [Value returned by SSS and PS table for User should be 
same.]
+include/assert.inc [Value returned by SSS and PS table for Using_Gtid should 
be same.]
+include/assert.inc [Value returned by SSS and PS table for SSL_Allowed should 
be same.]
+include/assert.inc [Value returned by SSS and PS table for SSL_CA_File should 
be same.]
+include/assert.inc [Value returned by SSS and PS table for SSL_CA_Path should 
be same.]
+include/assert.inc [Value returned by SSS and PS table for SSL_Certificate 
should be same.]
+include/assert.inc [Value returned by SSS and PS table for SSL_Cipher should 
be same.]
+include/assert.inc [Value returned by SSS and PS table for SSL_Key should be 
same.]
+include/assert.inc [Value returned by SSS and PS table for 
SSL_Verify_Server_Certificate should be same.]
+include/assert.inc [Value returned by SSS and PS table for SSL_Crl_File should 
be same.]
+include/assert.inc [Value returned by SSS and PS table for SSL_Crl_Path should 
be same.]
+include/assert.inc [Value returned by SSS and PS table for 
Connection_Retry_Interval should be same.]
+include/assert.inc [Value returned by PS table for Connection_Retry_Count 
should be 10.]
this is just unreadable

+
+# Heartbeat_Interval is part of I_S and P_S. We will compare the
+# two to make sure both match.
...
diff --git a/mysql-test/suite/rpl/t/rpl_perfschema_applier_config.test 
b/mysql-test/suite/rpl/t/rpl_perfschema_applier_config.test
new file mode 100644
index 00000000000..132d9912222
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_perfschema_applier_config.test
@@ -0,0 +1,96 @@
+# ==== Purpose ====
+#
+# This test script serves as the functionality testing for the table
+# performance_schema.replication_applier_configuration. Test for ddl and dml
+# operations is a part of the perfschema suite. The ddl/dml tests are named:
+# 1) ddl_replication_applier_configuration.test and
+# 2) dml_replication_applier_configuration.test.
+#
+# The follwing scenarios are tested in this script:
+#
+#  - Verify that output is same as SSS on a fresh slave.
+#  - Verify that the value of this field is correct after STOP SLAVE.
+#  - Verify that, when desired delay is set, the value is shown corectly.
+#  - Verify that the value is preserved after STOP SLAVE.
+#  - Verify that, when desired delay is reset, the value is shown corectly.
+#
+#  ==== Related Worklog ====
+#
+#  MDEV-16437: merge 5.7 P_S replication instrumentation and tables
+#
+
+source include/master-slave.inc;
+source include/have_binlog_format_mixed.inc;
master-slave should be included last


Sure.

+
+let $assert_text= On master, the table should return an empty set.;
+let $assert_cond= count(*) = 0 from 
performance_schema.replication_applier_configuration;
+source include/assert.inc;
again, what's wrong with just

   select * from performance_schema.replication_applier_configuration;

or, may be, if you want to be very explicit

  select count(*) as 'must be 0' from 
performance_schema.replication_applier_configuration;

+
+--connection slave
+
+--echo
+--echo # Verify that SELECT works and produces an output similar to
+--echo # the corresponding field in SHOW SLAVE STATUS(SSS) in all scenarios.
+--echo
+
+--echo
+--echo # Verify that output is same as SSS on a fresh slave.
+--echo
+
+let $sss_value= query_get_value(SHOW SLAVE STATUS, SQL_Delay, 1);
+let $ps_value= query_get_value(select Desired_Delay from 
performance_schema.replication_applier_configuration, Desired_Delay, 1);
+let $assert_text= Value returned by SSS and PS table for Desired_Delay should 
be same.;
+let $assert_cond= "$sss_value" = "$ps_value";
+source include/assert.inc;
I'll stop commenting on every assert.inc, but please, please, stop overusing 
them.

+
+--echo
+--echo # Verify that the value of this field is correct after STOP SLAVE.
+--echo
+
+source include/stop_slave.inc;
+
+let $sss_value= query_get_value(SHOW SLAVE STATUS, SQL_Delay, 1);
+let $ps_value= query_get_value(select Desired_Delay from 
performance_schema.replication_applier_configuration, Desired_Delay, 1);
+let $assert_text= Value returned by SSS and PS table for Desired_Delay should 
be same.;
+let $assert_cond= "$sss_value" = "$ps_value";
+source include/assert.inc;
+
+--echo
+--echo # Verify that, when desired delay is set, the value is shown corectly.
+--echo
+
+eval change master to master_delay= 2;
+source include/start_slave.inc;
+
+let $sss_value= query_get_value(SHOW SLAVE STATUS, SQL_Delay, 1);
+let $ps_value= query_get_value(select Desired_Delay from 
performance_schema.replication_applier_configuration, Desired_Delay, 1);
+let $assert_text= Value returned by SSS and PS table for Desired_Delay should 
be same.;
+let $assert_cond= "$sss_value" = "$ps_value";
+source include/assert.inc;
+
+--echo
+--echo # Verify that the value is preserved after STOP SLAVE.
+--echo
+
+source include/stop_slave.inc;
+
+let $ss_value= query_get_value(SHOW SLAVE STATUS, SQL_Delay, 1);
+let $ps_value= query_get_value(select Desired_Delay from 
performance_schema.replication_applier_configuration, Desired_Delay, 1);
+let $assert_text= Value returned by SSS and PS table for Desired_Delay should 
be same.;
+let $assert_cond= "$sss_value" = "$ps_value";
+source include/assert.inc;
+
+--echo
+--echo # Verify that, when desired delay is reset, the value is shown corectly.
+--echo
+
+eval change master to master_delay= 0;
+source include/start_slave.inc;
+
+let $sss_value= query_get_value(SHOW SLAVE STATUS, SQL_Delay, 1);
+let $ps_value= query_get_value(select Desired_Delay from 
performance_schema.replication_applier_configuration, Desired_Delay, 1);
+let $assert_text= Value returned by SSS and PS table for Desired_Delay should 
be same.;
+let $assert_cond= "$sss_value" = "$ps_value";
+source include/assert.inc;
+
+source include/rpl_end.inc;
diff --git a/mysql-test/suite/rpl/t/rpl_perfschema_applier_status.test 
b/mysql-test/suite/rpl/t/rpl_perfschema_applier_status.test
new file mode 100644
index 00000000000..52ee14cef2a
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_perfschema_applier_status.test
@@ -0,0 +1,72 @@
+# ==== Purpose ====
+#
+# This test script serves as the functionality testing for the table
+# performance_schema.replication_applier_status. Test for ddl and dml
+# operations is a part of the perfschema suite. The ddl/dml tests are named:
+# 1) ddl_replication_applier_status.test and
+# 2) dml_replication_applier_status.test.
+#
+# The follwing scenarios are tested in this script:
+#
+#  - Verify that output is same as SSS on a fresh slave.
+#  - Verify that the value of this field is correct after STOP SLAVE.
+#  - Remaining delay is not tested.
+#  - Count_trnsaction is partially tested here making sure it can be queried.
+#    More testing in rpl/include/rpl_deadlock.test
+#
+#  ==== Related Worklog ====
+#
+# MDEV-16437: merge 5.7 P_S replication instrumentation and tables
+#
+
+source include/master-slave.inc;
+source include/have_binlog_format_mixed.inc;
master-slave must always be last (I'll stop commenting on that too)


Sure.

+
+let $assert_text= On master, the table should return an empty set.;
+let $assert_cond= count(*) = 0 from 
performance_schema.replication_applier_status;
+source include/assert.inc;
+
+--connection slave
+
+--echo
+--echo # Verify that SELECT works and produces an output similar to
+--echo # the corresponding field in SHOW SLAVE STATUS(SSS) in all scenarios.
+--echo
+
+--echo
+--echo # Verify that output is same as SSS on a fresh slave.
+--echo
+
+let $sss_value= query_get_value(SHOW SLAVE STATUS, Slave_SQL_Running, 1);
+let $ps_value= query_get_value(select Service_State from 
performance_schema.replication_applier_status, Service_State, 1);
+let $assert_text= SSS shows Slave_SQL_Running as "Yes". So, Service_State from this PS 
table should be "ON".;
+let $assert_cond= "$sss_value" = "Yes" AND "$ps_value"= "ON";
+source include/assert.inc;
+
+let $ss_value= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', 
Value, 1);
+let $ps_value= query_get_value(select count_transactions_retries from 
performance_schema.replication_applier_status, count_transactions_retries, 1);
+let $assert_text= COUNT_TRANSACTION_RETRIES should be equal to 
Slave_retried_transactions.;
+let $assert_cond= "$ps_value"= "$ss_value";
+source include/assert.inc;
+
+--echo
+--echo # Verify that the fields show the correct values after STOP SLAVE.
+--echo
+
+source include/stop_slave.inc;
+
+let $sss_value= query_get_value(SHOW SLAVE STATUS, Slave_SQL_Running, 1);
+let $ps_value= query_get_value(select Service_State from 
performance_schema.replication_applier_status, Service_State, 1);
+let $assert_text= SSS shows Slave_SQL_Running as "No". So, Service_State from this PS 
table should be "OFF".;
+let $assert_cond= "$sss_value" = "No" AND "$ps_value"= "OFF";
+source include/assert.inc;
+
+let $ss_value= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', 
Value, 1);
+let $ps_value= query_get_value(select count_transactions_retries from 
performance_schema.replication_applier_status, count_transactions_retries, 1);
+let $assert_text= COUNT_TRANSACTION_RETRIES should be equal to 
Slave_retried_transactions.;
+let $assert_cond= "$ps_value"= "$ss_value";
+source include/assert.inc;
+
+source include/start_slave.inc;
+source include/rpl_end.inc;
+
diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h
index 4d47689ac18..946d138d618 100644
--- a/sql/rpl_mi.h
+++ b/sql/rpl_mi.h
@@ -50,13 +57,6 @@ class Domain_id_filter
    */
    DYNAMIC_ARRAY m_domain_ids[2];
-public:
-  /* domain id list types */
-  enum enum_list_type {
-    DO_DOMAIN_IDS= 0,
-    IGNORE_DOMAIN_IDS
-  };
-
Why did you move it up? Does it make any difference?


Actually I didn't move, I changed the access specifier of 'm_domain_ids[2] from 'private' to 'public',

so that I can use it from PFS code as shown below.

File:table_replication_connection_configuration.cc

convert_array_to_str(&mi->domain_id_filter.m_domain_ids[Domain_id_filter::DO_DOMAIN_IDS]);
convert_array_to_str(&mi->domain_id_filter.m_domain_ids[Domain_id_filter::IGNORE_DOMAIN_IDS]);


    Domain_id_filter();
~Domain_id_filter();
diff --git 
a/storage/perfschema/table_replication_applier_status_by_coordinator.cc 
b/storage/perfschema/table_replication_applier_status_by_coordinator.cc
index beb8620b240..30cf56ce0c2 100644
--- a/storage/perfschema/table_replication_applier_status_by_coordinator.cc
+++ b/storage/perfschema/table_replication_applier_status_by_coordinator.cc
@@ -55,12 +55,14 @@ table_replication_applier_status_by_coordinator::m_share=
    sizeof(pos_t), /* ref length */
    &m_table_lock,
    { C_STRING_WITH_LEN("CREATE TABLE 
replication_applier_status_by_coordinator("
-  "CHANNEL_NAME CHAR(64) collate utf8_general_ci not null,"
+  "CONNECTION_NAME VARCHAR(256) collate utf8_general_ci not null,"
please, don't rename existing columns, let's keep calling it CHANNEL_NAME 
everywhere.


Sure.


    "THREAD_ID BIGINT UNSIGNED,"
    "SERVICE_STATE ENUM('ON','OFF') not null,"
+  "LAST_SEEN_TRANSACTION CHAR(57) not null,"
I think it's better to put new columns at the end.


Sure.


    "LAST_ERROR_NUMBER INTEGER not null,"
    "LAST_ERROR_MESSAGE VARCHAR(1024) not null,"
-  "LAST_ERROR_TIMESTAMP TIMESTAMP(0) not null)") },
+  "LAST_ERROR_TIMESTAMP TIMESTAMP(0) not null,"
+  "LAST_TRANS_RETRY_COUNT INTEGER not null)") },
    false  /* perpetual */
  };
@@ -104,15 +106,7 @@ int table_replication_applier_status_by_coordinator::rnd_next(void)
    {
      mi= (Master_info *)my_hash_element(&master_info_index->master_info_hash, 
m_pos.m_index);
- /*
-      Construct and display SQL Thread's (Coordinator) information in
-      'replication_applier_status_by_coordinator' table only in the case of
-      multi threaded slave mode. Code should do nothing in the case of single
-      threaded slave mode. In case of single threaded slave mode SQL Thread's
-      status will be reported as part of
-      'replication_applier_status_by_worker' table.
-    */
is this no longer true?


Yes. Irrespective of single or multi threaded slave mode,

Worker thread information will be available in : replication_applier_status_by_worker (If slave_parallel_threads > 0)

Co-ordinator thread information will be available in : replication_applier_status_by_coordinator (For both single and multi threaded case)


-    if (mi && mi->host[0] && /*mi->rli.get_worker_count() > */ 0)
+    if (mi && mi->host[0])
      {
        make_row(mi);
        m_next_pos.set_after(&m_pos);
@@ -147,13 +141,20 @@ int 
table_replication_applier_status_by_coordinator::rnd_pos(const void *pos)
  void table_replication_applier_status_by_coordinator::make_row(Master_info 
*mi)
  {
    m_row_exists= false;
+  rpl_gtid gtid;
+  char buf[10+1+10+1+20+1];
+  String str(buf, sizeof(buf), system_charset_info);
There's a special template for it:

   StringBuffer<10+1+10+1+20+1> str;


Sure.


+  bool first= true;
+
+  str.length(0);
not needed if you use StringBuffer<>


Sure.


DBUG_ASSERT(mi != NULL); mysql_mutex_lock(&mi->rli.data_lock); - m_row.channel_name_length= static_cast<uint>(mi->connection_name.length);
-  memcpy(m_row.channel_name, mi->connection_name.str, 
m_row.channel_name_length);
+  gtid= mi->rli.last_seen_gtid;
+  m_row.connection_name_length= static_cast<uint>(mi->connection_name.length);
+  memcpy(m_row.connection_name, mi->connection_name.str, 
m_row.connection_name_length);
if (mi->rli.slave_running)
    {
@@ -175,6 +176,18 @@ void 
table_replication_applier_status_by_coordinator::make_row(Master_info *mi)
    else
      m_row.service_state= PS_RPL_NO;
+ if ((gtid.seq_no > 0 &&
+        !rpl_slave_state_tostring_helper(&str, &gtid, &first)))
+  {
+    strmake(m_row.last_seen_transaction,str.ptr(), str.length());
+    m_row.last_seen_transaction_length= str.length();
+  }
+  else
+  {
+    m_row.last_seen_transaction_length= 0;
+    memcpy(m_row.last_seen_transaction, "", 1);
a strange way to write m_row.last_seen_transaction[0]= 0, but ok :)

+  }
+
    mysql_mutex_lock(&mi->rli.err_lock);
m_row.last_error_number= (long int) mi->rli.last_error().number;
Regards,
Sergei
VP of MariaDB Server Engineering
andsecur...@mariadb.org

Please provide your suggestions on above replies.


Thank you

S.Sujatha




_______________________________________________
Mailing list: https://launchpad.net/~maria-developers
Post to     : maria-developers@lists.launchpad.net
Unsubscribe : https://launchpad.net/~maria-developers
More help   : https://help.launchpad.net/ListHelp

Reply via email to