This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 7c53e87aa IMPALA-12833: Enabled 
'catalogd_ha_reset_metadata_on_failover' by default
7c53e87aa is described below

commit 7c53e87aa166bb77cd2e31646ff913302912d3fd
Author: wzhou-code <[email protected]>
AuthorDate: Wed Feb 21 20:08:17 2024 -0800

    IMPALA-12833: Enabled 'catalogd_ha_reset_metadata_on_failover' by default
    
    The standby catalogd may have stale metadata for several reasons: for
    example, the event processor could have hung or could simply be behind
    in processing events. Also, the standby catalogd doesn't get invalidate
    requests from coordinators, so we should reset its metadata when it
    becomes active to avoid serving stale metadata.
    
    This patch sets the default value of the catalog server startup flag
    'catalogd_ha_reset_metadata_on_failover' to true so that catalogd
    will reset its metadata when it becomes active. It also makes the
    flag a hidden option.
    
    Testing:
     - Looped to run unit-tests for catalog HA and statestore HA without
       failure.
     - Passed core tests
    
    Change-Id: Ibc7c529f34b70734a700ac0d9d58b7e5b0215f8d
    Reviewed-on: http://gerrit.cloudera.org:8080/21051
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-by: Michael Smith <[email protected]>
    Reviewed-by: Abhishek Rawat <[email protected]>
---
 be/src/catalog/catalog-server.cc         | 8 ++++++--
 tests/custom_cluster/test_catalogd_ha.py | 4 ++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/be/src/catalog/catalog-server.cc b/be/src/catalog/catalog-server.cc
index 3402959b0..529d9523c 100644
--- a/be/src/catalog/catalog-server.cc
+++ b/be/src/catalog/catalog-server.cc
@@ -152,8 +152,12 @@ DEFINE_bool(enable_skipping_older_events, false, "This 
configuration is used to
 DEFINE_int32(catalog_operation_log_size, 100, "Number of catalog operation log 
records "
     "to retain in catalogd. If -1, the operation log has unbounded size.");
 
-DEFINE_bool(catalogd_ha_reset_metadata_on_failover, false, "If true, reset all metadata "
-    "when the catalogd becomes active.");
+// The standby catalogd may have stale metadata for some reason, like event processor
+// could have hung or could be just behind in processing events. Also the standby
+// catalogd doesn't get invalidate requests from coordinators so we should probably
+// reset its metadata when it becomes active to avoid stale metadata.
+DEFINE_bool_hidden(catalogd_ha_reset_metadata_on_failover, true, "If true, reset all "
+    "metadata when the catalogd becomes active.");
 
 DEFINE_int32(topic_update_log_gc_frequency, 1000, "Frequency at which the 
entries "
     "of the catalog topic update log are garbage collected. An entry may 
survive "
diff --git a/tests/custom_cluster/test_catalogd_ha.py 
b/tests/custom_cluster/test_catalogd_ha.py
index 62575ef66..bf812db5f 100644
--- a/tests/custom_cluster/test_catalogd_ha.py
+++ b/tests/custom_cluster/test_catalogd_ha.py
@@ -184,6 +184,7 @@ class TestCatalogdHA(CustomClusterTestSuite):
   @CustomClusterTestSuite.with_args(
     statestored_args="--use_subscriber_id_as_catalogd_priority=true "
                      "--statestore_heartbeat_frequency_ms=1000",
+    catalogd_args="--catalogd_ha_reset_metadata_on_failover=false",
     start_args="--enable_catalogd_ha")
   def test_catalogd_auto_failover(self):
     """Tests for Catalog Service auto fail over without failed RPCs."""
@@ -201,6 +202,7 @@ class TestCatalogdHA(CustomClusterTestSuite):
     statestored_args="--use_subscriber_id_as_catalogd_priority=true "
                      "--statestore_heartbeat_frequency_ms=1000 "
                      
"--debug_actions=SEND_UPDATE_CATALOGD_RPC_FIRST_ATTEMPT:FAIL@1.0",
+    catalogd_args="--catalogd_ha_reset_metadata_on_failover=false",
     start_args="--enable_catalogd_ha")
   def test_catalogd_auto_failover_with_failed_rpc(self):
     """Tests for Catalog Service auto fail over with failed RPCs."""
@@ -283,6 +285,7 @@ class TestCatalogdHA(CustomClusterTestSuite):
   @CustomClusterTestSuite.with_args(
     statestored_args="--use_subscriber_id_as_catalogd_priority=true "
                      "--statestore_heartbeat_frequency_ms=1000",
+    catalogd_args="--catalogd_ha_reset_metadata_on_failover=false",
     start_args="--enable_catalogd_ha")
   def test_catalogd_manual_failover(self):
     """Tests for Catalog Service manual fail over without failed RPCs."""
@@ -300,6 +303,7 @@ class TestCatalogdHA(CustomClusterTestSuite):
     statestored_args="--use_subscriber_id_as_catalogd_priority=true "
                      "--statestore_heartbeat_frequency_ms=1000 "
                      
"--debug_actions=SEND_UPDATE_CATALOGD_RPC_FIRST_ATTEMPT:FAIL@1.0",
+    catalogd_args="--catalogd_ha_reset_metadata_on_failover=false",
     start_args="--enable_catalogd_ha")
   def test_catalogd_manual_failover_with_failed_rpc(self):
     """Tests for Catalog Service manual fail over with failed RPCs."""

Reply via email to