This is an automated email from the ASF dual-hosted git repository.
wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new bafd19030 IMPALA-13143: Fix flaky test_catalogd_failover_with_sync_ddl
bafd19030 is described below
commit bafd1903069163f38812d7fa42f9c4d2f7218fcf
Author: wzhou-code <[email protected]>
AuthorDate: Thu Jun 6 21:56:49 2024 -0700
IMPALA-13143: Fix flaky test_catalogd_failover_with_sync_ddl
The test_catalogd_failover_with_sync_ddl test which was added to
custom_cluster/test_catalogd_ha.py in IMPALA-13134 failed on s3.
The test relies on specific timing with a sleep injected via a
debug action so that the DDL query is still running when catalogd
failover is triggered. The failures were caused by catalogd restarting
slowly on s3, so the query finished before catalogd failover was
triggered.
This patch fixes the issue by increasing the sleep time for s3 builds
and other slow builds.
Testing:
- Ran the test 100 times in a loop on s3.
Change-Id: I15bb6aae23a2f544067f993533e322969372ebd5
Reviewed-on: http://gerrit.cloudera.org:8080/21491
Reviewed-by: Riza Suminto <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
tests/custom_cluster/test_catalogd_ha.py | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/tests/custom_cluster/test_catalogd_ha.py
b/tests/custom_cluster/test_catalogd_ha.py
index f8ebff6b9..10c1109a6 100644
--- a/tests/custom_cluster/test_catalogd_ha.py
+++ b/tests/custom_cluster/test_catalogd_ha.py
@@ -20,16 +20,24 @@ import json
import logging
import re
import requests
+import time
from beeswaxd.BeeswaxService import QueryState
+from builtins import round
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.environ import build_flavor_timeout
-from tests.util.filesystem_utils import get_fs_path
+from tests.util.filesystem_utils import IS_S3, get_fs_path
from time import sleep
LOG = logging.getLogger('catalogd_ha_test')
DEFAULT_STATESTORE_SERVICE_PORT = 24000
DEFAULT_CATALOG_SERVICE_PORT = 26000
+SLOW_BUILD_SYNC_DDL_DELAY_S = 20
+SYNC_DDL_DELAY_S = build_flavor_timeout(
+ 10, slow_build_timeout=SLOW_BUILD_SYNC_DDL_DELAY_S)
+# s3 can behave as a slow build.
+if IS_S3:
+ SYNC_DDL_DELAY_S = SLOW_BUILD_SYNC_DDL_DELAY_S
class TestCatalogdHA(CustomClusterTestSuite):
@@ -438,7 +446,8 @@ class TestCatalogdHA(CustomClusterTestSuite):
@CustomClusterTestSuite.with_args(
statestored_args="--use_subscriber_id_as_catalogd_priority=true",
-
catalogd_args="--debug_actions='catalogd_wait_sync_ddl_version_delay:SLEEP@5000'",
+
catalogd_args="--debug_actions='catalogd_wait_sync_ddl_version_delay:SLEEP@{0}'"
+ .format(SYNC_DDL_DELAY_S * 1000),
start_args="--enable_catalogd_ha")
def test_catalogd_failover_with_sync_ddl(self, unique_database):
"""Tests for Catalog Service force fail-over when running DDL with SYNC_DDL
@@ -459,6 +468,7 @@ class TestCatalogdHA(CustomClusterTestSuite):
handle = client.execute_async(ddl_query.format(database=unique_database))
# Restart standby catalogd with force_catalogd_active as true.
+ start_s = time.time()
catalogds[1].kill()
catalogds[1].start(wait_until_ready=True,
additional_args="--force_catalogd_active=true")
@@ -467,9 +477,14 @@ class TestCatalogdHA(CustomClusterTestSuite):
catalogd_service_1.wait_for_metric_value(
"catalog-server.active-status", expected_value=False, timeout=15)
assert(not
catalogd_service_1.get_metric_value("catalog-server.active-status"))
+ elapsed_s = time.time() - start_s
+ assert elapsed_s < SYNC_DDL_DELAY_S, \
+ "Catalogd failover took %s seconds to complete" % (elapsed_s)
+ LOG.info("Catalogd failover took %s seconds to complete" %
round(elapsed_s, 1))
# Verify that the query is failed due to the Catalogd HA fail-over.
- self.wait_for_state(handle, QueryState.EXCEPTION, 30, client=client)
+ self.wait_for_state(
+ handle, QueryState.EXCEPTION, SYNC_DDL_DELAY_S * 2 + 10, client=client)
client.close()
@CustomClusterTestSuite.with_args(