This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 48c4d31344eeedfb988d7bc2a715f265a23fb0d9
Author: Riza Suminto <[email protected]>
AuthorDate: Fri Jun 6 19:23:17 2025 -0700

    IMPALA-14130: Remove wait_num_tables arg in start-impala-cluster.py
    
    IMPALA-13850 changed the behavior of bin/start-impala-cluster.py to wait
    for the number of tables to be at least one. This is needed to detect
    that the catalog has seen at least one update. There is special logic in
    dataload to start Impala without tables in that circumstance.
    
    This broke the perf-AB-test job, which starts Impala before loading
    data. There are other times when we want to start Impala without tables,
    and it is inconvenient to need to specify --wait_num_table each time.
    
    It is actually not necessary to wait for the Coordinator's catalog
    metric to reach a certain value. The Frontend (Coordinator) will not
    open its service port until it has heard the first catalog topic
    update from CatalogD. IMPALA-13850 (part 2) also ensures that CatalogD
    with --catalog_topic_mode=minimal will block serving Coordinator
    requests until it begins its first reset() operation. Therefore,
    waiting for the Coordinator's catalog version is not needed anymore
    and the --wait_num_table parameter can be removed.
    
    This patch also slightly changes the "progress log" of
    start-impala-cluster.py to print the Coordinator's catalog version
    instead of the number of databases and tables cached. The sleep
    interval now includes the time spent checking the Coordinator's
    metric.
    
    Testing:
    - Pass dataload with updated script.
    - Manually run start-impala-cluster.py in both legacy and local catalog
      mode and confirm it works.
    - Pass custom cluster test_concurrent_ddls.py and test_catalogd_ha.py
    
    Change-Id: I4a3956417ec83de4fb3fc2ef1e72eb3641099f02
    Reviewed-on: http://gerrit.cloudera.org:8080/22994
    Reviewed-by: Csaba Ringhofer <[email protected]>
    Tested-by: Riza Suminto <[email protected]>
---
 bin/start-impala-cluster.py      |  7 +------
 testdata/bin/create-load-data.sh |  2 --
 tests/common/impala_cluster.py   | 34 +++++++++++++---------------------
 3 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/bin/start-impala-cluster.py b/bin/start-impala-cluster.py
index 2816e645c..414c40910 100755
--- a/bin/start-impala-cluster.py
+++ b/bin/start-impala-cluster.py
@@ -205,9 +205,6 @@ parser.add_option("--use_calcite_planner", default="False", 
type="choice",
                   choices=["true", "True", "false", "False"],
                   help="If true, use the Calcite planner for query 
optimization "
                   "instead of the Impala planner")
-parser.add_option("--wait_num_table", default=1, type="int",
-                  help="If starting cluster, wait until coordinator's 
catalog.num-tables "
-                  "reach this value or more.")
 
 # For testing: list of comma-separated delays, in milliseconds, that delay 
impalad catalog
 # replica initialization. The ith delay is applied to the ith impalad.
@@ -1256,9 +1253,7 @@ if __name__ == "__main__":
       # https://issues.apache.org/jira/browse/IMPALA-13755
       expected_num_ready_impalads = options.cluster_size
       expected_cluster_size = options.cluster_size
-    LOG.info("wait_num_table={}".format(options.wait_num_table))
-    impala_cluster.wait_until_ready(expected_cluster_size, 
expected_num_ready_impalads,
-                                    wait_num_table=options.wait_num_table)
+    impala_cluster.wait_until_ready(expected_cluster_size, 
expected_num_ready_impalads)
   except Exception as e:
     LOG.exception("Error starting cluster")
     sys.exit(1)
diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index 46879c94f..44ea40c4c 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -168,8 +168,6 @@ function start-impala {
   # Disable strict datafile location checks for Iceberg tables
   
DATAFILE_LOCATION_CHECK="-iceberg_allow_datafiles_in_table_location_only=false"
   START_CLUSTER_ARGS_INT+=("--catalogd_args=$DATAFILE_LOCATION_CHECK")
-  # No need to wait for min num table.
-  START_CLUSTER_ARGS_INT+=("--wait_num_table=-1")
   if [[ "${TARGET_FILESYSTEM}" == "local" ]]; then
     START_CLUSTER_ARGS_INT+=("--impalad_args=--abort_on_config_error=false -s 
1")
   else
diff --git a/tests/common/impala_cluster.py b/tests/common/impala_cluster.py
index dd098048a..c6c44bc76 100644
--- a/tests/common/impala_cluster.py
+++ b/tests/common/impala_cluster.py
@@ -204,8 +204,7 @@ class ImpalaCluster(object):
         client.close()
     return n
 
-  def wait_until_ready(self, expected_num_impalads=1, 
expected_num_ready_impalads=None,
-                       wait_num_table=-1):
+  def wait_until_ready(self, expected_num_impalads=1, 
expected_num_ready_impalads=None):
     """Waits for this 'cluster' to be ready to submit queries.
 
       A cluster is deemed "ready" if:
@@ -250,10 +249,7 @@ class ImpalaCluster(object):
         continue
       if flags.get('stress_catalog_init_delay_ms', '0') != '0':
         continue
-      if flags.get('use_local_catalog', 'false') != 'false':
-        wait_num_table = -1
-      impalad.wait_for_coordinator_services(sleep_interval, 
check_processes_still_running,
-                                            wait_num_table=wait_num_table)
+      impalad.wait_for_coordinator_services(sleep_interval, 
check_processes_still_running)
       # Decrease sleep_interval after first coordinator ready as the others 
are also
       # likely to be (nearly) ready.
       sleep_interval = 0.2
@@ -648,8 +644,7 @@ class ImpaladProcess(BaseImpalaProcess):
       early_abort_fn()
       sleep(sleep_interval)
 
-  def wait_for_coordinator_services(self, sleep_interval, early_abort_fn,
-                                    wait_num_table=-1):
+  def wait_for_coordinator_services(self, sleep_interval, early_abort_fn):
     """Waits for client ports to be opened. Assumes that the webservice ports 
are open."""
     start_time = time.time()
     LOG.info(
@@ -660,21 +655,18 @@ class ImpaladProcess(BaseImpalaProcess):
       beeswax_port_is_open = self.service.beeswax_port_is_open()
       hs2_port_is_open = self.service.hs2_port_is_open()
       hs2_http_port_is_open = self.service.hs2_http_port_is_open()
-      # Fetch the number of catalog objects.
-      num_dbs, num_tbls = self.service.get_metric_values(
-          ["catalog.num-databases", "catalog.num-tables"])
-      if (beeswax_port_is_open and hs2_port_is_open and hs2_http_port_is_open
-          and num_tbls >= wait_num_table):
+      if beeswax_port_is_open and hs2_port_is_open and hs2_http_port_is_open:
         return
       early_abort_fn()
-      # The coordinator is likely to wait for the catalog update.
-      LOG.info(("Client services not ready. Waiting for catalog cache: "
-          "({num_dbs} DBs / {num_tbls} tables / 
wait_num_table={wait_num_table}). "
-          "Trying again ...").format(
-              num_dbs=num_dbs,
-              num_tbls=num_tbls,
-              wait_num_table=wait_num_table))
-      sleep(sleep_interval)
+      # The coordinator is likely waiting for the catalog update. Display 
currently
+      # known catalog version to show progress.
+      metric_check_time = time.time()
+      catalog_version = self.service.get_metric_value("catalog.curr-version")
+      LOG.info(("Client services not ready. Might be waiting for catalog topic 
update: "
+          "(catalog.curr-version={catalog_version}). Trying again ...").format(
+              catalog_version=catalog_version))
+      # Include metric_check_time in sleep_interval.
+      sleep(max(0, sleep_interval - (time.time() - metric_check_time)))
 
     raise RuntimeError(
         "Unable to open client ports within {num_seconds} seconds.".format(

Reply via email to