This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit c0174bb4383c0d284bd3e8a6312bae49a55dc8ec Author: stiga-huang <[email protected]> AuthorDate: Mon Apr 14 20:34:27 2025 +0800 IMPALA-13960: Add catalog timeline item for prepareInsertEventData When enable_insert_events is set to true (default), Impala will fire HMS INSERT events for each INSERT statement. Preparing data of the InsertEvents actually takes time since it fetches checksums of all the new files. This patch adds a catalog timeline item to reveal this step. Before this patch, the duration of "Got Metastore client" before "Fired Metastore events" could be long: Catalog Server Operation: 65.762ms - Got catalog version read lock: 12.724us (12.724us) - Got catalog version write lock and table write lock: 224.572us (211.848us) - Got Metastore client: 418.346us (193.774us) - Got Metastore client: 29.001ms (28.583ms) <---- Unexpectedly long - Fired Metastore events: 52.665ms (23.663ms) After this patch, the timeline shows that what actually takes the time is "Prepared InsertEvent data": Catalog Server Operation: 61.597ms - Got catalog version read lock: 7.129us (7.129us) - Got catalog version write lock and table write lock: 114.476us (107.347us) - Got Metastore client: 200.040us (85.564us) - Prepared InsertEvent data: 25.335ms (25.135ms) - Got Metastore client: 25.342ms (7.009us) - Fired Metastore events: 46.625ms (21.283ms) Tests: - Added e2e test Change-Id: Iaef1cae7e8ca1c350faae8666ab1369717736978 Reviewed-on: http://gerrit.cloudera.org:8080/22778 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../java/org/apache/impala/compat/MetastoreShim.java | 5 ++--- .../java/org/apache/impala/service/CatalogOpExecutor.java | 1 + tests/query_test/test_observability.py | 11 ++++++++++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java b/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java index 4a0356e6e..23fc2562b 
100644 --- a/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java +++ b/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java @@ -487,9 +487,8 @@ public class MetastoreShim extends Hive3MetastoreShimBase { Preconditions.checkNotNull(tableName); Preconditions.checkState(!insertEventDataList.isEmpty(), "Atleast one insert event " + "info must be provided."); - LOG.debug(String.format( - "Firing %s insert event(s) for %s.%s", insertEventDataList.size(), dbName, - tableName)); + LOG.debug("Firing {} insert event(s) for {}.{}", insertEventDataList.size(), dbName, + tableName); FireEventRequestData data = new FireEventRequestData(); FireEventRequest rqst = new FireEventRequest(true, data); rqst.setDbName(dbName); diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java index c8cdde9c8..883e21fcd 100644 --- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java +++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java @@ -7721,6 +7721,7 @@ public class CatalogOpExecutor { } if (insertEventReqDatas.isEmpty()) return; + catalogTimeline.markEvent("Prepared InsertEvent data"); MetaStoreClient metaStoreClient = catalog_.getMetaStoreClient(catalogTimeline); TableInsertEventInfo insertEventInfo = new TableInsertEventInfo( diff --git a/tests/query_test/test_observability.py b/tests/query_test/test_observability.py index 247124c65..5394e22f5 100644 --- a/tests/query_test/test_observability.py +++ b/tests/query_test/test_observability.py @@ -547,7 +547,7 @@ class TestObservability(ImpalaTestSuite): "Didn't find event '" + regex + "' for query '" + query + \ "' in profile: \n" + runtime_profile - def test_create_table_profile_events(self, unique_database): + def test_ddl_profile_events(self, unique_database): """Test that specific DDL timeline event labels exist in the profile. 
Note that labels of HMS events are not used so this test is expected to pass with or without event processor enabled""" @@ -593,6 +593,15 @@ class TestObservability(ImpalaTestSuite): "Loaded file metadata for 2 partitions", "Finished updateCatalog request" ]) + # INSERT into existing partitions + stmt = "insert into %s.t1 partition(p) values (0,0), (1,1)" % unique_database + self.__verify_event_labels_in_profile(stmt, [ + "Got Metastore client", + "Prepared InsertEvent data", + "Fired Metastore events", + "Loaded file metadata for 2 partitions", + "Finished updateCatalog request" + ]) # Compute stats stmt = "compute stats %s.t1" % unique_database self.__verify_event_labels_in_profile(stmt, [
