This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit a0cdb7b5943d03e806388f304b73328f141372ba Author: Sai Hemanth Gantasala <[email protected]> AuthorDate: Tue Jun 20 20:15:41 2023 -0700 IMPALA-12231: Bump GBN to get HMS thrift API changes We need a couple of hive changes HIVE-27319 and HIVE-27337 for catalogD to work with latest HMS server to fix IMPALA-11768 and IMPALA-11939 respectively. Bump CDP_BUILD_NUMBER (GBN) to 44206393 Bump various CDP version numbers to be based on 7.2.18.0-273 TESTING: Exhaustive tests ran clean Added a couple of tests for IMPALA-11939 and IMPALA-11768 Change-Id: I117873b628aed3e24280f9fcd79643f918c8d5f3 Reviewed-on: http://gerrit.cloudera.org:8080/20420 Reviewed-by: Quanlong Huang <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- bin/impala-config.sh | 22 +++++----- fe/pom.xml | 8 ++++ .../java/org/apache/impala/catalog/HdfsTable.java | 3 +- java/shaded-deps/hive-exec/pom.xml | 4 ++ java/test-hive-udfs/pom.xml | 4 ++ tests/custom_cluster/test_events_custom_configs.py | 51 ++++++++++++++++++++++ tests/custom_cluster/test_metastore_service.py | 4 +- 7 files changed, 81 insertions(+), 15 deletions(-) diff --git a/bin/impala-config.sh b/bin/impala-config.sh index 02a994135..13c2f872c 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -220,19 +220,19 @@ fi : ${IMPALA_TOOLCHAIN_HOST:=native-toolchain.s3.amazonaws.com} export IMPALA_TOOLCHAIN_HOST -export CDP_BUILD_NUMBER=40643771 +export CDP_BUILD_NUMBER=44206393 export CDP_MAVEN_REPOSITORY=\ "https://${IMPALA_TOOLCHAIN_HOST}/build/cdp_components/${CDP_BUILD_NUMBER}/maven" -export CDP_AVRO_JAVA_VERSION=1.8.2.7.2.18.0-41 -export CDP_HADOOP_VERSION=3.1.1.7.2.18.0-41 -export CDP_HBASE_VERSION=2.4.6.7.2.18.0-41 -export CDP_HIVE_VERSION=3.1.3000.7.2.18.0-41 -export CDP_ICEBERG_VERSION=1.1.0.7.2.18.0-41 -export CDP_KNOX_VERSION=1.3.0.7.2.18.0-41 -export CDP_OZONE_VERSION=1.3.0.7.2.18.0-41 -export CDP_PARQUET_VERSION=1.10.99.7.2.18.0-41 -export CDP_RANGER_VERSION=2.3.0.7.2.18.0-41 -export 
CDP_TEZ_VERSION=0.9.1.7.2.18.0-41 +export CDP_AVRO_JAVA_VERSION=1.8.2.7.2.18.0-273 +export CDP_HADOOP_VERSION=3.1.1.7.2.18.0-273 +export CDP_HBASE_VERSION=2.4.17.7.2.18.0-273 +export CDP_HIVE_VERSION=3.1.3000.7.2.18.0-273 +export CDP_ICEBERG_VERSION=1.1.0.7.2.18.0-273 +export CDP_KNOX_VERSION=1.3.0.7.2.18.0-273 +export CDP_OZONE_VERSION=1.3.0.7.2.18.0-273 +export CDP_PARQUET_VERSION=1.10.99.7.2.18.0-273 +export CDP_RANGER_VERSION=2.4.0.7.2.18.0-273 +export CDP_TEZ_VERSION=0.9.1.7.2.18.0-273 # Ref: https://infra.apache.org/release-download-pages.html#closer : ${APACHE_MIRROR:="https://www.apache.org/dyn/closer.cgi"} diff --git a/fe/pom.xml b/fe/pom.xml index b817f3fb9..feb544ed4 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -491,6 +491,10 @@ under the License. <groupId>com.sun.jersey</groupId> <artifactId>jersey-server</artifactId> </exclusion> + <exclusion> + <groupId>com.cloudera</groupId> + <artifactId>*</artifactId> + </exclusion> </exclusions> </dependency> <dependency> @@ -525,6 +529,10 @@ under the License. 
<groupId>org.apache.hadoop</groupId> <artifactId>*</artifactId> </exclusion> + <exclusion> + <groupId>com.cloudera</groupId> + <artifactId>*</artifactId> + </exclusion> </exclusions> </dependency> diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java index 70ede2d5e..c57671a43 100644 --- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.ForeignKeysRequest; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.PrimaryKeysRequest; @@ -2816,7 +2817,7 @@ public class HdfsTable extends Table implements FeFsTable { } reloadPartitions(client, hmsPartToHdfsPart, fileMetadataLoadOpts); return hmsPartToHdfsPart.size(); - } catch (NoSuchObjectException e) { + } catch (NoSuchObjectException | InvalidObjectException e) { // HMS throws a NoSuchObjectException if the table does not exist // in HMS anymore. In case the partitions don't exist in HMS it does not include // them in the result of getPartitionsByNames. 
diff --git a/java/shaded-deps/hive-exec/pom.xml b/java/shaded-deps/hive-exec/pom.xml index 2ae440552..18cec153d 100644 --- a/java/shaded-deps/hive-exec/pom.xml +++ b/java/shaded-deps/hive-exec/pom.xml @@ -48,6 +48,10 @@ the same dependencies <groupId>org.apache.atlas</groupId> <artifactId>*</artifactId> </exclusion> + <exclusion> + <groupId>com.cloudera</groupId> + <artifactId>*</artifactId> + </exclusion> </exclusions> </dependency> </dependencies> diff --git a/java/test-hive-udfs/pom.xml b/java/test-hive-udfs/pom.xml index be43607c5..6af53c8ce 100644 --- a/java/test-hive-udfs/pom.xml +++ b/java/test-hive-udfs/pom.xml @@ -55,6 +55,10 @@ under the License. <groupId>org.apache.atlas</groupId> <artifactId>*</artifactId> </exclusion> + <exclusion> + <groupId>com.cloudera</groupId> + <artifactId>*</artifactId> + </exclusion> </exclusions> </dependency> <dependency> diff --git a/tests/custom_cluster/test_events_custom_configs.py b/tests/custom_cluster/test_events_custom_configs.py index 5adb1ea1a..fd69d8c91 100644 --- a/tests/custom_cluster/test_events_custom_configs.py +++ b/tests/custom_cluster/test_events_custom_configs.py @@ -22,6 +22,7 @@ import pytest from hive_metastore.ttypes import FireEventRequest from hive_metastore.ttypes import FireEventRequestData +from hive_metastore.ttypes import InsertEventRequestData from tests.common.custom_cluster_test_suite import CustomClusterTestSuite from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.skip import SkipIfFS @@ -557,6 +558,56 @@ class TestEventProcessingCustomConfigs(CustomClusterTestSuite): # 24 partitions inserted and hence we must refresh 24 partitions once. 
assert int(partitions_refreshed_after_hive) == int(partitions_refreshed_insert) + 24 + @CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=5") + def test_event_processor_failure_extra_space(self, unique_database): + """This test verifies that impala event processor is in active state after + processing a couple of previously erroneous events""" + test_table = "extra_space_table" + # IMPALA-11939 -- create table event in HMS contains extra spaces in the db/table + self.run_stmt_in_hive("create table ` {}`.`{} ` (i1 int) partitioned by (year int)" + .format(unique_database, test_table)) + self.run_stmt_in_hive("alter table ` {}`.`{} ` add columns (i2 int)" + .format(unique_database, test_table)) + EventProcessorUtils.wait_for_event_processing(self) + assert EventProcessorUtils.get_event_processor_status() == "ACTIVE" + + @CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=10") + def test_event_processor_dropped_partition(self, unique_database): + """This test verifies that impala event processor is in active state after + processing partitioned insert events of a dropped table""" + # IMPALA-11768 -- Insert partition events should be ignored + # if the table is dropped + test_table = "partitioned_table" + + def is_event_processor_active(is_insert): + self.run_stmt_in_hive("create table {}.{} (i1 int) partitioned by (year int)" + .format(unique_database, test_table)) + EventProcessorUtils.wait_for_event_processing(self) + self.client.execute("refresh {}.{}".format(unique_database, test_table)) + self.run_stmt_in_hive( + "insert into {}.{} partition(year=2023) values (4),(5),(6)" + .format(unique_database, test_table)) + data = FireEventRequestData() + if is_insert: + insert_data = InsertEventRequestData() + insert_data.filesAdded = "/warehouse/mytable/b1" + insert_data.replace = False + data.insertData = insert_data + else: + data.refreshEvent = True + req = FireEventRequest(True, data) + req.dbName = 
unique_database + req.tableName = test_table + req.partitionVals = ["2023"] + self.hive_client.fire_listener_event(req) + self.run_stmt_in_hive( + "drop table {}.{}".format(unique_database, test_table)) + EventProcessorUtils.wait_for_event_processing(self) + assert EventProcessorUtils.get_event_processor_status() == "ACTIVE" + + is_event_processor_active(True) + is_event_processor_active(False) + @CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=1") def test_iceberg_self_events(self, unique_database): """This test checks that Impala doesn't refresh Iceberg tables on self events.""" diff --git a/tests/custom_cluster/test_metastore_service.py b/tests/custom_cluster/test_metastore_service.py index d0a696c2b..fca6f3c4c 100644 --- a/tests/custom_cluster/test_metastore_service.py +++ b/tests/custom_cluster/test_metastore_service.py @@ -1201,10 +1201,8 @@ class TestMetastoreService(CustomClusterTestSuite): get_parts_req.tbl_name = "table-does-not-exist" get_parts_req.names = [] if expect_fallback: - # TODO HMS actually throws an InvalidObjectException but the HMS API signature - # doesn't declare it in the signature. self.__get_parts_by_names_expect_exception(catalog_hms_client, get_parts_req, - "Internal error") + "InvalidObjectException") else: self.__get_parts_by_names_expect_exception(catalog_hms_client, get_parts_req, "Table {0}.table-does-not-exist not found".format(
