This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 50a98dce46fd337461f67b98979fece95c7bf738 Author: Riza Suminto <[email protected]> AuthorDate: Sat Apr 12 22:29:07 2025 -0700 IMPALA-13959: Fix TestHmsIntegration.test_change_parquet_column_type TestHmsIntegration.test_change_parquet_column_type fails in exhaustive mode due to a missing int parsing introduced by IMPALA-13920. This patch adds the missing int parsing. It also fixes flake8 issues in test_hms_integration.py, including the unused vector fixture. Testing: Run and pass test_hms_integration.py in exhaustive mode. Change-Id: If5fb9f96b4087e86b0ebaac7135e14b7a14936ea Reviewed-on: http://gerrit.cloudera.org:8080/22774 Reviewed-by: Csaba Ringhofer <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- tests/metadata/test_hms_integration.py | 80 +++++++++++++++++----------------- 1 file changed, 39 insertions(+), 41 deletions(-) diff --git a/tests/metadata/test_hms_integration.py b/tests/metadata/test_hms_integration.py index d5ca12fdb..27b0e538c 100644 --- a/tests/metadata/test_hms_integration.py +++ b/tests/metadata/test_hms_integration.py @@ -51,7 +51,7 @@ class TestHmsIntegrationSanity(ImpalaTestSuite): create_uncompressed_text_dimension(cls.get_workload())) @pytest.mark.execute_serially - def test_sanity(self, vector, cluster_properties): + def test_sanity(self, cluster_properties): """Verifies that creating a catalog entity (database, table) in Impala using 'IF NOT EXISTS' while the entity exists in HMS, does not throw an error.""" # Create a database in Hive @@ -129,6 +129,7 @@ class TestHmsIntegrationSanity(ImpalaTestSuite): finally: self.run_stmt_in_hive("drop database %s cascade" % db) + @SkipIfFS.hive class TestHmsIntegration(ImpalaTestSuite): @@ -159,7 +160,7 @@ class TestHmsIntegration(ImpalaTestSuite): 'create database if not exists ' + self.db_name) return self.db_name - def __exit__(self, typ, value, traceback): + def __exit__(self, typ, value, traceback): # noqa: U100 self.impala.cleanup_db(self.db_name) class 
ImpalaTableWrapper(object): @@ -179,7 +180,7 @@ class TestHmsIntegration(ImpalaTestSuite): (self.table_name, self.table_spec)) return self.table_name - def __exit__(self, typ, value, traceback): + def __exit__(self, typ, value, traceback): # noqa: U100 self.impala.client.execute('drop table if exists %s' % self.table_name) def impala_table_stats(self, table): @@ -297,26 +298,24 @@ class TestHmsIntegration(ImpalaTestSuite): command, strs_in_error) @pytest.mark.execute_serially - def test_hive_db_hive_table_add_partition(self, vector): - self.add_hive_partition_helper(vector, HiveDbWrapper, HiveTableWrapper) + def test_hive_db_hive_table_add_partition(self): + self.add_hive_partition_helper(HiveDbWrapper, HiveTableWrapper) @pytest.mark.execute_serially - def test_hive_db_impala_table_add_partition(self, vector): - self.add_hive_partition_helper(vector, HiveDbWrapper, self.ImpalaTableWrapper) + def test_hive_db_impala_table_add_partition(self): + self.add_hive_partition_helper(HiveDbWrapper, self.ImpalaTableWrapper) @pytest.mark.execute_serially - def test_impala_db_impala_table_add_partition(self, vector): - self.add_hive_partition_helper(vector, self.ImpalaDbWrapper, - self.ImpalaTableWrapper) + def test_impala_db_impala_table_add_partition(self): + self.add_hive_partition_helper(self.ImpalaDbWrapper, self.ImpalaTableWrapper) @pytest.mark.execute_serially - def test_impala_db_hive_table_add_partition(self, vector): - self.add_hive_partition_helper(vector, self.ImpalaDbWrapper, - HiveTableWrapper) + def test_impala_db_hive_table_add_partition(self): + self.add_hive_partition_helper(self.ImpalaDbWrapper, HiveTableWrapper) @pytest.mark.xfail(run=False, reason="This is a bug: IMPALA-2426") @pytest.mark.execute_serially - def test_incremental_stats_new_partition(self, vector): + def test_incremental_stats_new_partition(self): with self.ImpalaDbWrapper(self, self.unique_string()) as db_name: with self.ImpalaTableWrapper(self, db_name + '.' 
+ self.unique_string(), '(x int) partitioned by (y int)') as table_name: @@ -330,7 +329,7 @@ class TestHmsIntegration(ImpalaTestSuite): assert '0' == table_stats[('333',)]['#rows'] assert '0' == table_stats[('333',)]['#files'] - def add_hive_partition_helper(self, vector, DbWrapper, TableWrapper): + def add_hive_partition_helper(self, DbWrapper, TableWrapper): """ Partitions added in Hive can be viewed in Impala after computing stats. """ @@ -348,10 +347,9 @@ class TestHmsIntegration(ImpalaTestSuite): assert [('333', '5309')] == self.get_impala_partition_info(table_name, 'y', 'z') # Impala's compute stats didn't alter Hive's knowledge of the partition assert ['y=333/z=5309'] == self.hive_partition_names(table_name) - self.add_hive_partition_table_stats_helper(vector, DbWrapper, TableWrapper) + self.add_hive_partition_table_stats_helper(DbWrapper, TableWrapper) - def add_hive_partition_table_stats_helper( - self, vector, DbWrapper, TableWrapper): + def add_hive_partition_table_stats_helper(self, DbWrapper, TableWrapper): """ Partitions added in Hive don't make Impala's table stats incorrect. """ @@ -376,7 +374,7 @@ class TestHmsIntegration(ImpalaTestSuite): ('42', '867')] @pytest.mark.execute_serially - def test_add_impala_partition(self, vector): + def test_add_impala_partition(self): """ Partitions added in Impala can be viewed in Hive immediately """ @@ -390,7 +388,7 @@ class TestHmsIntegration(ImpalaTestSuite): assert ['y=42/z=867'] == self.hive_partition_names(table_name) @pytest.mark.execute_serially - def test_drop_column_maintains_stats(self, vector): + def test_drop_column_maintains_stats(self): """ Dropping a column in Impala doesn't alter the stats of other columns in Hive or Impala. 
@@ -424,7 +422,7 @@ class TestHmsIntegration(ImpalaTestSuite): 'x'] @pytest.mark.execute_serially - def test_select_without_compute_stats(self, vector): + def test_select_without_compute_stats(self): """ Data added in Hive shows up in Impala 'select *', and if the table is not partitioned, 'compute incremental stats' is not required. @@ -453,7 +451,7 @@ class TestHmsIntegration(ImpalaTestSuite): @pytest.mark.xfail(run=False, reason="This is a bug: IMPALA-2458") @pytest.mark.execute_serially - def test_overwrite_added_column(self, vector): + def test_overwrite_added_column(self): """ Impala can't overwrite Hive's column types, and vice versa. """ @@ -488,7 +486,7 @@ class TestHmsIntegration(ImpalaTestSuite): @SkipIfHive3.col_stat_separated_by_engine @pytest.mark.execute_serially - def test_compute_stats_get_to_hive(self, vector): + def test_compute_stats_get_to_hive(self): """Stats computed in Impala are also visible in Hive.""" with self.ImpalaDbWrapper(self, self.unique_string()) as db_name: with self.ImpalaTableWrapper(self, db_name + '.' + self.unique_string(), @@ -504,7 +502,7 @@ class TestHmsIntegration(ImpalaTestSuite): @SkipIfHive3.col_stat_separated_by_engine @pytest.mark.execute_serially - def test_compute_stats_get_to_impala(self, vector): + def test_compute_stats_get_to_impala(self): """Column stats computed in Hive are also visible in Impala.""" with HiveDbWrapper(self, self.unique_string()) as db_name: with HiveTableWrapper(self, db_name + '.' + self.unique_string(), @@ -529,7 +527,7 @@ class TestHmsIntegration(ImpalaTestSuite): assert '0' == new_impala_stats['x']['#nulls'] @SkipIfHive2.col_stat_not_separated_by_engine - def test_engine_separates_col_stats(self, vector): + def test_engine_separates_col_stats(self): """ The 'engine' column in TAB_COL_STATS and PART_COL_STATS HMS tables is used to differentiate among column stats computed by different engines. 
@@ -577,7 +575,7 @@ class TestHmsIntegration(ImpalaTestSuite): assert '0' == hive_x_stats['num_nulls'] @SkipIfHive2.col_stat_not_separated_by_engine - def test_engine_separates_partitioned_col_stats(self, vector): + def test_engine_separates_partitioned_col_stats(self): """ The 'engine' column in TAB_COL_STATS and PART_COL_STATS HMS tables is used to differentiate among column stats computed by different engines. @@ -628,7 +626,7 @@ class TestHmsIntegration(ImpalaTestSuite): assert '0' == hive_x_stats['num_nulls'] @pytest.mark.execute_serially - def test_drop_partition(self, vector): + def test_drop_partition(self): """ Impala can see that a partitions was dropped by Hive by invalidating metadata. @@ -647,7 +645,7 @@ class TestHmsIntegration(ImpalaTestSuite): 'select * from %s' % table_name).get_data() @pytest.mark.execute_serially - def test_drop_column_with_data(self, vector): + def test_drop_column_with_data(self): """Columns dropped by Hive are ignored in Impala 'select *'.""" with self.ImpalaDbWrapper(self, self.unique_string()) as db_name: with self.ImpalaTableWrapper(self, db_name + '.' + self.unique_string(), @@ -660,7 +658,7 @@ class TestHmsIntegration(ImpalaTestSuite): 'select * from %s' % table_name).get_data() @pytest.mark.execute_serially - def test_add_column(self, vector): + def test_add_column(self): """Columns added in one engine are visible in the other via DESCRIBE.""" with self.ImpalaDbWrapper(self, self.unique_string()) as db_name: with self.ImpalaTableWrapper(self, db_name + '.' + self.unique_string(), @@ -682,14 +680,14 @@ class TestHmsIntegration(ImpalaTestSuite): assert expected == self.impala_columns(table_name) @pytest.mark.execute_serially - def test_drop_database(self, vector): + def test_drop_database(self): """ If a DB is created, then dropped, in Hive, Impala can create one with the same name without invalidating metadata. 
""" test_db = self.unique_string() - with HiveDbWrapper(self, test_db) as db_name: + with HiveDbWrapper(self, test_db): pass # if events processing is turned on we should make sure that the drop # database event above is processed to avoid flakiness @@ -701,11 +699,11 @@ class TestHmsIntegration(ImpalaTestSuite): self.unique_string()), 'Database does not exist: %s' % test_db) - with self.ImpalaDbWrapper(self, test_db) as db_name: + with self.ImpalaDbWrapper(self, test_db): pass @pytest.mark.execute_serially - def test_table_format_change(self, vector): + def test_table_format_change(self): """ Hive storage format changes propagate to Impala. """ @@ -724,7 +722,7 @@ class TestHmsIntegration(ImpalaTestSuite): assert 'AVRO' == self.impala_table_stats(table_name)[()]['format'] @pytest.mark.execute_serially - def test_change_column_type(self, vector): + def test_change_column_type(self): """Hive column type changes propagate to Impala.""" with HiveDbWrapper(self, self.unique_string()) as db_name: @@ -741,7 +739,7 @@ class TestHmsIntegration(ImpalaTestSuite): assert 'string' == self.impala_columns(table_name)['y']['type'] @pytest.mark.execute_serially - def test_change_parquet_column_type(self, vector): + def test_change_parquet_column_type(self): """ Changing column types in Parquet doesn't always work in Hive and it causes 'select *' to fail in Impala as well, after invalidating metadata. 
This is a @@ -786,7 +784,7 @@ class TestHmsIntegration(ImpalaTestSuite): # The error message is different in newer Javas than in 17 # TODO: find out which version changed it exactly err_msg = err_msg.format( - "class " if os.environ.get('IMPALA_JDK_VERSION_NUM') >= 17 else "") + "class " if int(os.environ.get('IMPALA_JDK_VERSION_NUM')) >= 17 else "") self.assert_sql_error( self.run_stmt_in_hive, 'select * from %s' % table_name, err_msg) self.client.execute('invalidate metadata %s' % table_name) @@ -795,7 +793,7 @@ class TestHmsIntegration(ImpalaTestSuite): "Column type: INT, Parquet schema:") @SkipIfHive2.acid - def test_acid_inserts(self, vector, unique_database): + def test_acid_inserts(self, unique_database): """ Insert data to insert-only ACID table from Impala and checks that Hive is able to see the data. @@ -858,7 +856,7 @@ class TestHmsIntegration(ImpalaTestSuite): assert '4,41' == hive_result[4] @SkipIfHive2.acid - def test_drop_acid_table(self, vector, unique_database): + def test_drop_acid_table(self, unique_database): """ Tests that a transactional table dropped by Impala is also dropped if we check from Hive. @@ -876,7 +874,7 @@ class TestHmsIntegration(ImpalaTestSuite): assert "acid_insert" not in show_tables_result_after_drop @SkipIfHive2.acid - def test_truncate_acid_table(self, vector, unique_database): + def test_truncate_acid_table(self, unique_database): """ Tests that a transactional table truncated by Impala shows no rows when queried by Hive. @@ -896,7 +894,7 @@ class TestHmsIntegration(ImpalaTestSuite): assert "0" == query_result_after_truncate.split('\n')[1] @pytest.mark.execute_serially - def test_change_table_name(self, vector): + def test_change_table_name(self): """ Changing the table name in Hive propagates to Impala after 'invalidate metadata'. 
@@ -919,7 +917,7 @@ class TestHmsIntegration(ImpalaTestSuite): 'Could not resolve path') @pytest.mark.execute_serially - def test_impala_partitions_accessible_in_hive(self, vector): + def test_impala_partitions_accessible_in_hive(self): """ IMPALA-1670, IMPALA-4141: Partitions added in Impala are accessible through Hive """
