This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 8f35c7f4a IMPALA-12052: Update EC policy string for Ozone
8f35c7f4a is described below
commit 8f35c7f4aae81be9e47e3bfe3bb0fa6895d9ef8b
Author: Michael Smith <[email protected]>
AuthorDate: Mon Apr 10 09:30:51 2023 -0700
IMPALA-12052: Update EC policy string for Ozone
HDDS-7122 changed how Ozone prints the chunk size, from bytes to KB (for
example, 1024k rather than 1048576). That makes it consistent with HDFS reporting.
Our tests verify the chunk size reported in SHOW output. Updates the
expected erasure code policy string to match the new format.
Updates CDP_OZONE_VERSION to a build that includes HDDS-7122. However,
this build includes two regressions that we work around for the moment:
- HDDS-8543: FSO layout reports incorrect replication config for
directories in EC buckets
- HDDS-8289: FSO layout listStatus operations get slower with lots of
files and filesystem operations
Testing:
- ran test suite with Ozone Erasure Coding
Change-Id: I5354de61bbc507931a1d5bc86f6466c0dd50fc30
Reviewed-on: http://gerrit.cloudera.org:8080/19870
Tested-by: Impala Public Jenkins <[email protected]>
Reviewed-by: Laszlo Gaal <[email protected]>
Reviewed-by: Joe McDonnell <[email protected]>
---
bin/impala-config.sh | 24 ++++++++++++------------
tests/authorization/test_ranger.py | 1 +
tests/common/skip.py | 1 +
tests/metadata/test_compute_stats.py | 5 +++++
tests/metadata/test_ddl.py | 4 ++++
tests/metadata/test_metadata_query_statements.py | 1 +
tests/query_test/test_iceberg.py | 3 ++-
tests/stress/test_acid_stress.py | 5 ++++-
8 files changed, 30 insertions(+), 14 deletions(-)
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 2cf2ee2f2..83ac748b9 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -212,19 +212,19 @@ fi
: ${IMPALA_TOOLCHAIN_HOST:=native-toolchain.s3.amazonaws.com}
export IMPALA_TOOLCHAIN_HOST
-export CDP_BUILD_NUMBER=39127492
+export CDP_BUILD_NUMBER=39671873
export CDP_MAVEN_REPOSITORY=\
"https://${IMPALA_TOOLCHAIN_HOST}/build/cdp_components/${CDP_BUILD_NUMBER}/maven"
-export CDP_AVRO_JAVA_VERSION=1.8.2.7.2.17.0-160
-export CDP_HADOOP_VERSION=3.1.1.7.2.17.0-160
-export CDP_HBASE_VERSION=2.4.6.7.2.17.0-160
-export CDP_HIVE_VERSION=3.1.3000.7.2.17.0-160
-export CDP_ICEBERG_VERSION=1.1.0.7.2.17.0-160
-export CDP_KNOX_VERSION=1.3.0.7.2.17.0-160
-export CDP_OZONE_VERSION=1.3.0.7.2.17.0-160
-export CDP_PARQUET_VERSION=1.10.99.7.2.17.0-160
-export CDP_RANGER_VERSION=2.3.0.7.2.17.0-160
-export CDP_TEZ_VERSION=0.9.1.7.2.17.0-160
+export CDP_AVRO_JAVA_VERSION=1.8.2.7.2.17.0-204
+export CDP_HADOOP_VERSION=3.1.1.7.2.17.0-204
+export CDP_HBASE_VERSION=2.4.6.7.2.17.0-204
+export CDP_HIVE_VERSION=3.1.3000.7.2.17.0-204
+export CDP_ICEBERG_VERSION=1.1.0.7.2.17.0-204
+export CDP_KNOX_VERSION=1.3.0.7.2.17.0-204
+export CDP_OZONE_VERSION=1.3.0.7.2.17.0-204
+export CDP_PARQUET_VERSION=1.10.99.7.2.17.0-204
+export CDP_RANGER_VERSION=2.3.0.7.2.17.0-204
+export CDP_TEZ_VERSION=0.9.1.7.2.17.0-204
# Ref: https://infra.apache.org/release-download-pages.html#closer
: ${APACHE_MIRROR:="https://www.apache.org/dyn/closer.cgi"}
@@ -760,7 +760,7 @@ elif [ "${TARGET_FILESYSTEM}" = "ozone" ]; then
if [[ "${ERASURE_CODING}" = true ]]; then
export OZONE_ERASURECODE_POLICY="RS-3-2-1024k"
# Ozone normalizes the policy for internal storage. Use this string for
tests.
- export ERASURECODE_POLICY="rs-3-2-1048576"
+ export ERASURECODE_POLICY="rs-3-2-1024k"
fi
else
echo "Unsupported filesystem '$TARGET_FILESYSTEM'"
diff --git a/tests/authorization/test_ranger.py
b/tests/authorization/test_ranger.py
index 41b6eb4c3..949fd4cf1 100644
--- a/tests/authorization/test_ranger.py
+++ b/tests/authorization/test_ranger.py
@@ -2016,6 +2016,7 @@ class TestRanger(CustomClusterTestSuite):
admin_client.execute(statement, user=ADMIN)
@pytest.mark.execute_serially
+ @SkipIfFS.incorrent_reported_ec
@CustomClusterTestSuite.with_args(
# We additionally provide impalad and catalogd with the customized
user-to-groups
# mapper since some test cases in grant_revoke.test require Impala to
retrieve the
diff --git a/tests/common/skip.py b/tests/common/skip.py
index cdd96a801..bb7326be2 100644
--- a/tests/common/skip.py
+++ b/tests/common/skip.py
@@ -75,6 +75,7 @@ class SkipIfFS:
reason="block size is larger than 128MB")
read_speed_dependent = pytest.mark.skipif(not IS_HDFS or IS_EC,
reason="success depends on fast scan node performance")
+ incorrent_reported_ec = pytest.mark.skipif(IS_OZONE and IS_EC,
reason="HDDS-8543")
# These need test infra work to re-enable.
hive = pytest.mark.skipif(not IS_HDFS, reason="Hive doesn't work")
diff --git a/tests/metadata/test_compute_stats.py
b/tests/metadata/test_compute_stats.py
index 7d4007a4c..370f3eef0 100644
--- a/tests/metadata/test_compute_stats.py
+++ b/tests/metadata/test_compute_stats.py
@@ -52,10 +52,12 @@ class TestComputeStats(ImpalaTestSuite):
create_uncompressed_text_dimension(cls.get_workload()))
@SkipIfLocal.hdfs_blocks
+ @SkipIfFS.incorrent_reported_ec
def test_compute_stats(self, vector, unique_database):
self.run_test_case('QueryTest/compute-stats', vector, unique_database)
@SkipIfLocal.hdfs_blocks
+ @SkipIfFS.incorrent_reported_ec
def test_compute_stats_avro(self, vector, unique_database,
cluster_properties):
if cluster_properties.is_catalog_v2_cluster():
# IMPALA-7308: changed behaviour of various Avro edge cases
significantly in the
@@ -66,16 +68,19 @@ class TestComputeStats(ImpalaTestSuite):
self.run_test_case('QueryTest/compute-stats-avro', vector,
unique_database)
@SkipIfLocal.hdfs_blocks
+ @SkipIfFS.incorrent_reported_ec
def test_compute_stats_decimal(self, vector, unique_database):
# Test compute stats on decimal columns separately so we can vary between
platforms
# with and without write support for decimals (Hive < 0.11 and >= 0.11).
self.run_test_case('QueryTest/compute-stats-decimal', vector,
unique_database)
@SkipIfLocal.hdfs_blocks
+ @SkipIfFS.incorrent_reported_ec
def test_compute_stats_date(self, vector, unique_database):
# Test compute stats on date columns separately.
self.run_test_case('QueryTest/compute-stats-date', vector, unique_database)
+ @SkipIfFS.incorrent_reported_ec
def test_compute_stats_incremental(self, vector, unique_database):
self.run_test_case('QueryTest/compute-stats-incremental', vector,
unique_database)
diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py
index 95dc10176..296703301 100644
--- a/tests/metadata/test_ddl.py
+++ b/tests/metadata/test_ddl.py
@@ -194,6 +194,7 @@ class TestDdlStatements(TestDdlBase):
# Reset to its default value.
self.client.execute("set s3_skip_insert_staging=true")
+ @SkipIfFS.incorrent_reported_ec
@UniqueDatabase.parametrize(sync_ddl=True)
def test_truncate_table(self, vector, unique_database):
vector.get_value('exec_option')['abort_on_error'] = False
@@ -286,6 +287,7 @@ class TestDdlStatements(TestDdlBase):
self.run_test_case('QueryTest/create-table', vector,
use_db=unique_database,
multiple_impalad=self._use_multiple_impalad(vector))
+ @SkipIfFS.incorrent_reported_ec
@UniqueDatabase.parametrize(sync_ddl=True)
def test_create_table_like_table(self, vector, unique_database):
vector.get_value('exec_option')['abort_on_error'] = False
@@ -442,6 +444,7 @@ class TestDdlStatements(TestDdlBase):
# TODO: don't use hdfs_client
@SkipIfLocal.hdfs_client
+ @SkipIfFS.incorrent_reported_ec
@UniqueDatabase.parametrize(sync_ddl=True, num_dbs=2)
def test_alter_table(self, vector, unique_database):
vector.get_value('exec_option')['abort_on_error'] = False
@@ -774,6 +777,7 @@ class TestDdlStatements(TestDdlBase):
assert result.data[0] == ''
assert result.data[1] == 'NULL'
+ @SkipIfFS.incorrent_reported_ec
@UniqueDatabase.parametrize(sync_ddl=True)
def test_partition_ddl_predicates(self, vector, unique_database):
self.run_test_case('QueryTest/partition-ddl-predicates-all-fs', vector,
diff --git a/tests/metadata/test_metadata_query_statements.py
b/tests/metadata/test_metadata_query_statements.py
index e2ecc985d..071339f37 100644
--- a/tests/metadata/test_metadata_query_statements.py
+++ b/tests/metadata/test_metadata_query_statements.py
@@ -66,6 +66,7 @@ class TestMetadataQueryStatements(ImpalaTestSuite):
def test_show(self, vector):
self.run_test_case('QueryTest/show', vector)
+ @SkipIfFS.incorrent_reported_ec
def test_show_stats(self, vector):
self.run_test_case('QueryTest/show-stats', vector, "functional")
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index 37ad24e32..028c6859b 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -36,7 +36,7 @@ import json
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.iceberg_test_suite import IcebergTestSuite
-from tests.common.skip import SkipIf, SkipIfDockerizedCluster
+from tests.common.skip import SkipIf, SkipIfFS, SkipIfDockerizedCluster
from tests.common.file_utils import (
create_iceberg_table_from_directory,
create_table_from_parquet)
@@ -1089,6 +1089,7 @@ class TestIcebergTable(IcebergTestSuite):
assert parquet_column_name_type_list == iceberg_column_name_type_list
+ @SkipIfFS.incorrent_reported_ec
def test_compute_stats(self, vector, unique_database):
self.run_test_case('QueryTest/iceberg-compute-stats', vector,
unique_database)
diff --git a/tests/stress/test_acid_stress.py b/tests/stress/test_acid_stress.py
index a284717b5..c4dee98b9 100644
--- a/tests/stress/test_acid_stress.py
+++ b/tests/stress/test_acid_stress.py
@@ -27,6 +27,7 @@ from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.parametrize import UniqueDatabase
from tests.common.skip import SkipIf, SkipIfHive2, SkipIfFS,
SkipIfDockerizedCluster
from tests.stress.stress_util import Task, run_tasks
+from tests.util.filesystem_utils import IS_OZONE
NUM_OVERWRITES = 2
NUM_INSERTS_PER_OVERWRITE = 4
@@ -306,7 +307,9 @@ class TestConcurrentAcidInserts(TestAcidStress):
checkers = [Task(self._impala_role_concurrent_checker, tbl_name, i,
counter,
num_writers)
for i in range(0, num_checkers)]
- run_tasks(writers + checkers)
+ # HDDS-8289: Ozone listStatus is slow with lots of files
+ timeout = 900 if IS_OZONE else 600
+ run_tasks(writers + checkers, timeout_seconds=timeout)
class TestFailingAcidInserts(TestAcidStress):