This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 05890c1c8446ee0ec4b0ab3092f246e7456790e3 Author: Riza Suminto <[email protected]> AuthorDate: Wed Oct 18 13:20:23 2023 -0700 IMPALA-12499: Deflake test_hdfs_scanner_thread_mem_scaling IMPALA-11068 added three new tests into hdfs-scanner-thread-mem-scaling.test. The first one is failing intermittently, most likely because the fragment right above the scan does not pull row batches fast enough. This patch attempts to deflake the tests by replacing the query with a simple count star query. The three test cases are now contained in their own test_hdfs_scanner_thread_non_reserved_bytes and will be skipped for sanitized builds. Testing: - Loop and pass test_hdfs_scanner_thread_non_reserved_bytes a hundred times. Change-Id: I7c99b2ef70b71e148cedb19037e2d99702966d6e Reviewed-on: http://gerrit.cloudera.org:8080/20593 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../QueryTest/hdfs-scanner-thread-mem-scaling.test | 53 ---------------------- .../hdfs-scanner-thread-non-reserved-bytes.test | 36 +++++++++++++++ tests/common/skip.py | 2 + tests/query_test/test_mem_usage_scaling.py | 10 +++- 4 files changed, 47 insertions(+), 54 deletions(-) diff --git a/testdata/workloads/functional-query/queries/QueryTest/hdfs-scanner-thread-mem-scaling.test b/testdata/workloads/functional-query/queries/QueryTest/hdfs-scanner-thread-mem-scaling.test index b55d848b2..72ec20473 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/hdfs-scanner-thread-mem-scaling.test +++ b/testdata/workloads/functional-query/queries/QueryTest/hdfs-scanner-thread-mem-scaling.test @@ -33,56 +33,3 @@ BIGINT,STRING ---- RUNTIME_PROFILE aggregation(SUM, NumScannerThreadsStarted): 1 ==== ----- QUERY -# IMPALA-11068: without tuning hdfs_scanner_non_reserved_bytes, this query can launch -# up to 3 threads (compressed_text_est_bytes ~ 211MB). 
-set num_nodes=1; -set mem_limit=750m; -select l_orderkey, l_shipmode from tpch_text_gzip.lineitem -where l_comment = 'telets. quickly '; ----- TYPES -BIGINT,STRING ----- RESULTS: VERIFY_IS_EQUAL_SORTED -49824,'RAIL' -1380737,'AIR' -2981252,'TRUCK' -3415170,'MAIL' ----- RUNTIME_PROFILE -aggregation(SUM, NumScannerThreadsStarted): 3 -==== ----- QUERY -# IMPALA-11068: raising hdfs_scanner_non_reserved_bytes above compressed_text_est_bytes -# will reduce NumScannerThreadsStarted. -set num_nodes=1; -set mem_limit=750m; -set hdfs_scanner_non_reserved_bytes=320m; -select l_orderkey, l_shipmode from tpch_text_gzip.lineitem -where l_comment = 'telets. quickly '; ----- TYPES -BIGINT,STRING ----- RESULTS: VERIFY_IS_EQUAL_SORTED -49824,'RAIL' -1380737,'AIR' -2981252,'TRUCK' -3415170,'MAIL' ----- RUNTIME_PROFILE -aggregation(SUM, NumScannerThreadsStarted): 2 -==== ----- QUERY -# IMPALA-11068: high hdfs_scanner_non_reserved_bytes does not impact the first scanner -# thread. -set num_nodes=1; -set mem_limit=750m; -set hdfs_scanner_non_reserved_bytes=2g; -select l_orderkey, l_shipmode from tpch_text_gzip.lineitem -where l_comment = 'telets. quickly '; ----- TYPES -BIGINT,STRING ----- RESULTS: VERIFY_IS_EQUAL_SORTED -49824,'RAIL' -1380737,'AIR' -2981252,'TRUCK' -3415170,'MAIL' ----- RUNTIME_PROFILE -aggregation(SUM, NumScannerThreadsStarted): 1 -==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/hdfs-scanner-thread-non-reserved-bytes.test b/testdata/workloads/functional-query/queries/QueryTest/hdfs-scanner-thread-non-reserved-bytes.test new file mode 100644 index 000000000..f333209ed --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/hdfs-scanner-thread-non-reserved-bytes.test @@ -0,0 +1,36 @@ +==== +---- QUERY +# IMPALA-11068: without tuning hdfs_scanner_non_reserved_bytes, this query can launch +# up to 3 threads (compressed_text_est_bytes ~ 211MB). 
+set num_nodes=1; +set mem_limit=750m; +select count(*) from tpch_text_gzip.lineitem where l_comment = 'telets. quickly '; +---- RESULTS: +4 +---- RUNTIME_PROFILE +aggregation(SUM, NumScannerThreadsStarted): 3 +==== +---- QUERY +# IMPALA-11068: raising hdfs_scanner_non_reserved_bytes above compressed_text_est_bytes +# will reduce NumScannerThreadsStarted. +set num_nodes=1; +set mem_limit=750m; +set hdfs_scanner_non_reserved_bytes=320m; +select count(*) from tpch_text_gzip.lineitem where l_comment = 'telets. quickly '; +---- RESULTS: +4 +---- RUNTIME_PROFILE +aggregation(SUM, NumScannerThreadsStarted): 2 +==== +---- QUERY +# IMPALA-11068: high hdfs_scanner_non_reserved_bytes does not impact the first scanner +# thread. +set num_nodes=1; +set mem_limit=750m; +set hdfs_scanner_non_reserved_bytes=2g; +select count(*) from tpch_text_gzip.lineitem where l_comment = 'telets. quickly '; +---- RESULTS: +4 +---- RUNTIME_PROFILE +aggregation(SUM, NumScannerThreadsStarted): 1 +==== diff --git a/tests/common/skip.py b/tests/common/skip.py index dc419a0ef..2b1ac76cb 100644 --- a/tests/common/skip.py +++ b/tests/common/skip.py @@ -116,6 +116,8 @@ class SkipIf: is_buggy_el6_kernel = pytest.mark.skipif( IS_BUGGY_EL6_KERNEL, reason="Kernel is affected by KUDU-1508") is_test_jdk = pytest.mark.skipif(IS_TEST_JDK, reason="Testing with different JDK") + runs_slowly = pytest.mark.skipif(IMPALA_TEST_CLUSTER_PROPERTIES.runs_slowly(), + reason="Test cluster runs slowly due to enablement of code coverage or sanitizer") class SkipIfLocal: # These are skipped due to product limitations. 
diff --git a/tests/query_test/test_mem_usage_scaling.py b/tests/query_test/test_mem_usage_scaling.py index 2b5982fa1..edf13ba3f 100644 --- a/tests/query_test/test_mem_usage_scaling.py +++ b/tests/query_test/test_mem_usage_scaling.py @@ -26,7 +26,7 @@ from tests.common.test_dimensions import (create_avro_snappy_dimension, create_parquet_dimension) from tests.common.impala_cluster import ImpalaCluster from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfNotHdfsMinicluster, SkipIfFS +from tests.common.skip import SkipIfNotHdfsMinicluster, SkipIfFS, SkipIf from tests.common.test_dimensions import create_single_exec_option_dimension from tests.common.test_vector import ImpalaTestDimension from tests.verifiers.metric_verifier import MetricVerifier @@ -375,6 +375,14 @@ class TestScanMemLimit(ImpalaTestSuite): del vector.get_value('exec_option')['num_nodes'] self.run_test_case('QueryTest/hdfs-scanner-thread-mem-scaling', vector) + @SkipIf.runs_slowly + def test_hdfs_scanner_thread_non_reserved_bytes(self, vector): + """Test that HDFS_SCANNER_NON_RESERVED_BYTES can limit the scale up of scanner threads + properly.""" + # Remove num_nodes setting to allow .test file to set num_nodes. + del vector.get_value('exec_option')['num_nodes'] + self.run_test_case('QueryTest/hdfs-scanner-thread-non-reserved-bytes', vector) + @SkipIfNotHdfsMinicluster.tuned_for_minicluster class TestHashJoinMemLimit(ImpalaTestSuite):
