This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 9f50b5d2395e8789e2af0d67379a07f7355560c3 Author: Riza Suminto <[email protected]> AuthorDate: Mon Nov 13 18:16:42 2023 -0800 IMPALA-12528: (addendum) test non-reserved bytes against parquet test_hdfs_scanner_thread_non_reserved_bytes is still flaky due to the inconsistent number of data files of the tpch_text_gzip.lineitem table created during data loading. This patch further deflakes this test by changing its target table to tpch_parquet.lineitem. This table is selected because it consists of 3 parquet snappy files, which in turn are consistently planned as 1 scan range per file. This patch also raises the delay injection from 100ms to 500ms. Testing: - Confirm that tpch_parquet.lineitem is always populated with 3 parquet snappy files in the HDFS minicluster by repeating the following command: ./bin/load-data.py -f -w tpch --table_formats=parquet/none \ --table_name=lineitem - Loop test_hdfs_scanner_thread_non_reserved_bytes 100 times against HDFS and OZONE. Change-Id: I2f464b1d1e6b676bf9e1376afd4497cb27cd4e23 Reviewed-on: http://gerrit.cloudera.org:8080/20705 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../hdfs-scanner-thread-non-reserved-bytes.test | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/testdata/workloads/functional-query/queries/QueryTest/hdfs-scanner-thread-non-reserved-bytes.test b/testdata/workloads/functional-query/queries/QueryTest/hdfs-scanner-thread-non-reserved-bytes.test index 5e2b61bee..ba2d87aab 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/hdfs-scanner-thread-non-reserved-bytes.test +++ b/testdata/workloads/functional-query/queries/QueryTest/hdfs-scanner-thread-non-reserved-bytes.test @@ -1,11 +1,11 @@ ==== ---- QUERY # IMPALA-11068: without tuning hdfs_scanner_non_reserved_bytes, this query can launch -# up to 3 threads (compressed_text_est_bytes ~ 211MB). 
+# up to 3 threads (with hdfs_scanner_thread_max_estimated_bytes default to 32MB). set num_nodes=1; -set mem_limit=750m; -set debug_action="HDFS_SCANNER_THREAD_OBTAINED_RANGE:SLEEP@100"; -select count(*) from tpch_text_gzip.lineitem where l_comment = 'telets. quickly '; +set mem_limit=130m; +set debug_action="HDFS_SCANNER_THREAD_OBTAINED_RANGE:SLEEP@500"; +select count(*) from tpch_parquet.lineitem where l_comment = 'telets. quickly '; ---- RESULTS: 4 ---- RUNTIME_PROFILE @@ -15,10 +15,10 @@ aggregation(SUM, NumScannerThreadsStarted): 3 # IMPALA-11068: raising hdfs_scanner_non_reserved_bytes above compressed_text_est_bytes # will reduce NumScannerThreadsStarted. set num_nodes=1; -set mem_limit=750m; -set debug_action="HDFS_SCANNER_THREAD_OBTAINED_RANGE:SLEEP@100"; -set hdfs_scanner_non_reserved_bytes=320m; -select count(*) from tpch_text_gzip.lineitem where l_comment = 'telets. quickly '; +set mem_limit=130m; +set debug_action="HDFS_SCANNER_THREAD_OBTAINED_RANGE:SLEEP@500"; +set hdfs_scanner_non_reserved_bytes=48m; +select count(*) from tpch_parquet.lineitem where l_comment = 'telets. quickly '; ---- RESULTS: 4 ---- RUNTIME_PROFILE @@ -28,10 +28,10 @@ aggregation(SUM, NumScannerThreadsStarted): 2 # IMPALA-11068: high hdfs_scanner_non_reserved_bytes does not impact the first scanner # thread. set num_nodes=1; -set mem_limit=750m; -set debug_action="HDFS_SCANNER_THREAD_OBTAINED_RANGE:SLEEP@100"; -set hdfs_scanner_non_reserved_bytes=2g; -select count(*) from tpch_text_gzip.lineitem where l_comment = 'telets. quickly '; +set mem_limit=130m; +set debug_action="HDFS_SCANNER_THREAD_OBTAINED_RANGE:SLEEP@500"; +set hdfs_scanner_non_reserved_bytes=64m; +select count(*) from tpch_parquet.lineitem where l_comment = 'telets. quickly '; ---- RESULTS: 4 ---- RUNTIME_PROFILE
