This is an automated email from the ASF dual-hosted git repository.

jasonmfehr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 32836eab4 IMPALA-13881: Fix Workload Management Statement Expression Limit Exceeded Errors
32836eab4 is described below

commit 32836eab4a42541e44d95435f815a5d0cf9d3b24
Author: jasonmfehr <[email protected]>
AuthorDate: Thu Mar 20 13:27:20 2025 -0700

    IMPALA-13881: Fix Workload Management Statement Expression Limit Exceeded Errors
    
    The workload management code calculates the needed statement
    expression limit by multiplying the number of columns in the workload
    management completed queries table by the number of rows being
    inserted. This calculation was added in case the
    default_query_options startup flag sets the default value of the
    statement_expression_limit query option to a very low value.
    
    In practice, the calculation has been wrong, causing workload
    management insert DMLs to fail with:
    "AnalysisException: Exceeded the statement expression limit (1024)".
    
    This commit adds a new hidden startup flag query_log_expression_limit
    to set the value of the statement_expression_limit query option on
    the workload management insert DMLs. If the value of this flag is
    less than 0, the query option is not set. Otherwise, the query option
    is set to the value of this new flag.
    
    Additionally, the default value of the query_log_max_queued startup
    flag has been reduced from 5,000 to 3,000. This flag places an upper
    limit on the completed queries queue size, and if workload management
    attempted to insert 5,000 records at once, the resulting insert DML
    would exceed the 250,000 default for the statement_expression_limit
    query option.
    
    Testing was accomplished by running all workload management related
    custom cluster tests locally.
    
    Change-Id: I999187b33cfab411b62931458f2c4ce3be5ad88d
    Reviewed-on: http://gerrit.cloudera.org:8080/22652
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/service/workload-management-worker.cc      | 11 +++++++----
 be/src/workload_mgmt/workload-management-flags.cc | 15 ++++++++++++++-
 tests/custom_cluster/test_query_log.py            |  3 ++-
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/be/src/service/workload-management-worker.cc 
b/be/src/service/workload-management-worker.cc
index 870688db3..c4a849a00 100644
--- a/be/src/service/workload-management-worker.cc
+++ b/be/src/service/workload-management-worker.cc
@@ -76,6 +76,7 @@ DECLARE_string(cluster_id);
 DECLARE_string(query_log_request_pool);
 DECLARE_int32(query_log_write_timeout_s);
 DECLARE_string(workload_mgmt_user);
+DECLARE_int32(query_log_expression_limit);
 
 namespace impala {
 namespace workloadmgmt {
@@ -775,10 +776,12 @@ void ImpalaServer::WorkloadManagementWorker(const 
Version& target_schema_version
     // Set max_statement_length_bytes based on actual query, and at least the 
minimum.
     opts[TImpalaQueryOptions::MAX_STATEMENT_LENGTH_BYTES] =
         std::to_string(max<size_t>(MIN_MAX_STATEMENT_LENGTH_BYTES, 
final_sql_len));
-    // Set statement_expression_limit based on actual query, and at least the 
minimum.
-    opts[TImpalaQueryOptions::STATEMENT_EXPRESSION_LIMIT] =
-        std::to_string(max<size_t>(MIN_STATEMENT_EXPRESSION_LIMIT,
-            queries_to_insert.size() * 
_TQueryTableColumn_VALUES_TO_NAMES.size()));
+    // Set statement_expression_limit based on the startup flag. The flag 
validation
+    // ensures this value is numeric and falls within an acceptable range.
+    if (FLAGS_query_log_expression_limit >= 0) {
+      opts[TImpalaQueryOptions::STATEMENT_EXPRESSION_LIMIT] =
+          std::to_string(FLAGS_query_log_expression_limit);
+    }
     opts[TImpalaQueryOptions::MAX_ROW_SIZE] = std::to_string(max_row_size);
 
     // Execute the insert dml.
diff --git a/be/src/workload_mgmt/workload-management-flags.cc 
b/be/src/workload_mgmt/workload-management-flags.cc
index d582e0a41..9d83d82d1 100644
--- a/be/src/workload_mgmt/workload-management-flags.cc
+++ b/be/src/workload_mgmt/workload-management-flags.cc
@@ -92,7 +92,7 @@ DEFINE_int32(query_log_dml_exec_timeout_s, 120, "Value of the 
EXEC_TIME_LIMIT_S
     "query option on the query log table insert dmls.");
 DEFINE_validator(query_log_dml_exec_timeout_s, gt_eq_0);
 
-DEFINE_int32(query_log_max_queued, 5000, "Maximum number of records that can 
be queued "
+DEFINE_int32(query_log_max_queued, 3000, "Maximum number of records that can 
be queued "
     "before they are written to the impala query log table. This flag operates 
"
     "independently of the 'query_log_write_interval_s' flag. If the number of 
queued "
     "records reaches this value, the records will be written to the query log 
table no "
@@ -185,3 +185,16 @@ DEFINE_string_hidden(workload_mgmt_drop_tables, "", 
"Specifies which workload ma
     "tables to drop at startup. Value must be a comma-separated list of table 
names only "
     "(without the database name) used in workload managment. This flag will 
fix "
     "situations where the tables have become corrupt and are preventing daemon 
startup.");
+
+DEFINE_int32_hidden(query_log_expression_limit, -1, "Determines the value of 
the "
+    "statement_expression_limit query option in the workload management 
completed "
+    "queries insert DML. A negative value leaves this query option unset.");
+DEFINE_validator(query_log_expression_limit, [](const char* name, const 
int32_t val) {
+  if (val >= 0 && (val < 1024 || val > 999999)) {
+    LOG(ERROR) << "Invalid value for --" << name << ": must be at least 1,024 
and less "
+        "than 1,000,000";
+    return false;
+  }
+
+  return true;
+});
\ No newline at end of file
diff --git a/tests/custom_cluster/test_query_log.py 
b/tests/custom_cluster/test_query_log.py
index a27a13c55..c5d2a9ffa 100644
--- a/tests/custom_cluster/test_query_log.py
+++ b/tests/custom_cluster/test_query_log.py
@@ -293,7 +293,8 @@ class TestQueryLogTableBeeswax(TestQueryLogTableBase):
   @CustomClusterTestSuite.with_args(impalad_args="--enable_workload_mgmt "
                                                  "--query_log_max_queued={0} "
                                                  
"--query_log_write_interval_s=9999 "
-                                                 "--cluster_id={1}"
+                                                 "--cluster_id={1} "
+                                                 
"--query_log_expression_limit=5000"
                                                  
.format(FLUSH_MAX_RECORDS_QUERY_COUNT,
                                                  FLUSH_MAX_RECORDS_CLUSTER_ID),
                                     catalogd_args="--enable_workload_mgmt",

Reply via email to