This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 768527c89ad2ea3484fec0cd0bfdd56f54ab9046
Author: Michael Smith <[email protected]>
AuthorDate: Wed Feb 26 14:41:58 2025 -0800

    IMPALA-13804: Use redacted statement in live table
    
    Uses the redacted SQL statement in sys.impala_query_live, so it's
    consistent with the profile and sys.impala_query_log.
    
    Testing: added a test with redaction rules for live and log tables.
    
    Change-Id: I9a72eeaea84981e96655aec6c67b5ef2cbbd3c3e
    Reviewed-on: http://gerrit.cloudera.org:8080/22556
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Reviewed-by: Jason Fehr <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/exec/system-table-scanner.cc     |  2 +-
 testdata/workload_mgmt/redaction.json   | 11 +++++++++++
 tests/custom_cluster/test_query_live.py | 16 +++++++++++++++-
 tests/custom_cluster/test_query_log.py  | 23 ++++++++++++++++++++++-
 tests/util/workload_management.py       |  7 +++++++
 5 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/be/src/exec/system-table-scanner.cc b/be/src/exec/system-table-scanner.cc
index e286c8353..229ad560c 100644
--- a/be/src/exec/system-table-scanner.cc
+++ b/be/src/exec/system-table-scanner.cc
@@ -363,7 +363,7 @@ Status QueryScanner::MaterializeNextTuple(
         }
         break;
       case TQueryTableColumn::SQL:
-        RETURN_IF_ERROR(WriteStringSlot(record.stmt, pool, slot));
+        RETURN_IF_ERROR(WriteStringSlot(query.redacted_sql, pool, slot));
         break;
       case TQueryTableColumn::PLAN:
         RETURN_IF_ERROR(WriteStringSlot(
diff --git a/testdata/workload_mgmt/redaction.json b/testdata/workload_mgmt/redaction.json
new file mode 100644
index 000000000..3a086fcfe
--- /dev/null
+++ b/testdata/workload_mgmt/redaction.json
@@ -0,0 +1,11 @@
+{
+  "version": 1,
+  "rules": [
+    {
+      "description": "Don't show nonsense",
+      "caseSensitive": false,
+      "search": "supercalifragilisticexpialidocious",
+      "replace": "*poppins*"
+    }
+  ]
+}
diff --git a/tests/custom_cluster/test_query_live.py b/tests/custom_cluster/test_query_live.py
index a0868d997..3175e544a 100644
--- a/tests/custom_cluster/test_query_live.py
+++ b/tests/custom_cluster/test_query_live.py
@@ -25,7 +25,7 @@ from signal import SIGRTMIN
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.common.impala_cluster import DEFAULT_KRPC_PORT
 from tests.util.retry import retry
-from tests.util.workload_management import assert_query
+from tests.util.workload_management import assert_query, redaction_rules_file
 from time import sleep
 
 
@@ -225,6 +225,20 @@ class TestQueryLive(CustomClusterTestSuite):
     assert_query('sys.impala_query_live', self.client, 'test_query_live',
                  result.runtime_profile)
 
+  @CustomClusterTestSuite.with_args(impalad_args="--enable_workload_mgmt "
+                                                 "--cluster_id=test_query_live "
+                                                 "--redaction_rules_file={}"
+                                                 .format(redaction_rules_file()),
+                                    catalogd_args="--enable_workload_mgmt",
+                                    disable_log_buffering=True)
+  def test_redaction(self):
+    """Asserts the query live table redacts the statement."""
+    result = self.client.execute(
+        "select *, 'supercalifragilisticexpialidocious' from functional.alltypes",
+        fetch_profile_after_close=True)
+    assert_query('sys.impala_query_live', self.client, 'test_query_live',
+                 result.runtime_profile)
+
   @CustomClusterTestSuite.with_args(impalad_args="--enable_workload_mgmt "
                                                  "--cluster_id=test_query_live",
                                     catalogd_args="--enable_workload_mgmt",
diff --git a/tests/custom_cluster/test_query_log.py b/tests/custom_cluster/test_query_log.py
index 31dcf26c8..1d780e7ec 100644
--- a/tests/custom_cluster/test_query_log.py
+++ b/tests/custom_cluster/test_query_log.py
@@ -34,7 +34,11 @@ from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.common.impala_test_suite import IMPALAD_HS2_HOST_PORT
 from tests.common.test_vector import ImpalaTestDimension
 from tests.util.retry import retry
-from tests.util.workload_management import assert_query, WM_DB, QUERY_TBL_LOG
+from tests.util.workload_management import (
+    assert_query,
+    WM_DB,
+    QUERY_TBL_LOG,
+    redaction_rules_file)
 
 
 class TestQueryLogTableBase(CustomClusterTestSuite):
@@ -410,6 +414,23 @@ class TestQueryLogTableBeeswax(TestQueryLogTableBase):
     finally:
       client2.close()
 
+  @CustomClusterTestSuite.with_args(impalad_args="--enable_workload_mgmt "
+                                                 "--query_log_write_interval_s=1 "
+                                                 "--redaction_rules_file={}"
+                                                 .format(redaction_rules_file()),
+                                    catalogd_args="--enable_workload_mgmt",
+                                    disable_log_buffering=True)
+  def test_redaction(self):
+    """Asserts the query log table redacts the statement."""
+    result = self.client.execute(
+        "select *, 'supercalifragilisticexpialidocious' from functional.alltypes",
+        fetch_profile_after_close=True)
+    assert result.success
+
+    self.cluster.get_first_impalad().service.wait_for_metric_value(
+        "impala-server.completed-queries.written", 1, 60)
+    assert_query(QUERY_TBL_LOG, self.client, raw_profile=result.runtime_profile)
+
 
 class TestQueryLogOtherTable(TestQueryLogTableBase):
   """Tests to assert that query_log_table_name works with non-default value."""
diff --git a/tests/util/workload_management.py b/tests/util/workload_management.py
index 9b2f567db..3a8a7c5b7 100644
--- a/tests/util/workload_management.py
+++ b/tests/util/workload_management.py
@@ -17,6 +17,7 @@
 
 from __future__ import absolute_import, division, print_function
 
+import os
 import re
 import requests
 
@@ -733,3 +734,9 @@ def assert_csv_col(client, query_tbl, col, query_id, expected_list, db="tpcds"):
         "'{}'\n  actual   (length {}): {}\n  expected (length {}): {}" \
         .format(TQueryTableColumn._VALUES_TO_NAMES[col], query_id, expected, len(actual),
         sorted(actual), len(expected_list), sorted(expected_list))
+
+
+def redaction_rules_file():
+  """Provides the path to a redaction file that redacts the word
+     'supercalifragilisticexpialidocious'."""
+  return "{}/testdata/workload_mgmt/redaction.json".format(os.environ["IMPALA_HOME"])

Reply via email to