This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 768527c89ad2ea3484fec0cd0bfdd56f54ab9046 Author: Michael Smith <[email protected]> AuthorDate: Wed Feb 26 14:41:58 2025 -0800 IMPALA-13804: Use redacted statement in live table Uses the redacted SQL statement in sys.impala_query_live, so it's consistent with the profile and sys.impala_query_log. Testing: added a test with redaction rules for live and log tables. Change-Id: I9a72eeaea84981e96655aec6c67b5ef2cbbd3c3e Reviewed-on: http://gerrit.cloudera.org:8080/22556 Reviewed-by: Impala Public Jenkins <[email protected]> Reviewed-by: Jason Fehr <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/exec/system-table-scanner.cc | 2 +- testdata/workload_mgmt/redaction.json | 11 +++++++++++ tests/custom_cluster/test_query_live.py | 16 +++++++++++++++- tests/custom_cluster/test_query_log.py | 23 ++++++++++++++++++++++- tests/util/workload_management.py | 7 +++++++ 5 files changed, 56 insertions(+), 3 deletions(-) diff --git a/be/src/exec/system-table-scanner.cc b/be/src/exec/system-table-scanner.cc index e286c8353..229ad560c 100644 --- a/be/src/exec/system-table-scanner.cc +++ b/be/src/exec/system-table-scanner.cc @@ -363,7 +363,7 @@ Status QueryScanner::MaterializeNextTuple( } break; case TQueryTableColumn::SQL: - RETURN_IF_ERROR(WriteStringSlot(record.stmt, pool, slot)); + RETURN_IF_ERROR(WriteStringSlot(query.redacted_sql, pool, slot)); break; case TQueryTableColumn::PLAN: RETURN_IF_ERROR(WriteStringSlot( diff --git a/testdata/workload_mgmt/redaction.json b/testdata/workload_mgmt/redaction.json new file mode 100644 index 000000000..3a086fcfe --- /dev/null +++ b/testdata/workload_mgmt/redaction.json @@ -0,0 +1,11 @@ +{ + "version": 1, + "rules": [ + { + "description": "Don't show nonsense", + "caseSensitive": false, + "search": "supercalifragilisticexpialidocious", + "replace": "*poppins*" + } + ] +} diff --git a/tests/custom_cluster/test_query_live.py b/tests/custom_cluster/test_query_live.py index a0868d997..3175e544a 100644 --- 
a/tests/custom_cluster/test_query_live.py +++ b/tests/custom_cluster/test_query_live.py @@ -25,7 +25,7 @@ from signal import SIGRTMIN from tests.common.custom_cluster_test_suite import CustomClusterTestSuite from tests.common.impala_cluster import DEFAULT_KRPC_PORT from tests.util.retry import retry -from tests.util.workload_management import assert_query +from tests.util.workload_management import assert_query, redaction_rules_file from time import sleep @@ -225,6 +225,20 @@ class TestQueryLive(CustomClusterTestSuite): assert_query('sys.impala_query_live', self.client, 'test_query_live', result.runtime_profile) + @CustomClusterTestSuite.with_args(impalad_args="--enable_workload_mgmt " + "--cluster_id=test_query_live " + "--redaction_rules_file={}" + .format(redaction_rules_file()), + catalogd_args="--enable_workload_mgmt", + disable_log_buffering=True) + def test_redaction(self): + """Asserts the query live table redacts the statement.""" + result = self.client.execute( + "select *, 'supercalifragilisticexpialidocious' from functional.alltypes", + fetch_profile_after_close=True) + assert_query('sys.impala_query_live', self.client, 'test_query_live', + result.runtime_profile) + @CustomClusterTestSuite.with_args(impalad_args="--enable_workload_mgmt " "--cluster_id=test_query_live", catalogd_args="--enable_workload_mgmt", diff --git a/tests/custom_cluster/test_query_log.py b/tests/custom_cluster/test_query_log.py index 31dcf26c8..1d780e7ec 100644 --- a/tests/custom_cluster/test_query_log.py +++ b/tests/custom_cluster/test_query_log.py @@ -34,7 +34,11 @@ from tests.common.custom_cluster_test_suite import CustomClusterTestSuite from tests.common.impala_test_suite import IMPALAD_HS2_HOST_PORT from tests.common.test_vector import ImpalaTestDimension from tests.util.retry import retry -from tests.util.workload_management import assert_query, WM_DB, QUERY_TBL_LOG +from tests.util.workload_management import ( + assert_query, + WM_DB, + QUERY_TBL_LOG, + 
redaction_rules_file) class TestQueryLogTableBase(CustomClusterTestSuite): @@ -410,6 +414,23 @@ class TestQueryLogTableBeeswax(TestQueryLogTableBase): finally: client2.close() + @CustomClusterTestSuite.with_args(impalad_args="--enable_workload_mgmt " + "--query_log_write_interval_s=1 " + "--redaction_rules_file={}" + .format(redaction_rules_file()), + catalogd_args="--enable_workload_mgmt", + disable_log_buffering=True) + def test_redaction(self): + """Asserts the query log table redacts the statement.""" + result = self.client.execute( + "select *, 'supercalifragilisticexpialidocious' from functional.alltypes", + fetch_profile_after_close=True) + assert result.success + + self.cluster.get_first_impalad().service.wait_for_metric_value( + "impala-server.completed-queries.written", 1, 60) + assert_query(QUERY_TBL_LOG, self.client, raw_profile=result.runtime_profile) + class TestQueryLogOtherTable(TestQueryLogTableBase): """Tests to assert that query_log_table_name works with non-default value.""" diff --git a/tests/util/workload_management.py b/tests/util/workload_management.py index 9b2f567db..3a8a7c5b7 100644 --- a/tests/util/workload_management.py +++ b/tests/util/workload_management.py @@ -17,6 +17,7 @@ from __future__ import absolute_import, division, print_function +import os import re import requests @@ -733,3 +734,9 @@ def assert_csv_col(client, query_tbl, col, query_id, expected_list, db="tpcds"): "'{}'\n actual (length {}): {}\n expected (length {}): {}" \ .format(TQueryTableColumn._VALUES_TO_NAMES[col], query_id, expected, len(actual), sorted(actual), len(expected_list), sorted(expected_list)) + + +def redaction_rules_file(): + """Provides the path to a redaction file that redacts the word + 'supercalifragilisticexpialidocious'.""" + return "{}/testdata/workload_mgmt/redaction.json".format(os.environ["IMPALA_HOME"])
