This is an automated email from the ASF dual-hosted git repository.
dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 3dd5c9e66 IMPALA-3880: Add list of all tables queried to runtime
profile
3dd5c9e66 is described below
commit 3dd5c9e661c45f1793f3ca4d86e9ab83d12e8b3b
Author: Peter Rozsa <[email protected]>
AuthorDate: Thu Jan 5 10:32:38 2023 +0100
IMPALA-3880: Add list of all tables queried to runtime profile
This change adds a new info string to the frontend runtime profile
which contains the tables referenced by the query in a
comma-separated format.
Tests:
- Added tests to check if the referenced tables are enumerated
correctly
- Added test to check if referenced table is filled properly with
different DML statements
Change-Id: Ib474a5c6522032679701103aa225a18edca62f5a
Reviewed-on: http://gerrit.cloudera.org:8080/19401
Reviewed-by: Daniel Becker <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
.../java/org/apache/impala/service/Frontend.java | 7 +++
tests/query_test/test_observability.py | 1 +
tests/shell/test_shell_commandline.py | 57 +++++++++++++++++++++-
3 files changed, 64 insertions(+), 1 deletion(-)
diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java
b/fe/src/main/java/org/apache/impala/service/Frontend.java
index c28398e77..df89ce079 100644
--- a/fe/src/main/java/org/apache/impala/service/Frontend.java
+++ b/fe/src/main/java/org/apache/impala/service/Frontend.java
@@ -2054,6 +2054,13 @@ public class Frontend {
//TODO (IMPALA-8788): should load table write ids in transaction context.
StmtTableCache stmtTableCache = metadataLoader.loadTables(stmt);
+ // Add referenced tables to frontend profile
+ FrontendProfile.getCurrent().addInfoString("Referenced Tables",
+ stmtTableCache.tables.keySet()
+ .stream()
+ .map(TableName::toString)
+ .collect(Collectors.joining(", ")));
+
// Analyze and authorize stmt
AnalysisContext analysisCtx = new AnalysisContext(queryCtx, authzFactory_,
timeline);
AnalysisResult analysisResult = analysisCtx.analyzeAndAuthorize(stmt,
stmtTableCache,
diff --git a/tests/query_test/test_observability.py
b/tests/query_test/test_observability.py
index a92347f88..25bc2347e 100644
--- a/tests/query_test/test_observability.py
+++ b/tests/query_test/test_observability.py
@@ -288,6 +288,7 @@ class TestObservability(ImpalaTestSuite):
else:
load_event_regexes = [
r'Frontend:',
+ r'Referenced Tables:',
r'CatalogFetch.ColumnStats.Hits',
r'CatalogFetch.ColumnStats.Misses',
r'CatalogFetch.ColumnStats.Requests',
diff --git a/tests/shell/test_shell_commandline.py
b/tests/shell/test_shell_commandline.py
index c91c31797..d097ccdaf 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -374,7 +374,7 @@ class TestImpalaShell(ImpalaTestSuite):
def test_runtime_profile(self, vector):
if vector.get_value('strict_hs2_protocol'):
- pytest.skip("Runtime profile not support in strict hs2 mode.")
+ pytest.skip("Runtime profile is not supported in strict hs2 mode.")
# test summary is in both the profile printed by the
# -p option and the one printed by the profile command
args = ['-p', '-q', 'select 1; profile;']
@@ -385,6 +385,61 @@ class TestImpalaShell(ImpalaTestSuite):
assert len(re.findall(regex, result_set.stdout)) == 2, \
"Could not detect two profiles, stdout: %s" % result_set.stdout
+ def test_runtime_profile_referenced_tables(self, vector, unique_database):
+ if vector.get_value('strict_hs2_protocol'):
+ pytest.skip("Runtime profile is not supported in strict hs2 mode.")
+ db = unique_database
+ base_args = ['-p', '-q']
+
+ statements = ['select id from %s.shell_profile_test' % db,
+ 'alter table %s.shell_profile_test add column b int' % db,
+ 'insert into %s.shell_profile_test(id) values (1)' % db,
+ 'truncate table %s.shell_profile_test' % db,
+ 'drop table %s.shell_profile_test' % db]
+
+ args = base_args + ['create table %s.shell_profile_test (id int)' % db]
+ create = run_impala_shell_cmd(vector, args)
+ assert "Referenced Tables: \n" in create.stdout
+
+ for statement in statements:
+ args = base_args + [statement]
+ result = run_impala_shell_cmd(vector, args)
+ assert "Referenced Tables: %s.shell_profile_test" % unique_database in
result.stdout
+
+ def test_runtime_profile_multiple_referenced_tables(self, vector,
unique_database):
+ if vector.get_value('strict_hs2_protocol'):
+ pytest.skip("Runtime profile is not supported in strict hs2 mode.")
+
+ def get_referenced_tables(profile):
+ return re.findall(r'Referenced Tables: (.*)', profile)[0].split(', ')
+
+ db = unique_database
+ base_args = ['-p', '-q']
+
+ for i in range(0, 2):
+ args = base_args + ['create table %s.shell_profile_test%d (id int)' %
(db, i)]
+ run_impala_shell_cmd(vector, args)
+
+ args = base_args + ["select * from {db}.shell_profile_test0 t0 inner join "
+ "{db}.shell_profile_test1 t1 on t0.id =
t1.id".format(db=db)]
+ result = run_impala_shell_cmd(vector, args)
+ referenced_tables = get_referenced_tables(result.stdout)
+
+ assert len(referenced_tables) == 2
+ for i in range(0, 2):
+ assert "{db}.shell_profile_test{index}".format(db=db, index=i) in
referenced_tables
+
+ args = base_args + ["select * from {db}.shell_profile_test0 t0 inner join "
+ "{db}.shell_profile_test1 t1 on t0.id = t1.id inner
join "
+ "{db}.shell_profile_test1 t11 on t0.id =
t11.id".format(db=db)]
+
+ result = run_impala_shell_cmd(vector, args)
+ referenced_tables = get_referenced_tables(result.stdout)
+
+ assert len(referenced_tables) == 2
+ for i in range(0, 2):
+ assert "{db}.shell_profile_test{index}".format(db=db, index=i) in
referenced_tables
+
def test_summary(self, vector):
if vector.get_value('strict_hs2_protocol'):
pytest.skip("Summary not supported in strict hs2 mode.")