This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 1ff4e1b68298563bbcc2729066b11e0028254eb0 Author: Fang-Yu Rao <[email protected]> AuthorDate: Wed Jul 23 12:10:03 2025 -0700 IMPALA-13767: Do not treat CTEs as names of actual tables This patch implements an additional check when collecting table names that are used in the given query. Specifically, for a table name that is not fully qualified, we make sure the derived fully qualified table name is not a common table expression (CTE) of a SqlWithItem in a WITH clause since such CTE's are not actual tables. Testing: - Added a test in test_ranger.py to verify the issue is fixed. Change-Id: I3f51af42d64cdcff3c26ad5a96c7f53ebef431b3 Reviewed-on: http://gerrit.cloudera.org:8080/23209 Tested-by: Impala Public Jenkins <[email protected]> Reviewed-by: Fang-Yu Rao <[email protected]> --- .../calcite/service/CalciteMetadataHandler.java | 49 ++++++++++++++- tests/authorization/test_ranger.py | 69 ++++++++++++++++++---- 2 files changed, 104 insertions(+), 14 deletions(-) diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java index ad2642948..3634be748 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java @@ -32,6 +32,8 @@ import org.apache.calcite.sql.SqlJoin; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlSelect; +import org.apache.calcite.sql.SqlWith; +import org.apache.calcite.sql.SqlWithItem; import org.apache.calcite.sql.util.SqlBasicVisitor; import org.apache.impala.analysis.Analyzer; import org.apache.impala.analysis.StmtMetadataLoader; @@ -58,6 +60,7 @@ import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Set; +import java.util.Stack; import java.util.stream.Collectors; import org.slf4j.Logger; @@ -200,19 +203,46 @@ public class CalciteMetadataHandler implements CompilerStep { // will contain that table. public final List<String> errorTables_ = new ArrayList<>(); + // This stack contains the sets of TableName's of the currently visited SqlWith + // nodes, with the set of TableName's of the most recently visited SqlWith node being + // the top of the stack. + public final Stack<Set<TableName>> withItemTableNames_ = new Stack<>(); + public TableVisitor(String currentDb) { this.currentDb_ = currentDb.toLowerCase(); } @Override public Void visit(SqlCall call) { + if (call instanceof SqlWith) { + withItemTableNames_.push(new HashSet<>()); + } + if (call.getKind() == SqlKind.SELECT) { SqlSelect select = (SqlSelect) call; if (select.getFrom() != null) { tableNames_.addAll(getTableNames(select.getFrom())); } } - return super.visit(call); + + if (call.getKind() == SqlKind.WITH_ITEM) { + TableName tableName = new TableName(this.currentDb_.toLowerCase(), + ((SqlWithItem) call).name.names.get(0).toLowerCase()); + // Since a SqlWithItem node cannot exist without a SqlWith node, we can be sure + // the top stack element was added by the respective SqlWith node of this + // SqlWithItem. Adding 'tableName' to this stack element would allow us to + // determine in getTableNames() if a given TableName derived from a SqlIdentifier + // was registered via a SqlWithItem node of which the corresponding SqlWith node + // is an ancestor of the SqlIdentifier. + withItemTableNames_.peek().add(tableName); + } + + Void v = super.visit(call); + + if (call instanceof SqlWith) { + withItemTableNames_.pop(); + } + return v; } private List<TableName> getTableNames(SqlNode fromNode) { @@ -221,8 +251,14 @@ public class CalciteMetadataHandler implements CompilerStep { String tableName = fromNode.toString(); List<String> parts = Splitter.on('.').splitToList(tableName); if (parts.size() == 1) { - localTableNames.add(new TableName( - currentDb_.toLowerCase(), parts.get(0).toLowerCase())); + TableName tableNameToAdd = new TableName( + currentDb_.toLowerCase(), parts.get(0).toLowerCase()); + // Do not collect this table if 'tableNameToAdd' was already registered via + // a SqlWithItem node since in this case 'tableNameToAdd' is not an actual + // table. + if (!isRegisteredBySqlWithItem(tableNameToAdd)) { + localTableNames.add(tableNameToAdd); + } } else if (parts.size() == 2) { localTableNames.add( new TableName(parts.get(0).toLowerCase(), parts.get(1).toLowerCase())); @@ -248,6 +284,13 @@ public class CalciteMetadataHandler implements CompilerStep { return localTableNames; } + private boolean isRegisteredBySqlWithItem(TableName tableName) { + for (Set<TableName> tableNames : withItemTableNames_) { + if (tableNames.contains(tableName)) return true; + } + return false; + } + /** * Check if the error table is actually a table with a complex column. There is Impala * syntax where a complex column uses the same syntax in the FROM clause as a table. diff --git a/tests/authorization/test_ranger.py b/tests/authorization/test_ranger.py index 59896f5f2..fa72be612 100644 --- a/tests/authorization/test_ranger.py +++ b/tests/authorization/test_ranger.py @@ -1459,17 +1459,8 @@ class TestRanger(CustomClusterTestSuite): admin_client.execute("create database {0}".format(unique_database)) admin_client.execute("create table {0}.{1} (id int, bigint_col bigint)" .format(unique_database, unique_table)) - admin_client.execute("grant select on table functional.alltypes to user {0}" - .format(grantee_user)) admin_client.execute("refresh authorization") - # Even though the user 'grantee_user' was granted the SELECT privilege on the - # table 'functional.alltypes', the user still could not execute the query due to - # IMPALA-13767. - result = self.execute_query_expect_failure(non_owner_client, - "with t as (select * from functional.alltypes) select * from t") - assert "{0} default.t".format(error_msg_prefix) in str(result) - admin_client.execute("grant select (id) on table {0}.{1} to user {2}" .format(unique_database, unique_table, grantee_user)) admin_client.execute("refresh authorization") @@ -1490,8 +1481,6 @@ class TestRanger(CustomClusterTestSuite): # 'bigint_col', the query could be executed. non_owner_client.execute(test_select_query) finally: - admin_client.execute("revoke select on table functional.alltypes from user {0}" - .format(grantee_user)) admin_client.execute("revoke select (id) on table {0}.{1} from user {2}" .format(unique_database, unique_table, grantee_user)) admin_client.execute("revoke select (bigint_col) on table {0}.{1} from user {2}" @@ -3658,5 +3647,63 @@ class TestRangerWithCalcite(TestRanger): def test_view_on_view_all_configs(self, unique_name): self._test_view_on_view_all_configs(unique_name) + @pytest.mark.execute_serially def test_table_masking_calcite_frontend(self, unique_name): self._test_table_masking_calcite_frontend(unique_name) + + @pytest.mark.execute_serially + def test_cte(self): + """ + This verifies the Calcite planner won't treat CTEs as names of actual tables. + """ + with self.create_impala_client(user=ADMIN) as admin_client, \ + self.create_impala_client(user=NON_OWNER) as non_owner_client: + # Set the query option of 'use_calcite_planner' to 1 to use the Calcite planner. + non_owner_client.set_configuration({"use_calcite_planner": 1}) + database = "functional" + table_1 = database + "." + "alltypes" + table_2 = database + "." "alltypestiny" + test_query_1 = "with t as (select * from {0}) select * from t".format(table_1) + # A query that has a WITH clause involving more than one CTE. + test_query_2 = "with " \ + "t1 as (select id from {0}), " \ + "t2 as (select id from {1}) " \ + "select * from t1, t2 where t1.id = t2.id".format(table_1, table_2) + # A query that has more than one WITH clause. A WITH clause is defined in the + # inline view 'v', and the outer CTE, i.e., 't1', is not referenced within 'v'. + test_query_3 = "with " \ + "t1 as (select id from {0}) " \ + "select v.id from " \ + "(with t2 as (select id from {1}) select * from t2) v, t1 " \ + "where v.id = t1.id".format(table_1, table_2) + # A query that has more than one WITH clause. A WITH clause is defined in the + # inline view 'v', and the outer CTE, i.e., 't1', is referenced within 'v'. + test_query_4 = "with " \ + "t1 as (select id from {0} where id < 2) " \ + "select v.id from " \ + "(with t2 as (select id from {1}) select * from t2, t1 " \ + "where t2.id = t1.id) v".format(table_1, table_2) + error_classic_fe = "AnalysisException: duplicated inline view column alias: " \ + "'id' in inline view 'v'" + try: + admin_client.execute("grant select on table {0} to user {1}" + .format(table_1, NON_OWNER)) + admin_client.execute("grant select on table {0} to user {1}" + .format(table_2, NON_OWNER)) + non_owner_client.execute(test_query_1) + non_owner_client.execute(test_query_2) + non_owner_client.execute(test_query_3) + non_owner_client.execute(test_query_4) + + # Set the query option of 'use_calcite_planner' to 0 to use the classic frontend. + non_owner_client.set_configuration({"use_calcite_planner": 0}) + # Impala's classic frontend supports 'test_query_3'. + non_owner_client.execute(test_query_3) + # Impala's classic frontend could not support 'test_query_4'. + result = self.execute_query_expect_failure(non_owner_client, test_query_4) + assert error_classic_fe in str(result) + finally: + admin_client.execute("revoke select on table {0} from user {1}" + .format(table_1, NON_OWNER)) + admin_client.execute("revoke select on table {0} from user {1}" + .format(table_2, NON_OWNER))
