This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 1ff4e1b68298563bbcc2729066b11e0028254eb0
Author: Fang-Yu Rao <[email protected]>
AuthorDate: Wed Jul 23 12:10:03 2025 -0700

    IMPALA-13767: Do not treat CTEs as names of actual tables
    
    This patch implements an additional check when collecting table names
    that are used in the given query. Specifically, for a table name that
    is not fully qualified, we make sure the derived fully qualified table
    name is not a common table expression (CTE) of a SqlWithItem in a
    WITH clause since such CTE's are not actual tables.
    
    Testing:
     - Added a test in test_ranger.py to verify the issue is fixed.
    
    Change-Id: I3f51af42d64cdcff3c26ad5a96c7f53ebef431b3
    Reviewed-on: http://gerrit.cloudera.org:8080/23209
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-by: Fang-Yu Rao <[email protected]>
---
 .../calcite/service/CalciteMetadataHandler.java    | 49 ++++++++++++++-
 tests/authorization/test_ranger.py                 | 69 ++++++++++++++++++----
 2 files changed, 104 insertions(+), 14 deletions(-)

diff --git 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java
 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java
index ad2642948..3634be748 100644
--- 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java
+++ 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java
@@ -32,6 +32,8 @@ import org.apache.calcite.sql.SqlJoin;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.SqlNode;
 import org.apache.calcite.sql.SqlSelect;
+import org.apache.calcite.sql.SqlWith;
+import org.apache.calcite.sql.SqlWithItem;
 import org.apache.calcite.sql.util.SqlBasicVisitor;
 import org.apache.impala.analysis.Analyzer;
 import org.apache.impala.analysis.StmtMetadataLoader;
@@ -58,6 +60,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Properties;
 import java.util.Set;
+import java.util.Stack;
 import java.util.stream.Collectors;
 
 import org.slf4j.Logger;
@@ -200,19 +203,46 @@ public class CalciteMetadataHandler implements 
CompilerStep {
     // will contain that table.
     public final List<String> errorTables_ = new ArrayList<>();
 
+    // This stack contains the sets of TableName's of the currently visited 
SqlWith
+    // nodes, with the set of TableName's of the most recently visited SqlWith 
node being
+    // the top of the stack.
+    public final Stack<Set<TableName>> withItemTableNames_ = new Stack<>();
+
     public TableVisitor(String currentDb) {
       this.currentDb_ = currentDb.toLowerCase();
     }
 
     @Override
     public Void visit(SqlCall call) {
+      if (call instanceof SqlWith) {
+        withItemTableNames_.push(new HashSet<>());
+      }
+
       if (call.getKind() == SqlKind.SELECT) {
         SqlSelect select = (SqlSelect) call;
         if (select.getFrom() != null) {
           tableNames_.addAll(getTableNames(select.getFrom()));
         }
       }
-      return super.visit(call);
+
+      if (call.getKind() == SqlKind.WITH_ITEM) {
+        TableName tableName = new TableName(this.currentDb_.toLowerCase(),
+            ((SqlWithItem) call).name.names.get(0).toLowerCase());
+        // Since a SqlWithItem node cannot exist without a SqlWith node, we 
can be sure
+        // the top stack element was added by the respective SqlWith node of 
this
+        // SqlWithItem. Adding 'tableName' to this stack element would allow 
us to
+        // determine in getTableNames() if a given TableName derived from a 
SqlIdentifier
+        // was registered via a SqlWithItem node of which the corresponding 
SqlWith node
+        // is an ancestor of the SqlIdentifier.
+        withItemTableNames_.peek().add(tableName);
+      }
+
+      Void v = super.visit(call);
+
+      if (call instanceof SqlWith) {
+        withItemTableNames_.pop();
+      }
+      return v;
     }
 
     private List<TableName> getTableNames(SqlNode fromNode) {
@@ -221,8 +251,14 @@ public class CalciteMetadataHandler implements 
CompilerStep {
         String tableName = fromNode.toString();
         List<String> parts = Splitter.on('.').splitToList(tableName);
         if (parts.size() == 1) {
-          localTableNames.add(new TableName(
-              currentDb_.toLowerCase(), parts.get(0).toLowerCase()));
+          TableName tableNameToAdd = new TableName(
+              currentDb_.toLowerCase(), parts.get(0).toLowerCase());
+          // Do not collect this table if 'tableNameToAdd' was already 
registered via
+          // a SqlWithItem node since in this case 'tableNameToAdd' is not an 
actual
+          // table.
+          if (!isRegisteredBySqlWithItem(tableNameToAdd)) {
+            localTableNames.add(tableNameToAdd);
+          }
         } else if (parts.size() == 2) {
           localTableNames.add(
               new TableName(parts.get(0).toLowerCase(), 
parts.get(1).toLowerCase()));
@@ -248,6 +284,13 @@ public class CalciteMetadataHandler implements 
CompilerStep {
       return localTableNames;
     }
 
+    private boolean isRegisteredBySqlWithItem(TableName tableName) {
+      for (Set<TableName> tableNames : withItemTableNames_) {
+        if (tableNames.contains(tableName)) return true;
+      }
+      return false;
+    }
+
     /**
      * Check if the error table is actually a table with a complex column. 
There is Impala
      * syntax where a complex column uses the same syntax in the FROM clause 
as a table.
diff --git a/tests/authorization/test_ranger.py 
b/tests/authorization/test_ranger.py
index 59896f5f2..fa72be612 100644
--- a/tests/authorization/test_ranger.py
+++ b/tests/authorization/test_ranger.py
@@ -1459,17 +1459,8 @@ class TestRanger(CustomClusterTestSuite):
         admin_client.execute("create database {0}".format(unique_database))
         admin_client.execute("create table {0}.{1} (id int, bigint_col bigint)"
             .format(unique_database, unique_table))
-        admin_client.execute("grant select on table functional.alltypes to 
user {0}"
-            .format(grantee_user))
         admin_client.execute("refresh authorization")
 
-        # Even though the user 'grantee_user' was granted the SELECT privilege 
on the
-        # table 'functional.alltypes', the user still could not execute the 
query due to
-        # IMPALA-13767.
-        result = self.execute_query_expect_failure(non_owner_client,
-            "with t as (select * from functional.alltypes) select * from t")
-        assert "{0} default.t".format(error_msg_prefix) in str(result)
-
         admin_client.execute("grant select (id) on table {0}.{1} to user {2}"
             .format(unique_database, unique_table, grantee_user))
         admin_client.execute("refresh authorization")
@@ -1490,8 +1481,6 @@ class TestRanger(CustomClusterTestSuite):
         # 'bigint_col', the query could be executed.
         non_owner_client.execute(test_select_query)
       finally:
-        admin_client.execute("revoke select on table functional.alltypes from 
user {0}"
-            .format(grantee_user))
         admin_client.execute("revoke select (id) on table {0}.{1} from user 
{2}"
             .format(unique_database, unique_table, grantee_user))
         admin_client.execute("revoke select (bigint_col) on table {0}.{1} from 
user {2}"
@@ -3658,5 +3647,63 @@ class TestRangerWithCalcite(TestRanger):
   def test_view_on_view_all_configs(self, unique_name):
     self._test_view_on_view_all_configs(unique_name)
 
+  @pytest.mark.execute_serially
   def test_table_masking_calcite_frontend(self, unique_name):
     self._test_table_masking_calcite_frontend(unique_name)
+
+  @pytest.mark.execute_serially
+  def test_cte(self):
+    """
+    This verifies the Calcite planner won't treat CTEs as names of actual 
tables.
+    """
+    with self.create_impala_client(user=ADMIN) as admin_client, \
+        self.create_impala_client(user=NON_OWNER) as non_owner_client:
+      # Set the query option of 'use_calcite_planner' to 1 to use the Calcite 
planner.
+      non_owner_client.set_configuration({"use_calcite_planner": 1})
+      database = "functional"
+      table_1 = database + "." + "alltypes"
+      table_2 = database + "." "alltypestiny"
+      test_query_1 = "with t as (select * from {0}) select * from 
t".format(table_1)
+      # A query that has a WITH clause involving more than one CTE.
+      test_query_2 = "with " \
+          "t1 as (select id from {0}), " \
+          "t2 as (select id from {1}) " \
+          "select * from t1, t2 where t1.id = t2.id".format(table_1, table_2)
+      # A query that has more than one WITH clause. A WITH clause is defined 
in the
+      # inline view 'v', and the outer CTE, i.e., 't1', is not referenced 
within 'v'.
+      test_query_3 = "with " \
+          "t1 as (select id from {0}) " \
+          "select v.id from " \
+          "(with t2 as (select id from {1}) select * from t2) v, t1 " \
+          "where v.id = t1.id".format(table_1, table_2)
+      # A query that has more than one WITH clause. A WITH clause is defined 
in the
+      # inline view 'v', and the outer CTE, i.e., 't1', is referenced within 
'v'.
+      test_query_4 = "with " \
+          "t1 as (select id from {0} where id < 2) " \
+          "select v.id from " \
+          "(with t2 as (select id from {1}) select * from t2, t1 " \
+          "where t2.id = t1.id) v".format(table_1, table_2)
+      error_classic_fe = "AnalysisException: duplicated inline view column 
alias: " \
+          "'id' in inline view 'v'"
+      try:
+        admin_client.execute("grant select on table {0} to user {1}"
+            .format(table_1, NON_OWNER))
+        admin_client.execute("grant select on table {0} to user {1}"
+            .format(table_2, NON_OWNER))
+        non_owner_client.execute(test_query_1)
+        non_owner_client.execute(test_query_2)
+        non_owner_client.execute(test_query_3)
+        non_owner_client.execute(test_query_4)
+
+        # Set the query option of 'use_calcite_planner' to 0 to use the 
classic frontend.
+        non_owner_client.set_configuration({"use_calcite_planner": 0})
+        # Impala's classic frontend supports 'test_query_3'.
+        non_owner_client.execute(test_query_3)
+        # Impala's classic frontend could not support 'test_query_4'.
+        result = self.execute_query_expect_failure(non_owner_client, 
test_query_4)
+        assert error_classic_fe in str(result)
+      finally:
+        admin_client.execute("revoke select on table {0} from user {1}"
+            .format(table_1, NON_OWNER))
+        admin_client.execute("revoke select on table {0} from user {1}"
+            .format(table_2, NON_OWNER))

Reply via email to