This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 3dac0135fba0717dd977043e7ecc6b52bf55189f
Author: stiga-huang <[email protected]>
AuthorDate: Tue Jan 27 22:24:02 2026 +0800

    IMPALA-14703: Improves finding column masking policies of a table
    
    When Ranger authorization is enabled, if a user wants to update masked
    data, it should be blocked. This is done by checking if there are any
    column masking or row filtering policies on the table enabled for the
    user. Currently we iterate over all the columns of the table and check
    if a masking policy exists on each column. This is inefficient,
    especially for wide tables. It also requires the table's metadata to be
    loaded to get the column list, which introduces a performance regression
    for INVALIDATE and REFRESH statements that previously didn't trigger
    metadata loading.
    
    This patch improves the check by making the request at the table level
    with a resource matching scope of SELF_OR_DESCENDANTS. With this scope,
    the Ranger plugin returns the first matching column masking policy in
    evalDataMaskPolicies().
    
    As we no longer need the column list, the table loading triggered by
    INVALIDATE and REFRESH statements is also removed.
    
    Tests
     - Ran test_block_metadata_update and data masking tests in
       test_ranger.py
    
    Change-Id: Ic8ab88b7cfd4f7e156c4eead53a2ff3086b1daa6
    Reviewed-on: http://gerrit.cloudera.org:8080/23908
    Reviewed-by: Csaba Ringhofer <[email protected]>
    Reviewed-by: Fang-Yu Rao <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../apache/impala/analysis/StmtMetadataLoader.java | 14 +-----
 .../impala/authorization/AuthorizableTable.java    | 13 ------
 .../authorization/BaseAuthorizationChecker.java    | 24 ----------
 .../ranger/RangerAuthorizationChecker.java         | 52 +++++++++-------------
 4 files changed, 22 insertions(+), 81 deletions(-)

diff --git 
a/fe/src/main/java/org/apache/impala/analysis/StmtMetadataLoader.java 
b/fe/src/main/java/org/apache/impala/analysis/StmtMetadataLoader.java
index e13695367..b055fcecd 100644
--- a/fe/src/main/java/org/apache/impala/analysis/StmtMetadataLoader.java
+++ b/fe/src/main/java/org/apache/impala/analysis/StmtMetadataLoader.java
@@ -485,19 +485,7 @@ public class StmtMetadataLoader {
   public Set<TableName> collectTableCandidates(StatementBase stmt) {
     Preconditions.checkNotNull(stmt);
     List<TableRef> tblRefs = new ArrayList<>();
-    // The information about whether table masking is supported is not 
available to
-    // ResetMetadataStmt so we collect the TableRef for ResetMetadataStmt 
whenever
-    // applicable. Skip this if allow_catalog_cache_op_from_masked_users=true 
because
-    // we don't need column info for fetching column-masking policies.
-    if (stmt instanceof ResetMetadataStmt
-        && fe_.getAuthzFactory().getAuthorizationConfig().isEnabled()
-        && fe_.getAuthzFactory().supportsTableMasking()
-        && !BackendConfig.INSTANCE.allowCatalogCacheOpFromMaskedUsers()) {
-      TableName tableName = ((ResetMetadataStmt) stmt).getTableName();
-      if (tableName != null) tblRefs.add(new TableRef(tableName.toPath(), 
null));
-    } else {
-      stmt.collectTableRefs(tblRefs);
-    }
+    stmt.collectTableRefs(tblRefs);
     Set<TableName> tableNames = new HashSet<>();
     for (TableRef ref: tblRefs) {
       tableNames.addAll(Path.getCandidateTables(ref.getPath(), sessionDb_));
diff --git 
a/fe/src/main/java/org/apache/impala/authorization/AuthorizableTable.java 
b/fe/src/main/java/org/apache/impala/authorization/AuthorizableTable.java
index b670438af..3cfc8ad86 100644
--- a/fe/src/main/java/org/apache/impala/authorization/AuthorizableTable.java
+++ b/fe/src/main/java/org/apache/impala/authorization/AuthorizableTable.java
@@ -22,9 +22,6 @@ import javax.annotation.Nullable;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Strings;
 
-import java.util.ArrayList;
-import java.util.List;
-
 /**
  * A class to authorize access to a table.
  */
@@ -33,7 +30,6 @@ public class AuthorizableTable extends Authorizable {
   private final String tableName_;
   @Nullable // Is null if the owner is not set.
   private final String ownerUser_;
-  private final List<String> columns_ = new ArrayList<>();
 
   public AuthorizableTable(String dbName, String tableName, @Nullable String 
ownerUser) {
     Preconditions.checkArgument(!Strings.isNullOrEmpty(dbName));
@@ -60,13 +56,4 @@ public class AuthorizableTable extends Authorizable {
 
   @Override
   public String getOwnerUser() { return ownerUser_; }
-
-  public void setColumns(List<String> columns) {
-    columns_.clear();
-    columns_.addAll(columns);
-  }
-
-  public List<String> getColumns() {
-    return columns_;
-  }
 }
diff --git 
a/fe/src/main/java/org/apache/impala/authorization/BaseAuthorizationChecker.java
 
b/fe/src/main/java/org/apache/impala/authorization/BaseAuthorizationChecker.java
index aa8123dcc..b2b3980cc 100644
--- 
a/fe/src/main/java/org/apache/impala/authorization/BaseAuthorizationChecker.java
+++ 
b/fe/src/main/java/org/apache/impala/authorization/BaseAuthorizationChecker.java
@@ -216,30 +216,6 @@ public abstract class BaseAuthorizationChecker implements 
AuthorizationChecker {
     if (dbName != null && checkSystemDbAccess(catalog, dbName, 
request.getPrivilege())) {
       return;
     }
-    // Populate column names to check column masking policies in blocking 
updates.
-    // No need to do this for REFRESH if 
allow_catalog_cache_op_from_masked_users=true.
-    // Note that db.getTable() could be a heavy operation in local catalog 
mode since it
-    // triggers metadata loading on the table if it's unloaded in catalogd. 
Skipping this
-    // improves the performance of "INVALIDATE METADATA <table>" statements. 
For REFRESH
-    // statements, the performance doesn't differ a lot since there are other 
places that
-    // use db.getTable() (see IMPALA-12591).
-    if (config_.isEnabled() && request.getAuthorizable() != null
-        && request.getAuthorizable().getType() == Type.TABLE
-        && (request.getPrivilege() != Privilege.REFRESH
-          || !BackendConfig.INSTANCE.allowCatalogCacheOpFromMaskedUsers())) {
-      Preconditions.checkNotNull(dbName);
-      AuthorizableTable authorizableTable = (AuthorizableTable) 
request.getAuthorizable();
-      FeDb db = catalog.getDb(dbName);
-      if (db != null) {
-        // 'db', 'table' could be null for an unresolved table ref. 'table' 
could be
-        // null for target table of a CTAS statement. Don't need to populate 
column
-        // names in such cases since no column masking policies will be 
checked.
-        FeTable table = db.getTable(authorizableTable.getTableName());
-        if (table != null && !(table instanceof FeIncompleteTable)) {
-          authorizableTable.setColumns(table.getColumnNames());
-        }
-      }
-    }
     checkAccess(authzCtx, analysisResult.getAnalyzer().getUser(), request);
   }
 
diff --git 
a/fe/src/main/java/org/apache/impala/authorization/ranger/RangerAuthorizationChecker.java
 
b/fe/src/main/java/org/apache/impala/authorization/ranger/RangerAuthorizationChecker.java
index 4f9bdb18e..f062b6240 100644
--- 
a/fe/src/main/java/org/apache/impala/authorization/ranger/RangerAuthorizationChecker.java
+++ 
b/fe/src/main/java/org/apache/impala/authorization/ranger/RangerAuthorizationChecker.java
@@ -491,13 +491,18 @@ public class RangerAuthorizationChecker extends 
BaseAuthorizationChecker {
       String tableName, String columnName, RangerBufferAuditHandler 
auditHandler)
       throws InternalException {
     Preconditions.checkNotNull(user);
-    RangerAccessResourceImpl resource = new RangerImpalaResourceBuilder()
-        .database(dbName)
-        .table(tableName)
-        .column(columnName)
-        .build();
     RangerAccessRequestImpl req = new RangerAccessRequestImpl();
-    req.setResource(resource);
+    RangerImpalaResourceBuilder builder = new RangerImpalaResourceBuilder()
+        .database(dbName)
+        .table(tableName);
+    if (columnName != null) {
+      builder.column(columnName);
+    } else {
+      // If no column is given, find any column masking policy on the table.
+      req.setResourceMatchingScope(
+          RangerAccessRequest.ResourceMatchingScope.SELF_OR_DESCENDANTS);
+    }
+    req.setResource(builder.build());
     req.setAccessType(SELECT_ACCESS_TYPE);
     req.setUser(user.getShortName());
     req.setUserGroups(getUserGroups(user));
@@ -714,7 +719,7 @@ public class RangerAuthorizationChecker extends 
BaseAuthorizationChecker {
       RangerAccessResult rowFilterResult = plugin_.evalRowFilterPolicies(
           request, /*resultProcessor*/null);
       if (rowFilterResult != null && rowFilterResult.isRowFilterEnabled()) {
-        LOG.trace("Deny {} on {} due to row filtering policy {}",
+        LOG.info("Deny {} on {} due to row filtering policy {}",
             privilege, authorizable.getName(), rowFilterResult.getPolicyId());
         accessResult.setIsAllowed(false);
         accessResult.setPolicyId(rowFilterResult.getPolicyId());
@@ -725,30 +730,15 @@ public class RangerAuthorizationChecker extends 
BaseAuthorizationChecker {
     }
     // Check if masking is enabled for any column in the table/view.
     if (accessResult.getIsAllowed()) {
-      List<String> columns;
-      if (authorizable.getType() == Type.TABLE) {
-        // Check all columns.
-        columns = ((AuthorizableTable) authorizable).getColumns();
-        LOG.trace("Checking mask policies on {} columns of table {}", 
columns.size(),
-            authorizable.getFullTableName());
-      } else {
-        columns = Lists.newArrayList(authorizable.getColumnName());
-      }
-      for (String column : columns) {
-        RangerAccessResult columnMaskResult = evalColumnMask(user,
-            authorizable.getDbName(), authorizable.getTableName(), column,
-            /*auditHandler*/null);
-        if (columnMaskResult != null && columnMaskResult.isMaskEnabled()) {
-          LOG.trace("Deny {} on {} due to column masking policy {}",
-              privilege, authorizable.getName(), 
columnMaskResult.getPolicyId());
-          accessResult.setIsAllowed(false);
-          accessResult.setPolicyId(columnMaskResult.getPolicyId());
-          accessResult.setReason("User does not have access to unmasked column 
values");
-          break;
-        } else {
-          LOG.trace("No column masking policy found on column {} of {}.", 
column,
-              authorizable.getFullTableName());
-        }
+      RangerAccessResult columnMaskResult = evalColumnMask(user,
+          authorizable.getDbName(), authorizable.getTableName(), 
/*columnName*/null,
+          /*auditHandler*/null);
+      if (columnMaskResult != null && columnMaskResult.isMaskEnabled()) {
+        LOG.info("Deny {} on {} due to column masking policy {}",
+            privilege, authorizable.getName(), columnMaskResult.getPolicyId());
+        accessResult.setIsAllowed(false);
+        accessResult.setPolicyId(columnMaskResult.getPolicyId());
+        accessResult.setReason("User does not have access to unmasked column 
values");
       }
     }
     // Set back the original access type. The request object is still 
referenced by the

Reply via email to