This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 62e08b8798f40c41508efb27381bdacad812af4c
Author: Michael Smith <[email protected]>
AuthorDate: Tue Jun 18 15:37:40 2024 -0700

    IMPALA-13166: Improve lookup of existing slot descriptors
    
    Improves lookup for existing SlotDescriptors created by another Analyzer
    by maintaining a map of Path to SlotDescriptor in TupleDescriptor. Only
    adds entries that are also inserted into slotPathMap_.
    
    Moves expensive lower-case slot path precondition to happen only when
    adding new slot descriptors.
    
    Speeds up PlannerTest#testManyExpressionPerformance by 1-2s (5-10%) in
    local testing.
    
    Change-Id: Id904ee8a641fb06ebca95b53ea29e78d120a2288
    Reviewed-on: http://gerrit.cloudera.org:8080/21531
    Reviewed-by: Michael Smith <[email protected]>
    Tested-by: Michael Smith <[email protected]>
---
 .../java/org/apache/impala/analysis/Analyzer.java  | 25 +++++++---------------
 .../apache/impala/analysis/TupleDescriptor.java    | 19 ++++++++++++++++
 2 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java 
b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 4655c4091..270857332 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -663,10 +663,6 @@ public class Analyzer {
     // Shows how many zipping unnests were in the query;
     public int numZippingUnnests = 0;
 
-    // Ids of (collection-typed) SlotDescriptors that were registered with
-    // 'duplicateIfCollections=true' in 'Analyzer.registerSlotRef()'.
-    public Set<SlotId> duplicateCollectionSlots = new HashSet<>();
-
     public GlobalState(StmtTableCache stmtTableCache, TQueryCtx queryCtx,
         AuthorizationFactory authzFactory, AuthorizationContext authzCtx) {
       this.stmtTableCache = stmtTableCache;
@@ -1785,20 +1781,19 @@ public class Analyzer {
         // Register a new slot descriptor for collection types. The BE 
currently
         // relies on this behavior for setting unnested collection slots to 
NULL.
         SlotDescriptor res = createAndRegisterRawSlotDesc(slotPath, false);
-        globalState_.duplicateCollectionSlots.add(res.getId());
         return res;
       }
-      // SlotRefs with scalar or struct types are registered against the slot's
-      // fully-qualified lowercase path.
-      List<String> key = slotPath.getFullyQualifiedRawPath();
-      Preconditions.checkState(key.stream().allMatch(s -> 
s.equals(s.toLowerCase())),
-          "Slot paths should be lower case: " + key);
       SlotDescriptor existingSlotDesc = slotPathMap_.get(slotPath);
       if (existingSlotDesc != null) return existingSlotDesc;
 
       SlotDescriptor existingInTuple = findPathInCurrentTuple(slotPath);
       if (existingInTuple != null) return existingInTuple;
 
+      // SlotRefs with scalar or struct types are registered against the slot's
+      // fully-qualified lowercase path.
+      List<String> key = slotPath.getFullyQualifiedRawPath();
+      Preconditions.checkState(key.stream().allMatch(s -> 
s.equals(s.toLowerCase())),
+          "Slot paths should be lower case: " + key);
       return createAndRegisterSlotDesc(slotPath);
     }
   }
@@ -1817,13 +1812,7 @@ public class Analyzer {
     TupleDescriptor currentTupleDesc = tupleStack_.peek();
     Preconditions.checkNotNull(currentTupleDesc);
 
-    for (SlotDescriptor slotDesc : currentTupleDesc.getSlots()) {
-      if (slotPath.equals(slotDesc.getPath()) &&
-          !globalState_.duplicateCollectionSlots.contains(slotDesc.getId())) {
-        return slotDesc;
-      }
-    }
-    return null;
+    return currentTupleDesc.getUniqueSlotDescriptor(slotPath);
   }
 
   private SlotDescriptor createAndRegisterSlotDesc(Path slotPath)
@@ -1865,6 +1854,8 @@ public class Analyzer {
     desc.setPath(slotPath);
     if (insertIntoSlotPathMap) {
       slotPathMap_.put(slotPath, desc);
+      // Ensure parent TupleDescriptor can find desc by path.
+      desc.getParent().registerUniqueSlotDescriptorPath(desc);
     }
     registerColumnPrivReq(desc);
   }
diff --git a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java 
b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
index bbc73964d..c996abf8f 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
@@ -81,6 +81,7 @@ public class TupleDescriptor {
   private final TupleId id_;
   private final String debugName_;  // debug-only
   private final List<SlotDescriptor> slots_ = new ArrayList<>();
+  private final Map<Path, SlotDescriptor> slotByPath_ = new HashMap<>();
 
   // Resolved path to the collection corresponding to this tuple descriptor, 
if any,
   // Only set for materialized tuples.
@@ -136,9 +137,27 @@ public class TupleDescriptor {
     slots_.add(desc);
   }
 
+  /**
+   * Marks SlotDescriptor as unique and makes it accessible by 
getUniqueSlotDescriptor.
+   */
+  public void registerUniqueSlotDescriptorPath(SlotDescriptor desc) {
+    Preconditions.checkState(desc.getPath() != null);
+    Preconditions.checkState(desc.getParent() == this);
+    SlotDescriptor oldDesc = slotByPath_.put(desc.getPath(), desc);
+    Preconditions.checkState(oldDesc == null, "SlotDescriptor must be unique");
+  }
+
   public TupleId getId() { return id_; }
   public List<SlotDescriptor> getSlots() { return slots_; }
 
+  /**
+   * Returns a unique slot descriptor for a given Path. For collections that 
can have
+   * multiple descriptors for a Path, returns null.
+   */
+  public SlotDescriptor getUniqueSlotDescriptor(Path p) {
+    return slotByPath_.get(p);
+  }
+
   /**
    * Returns the slots in this 'TupleDescriptor' and if any slot is a struct 
slot, returns
    * their slots as well, recursively. Does not descend into collections, only 
structs.

Reply via email to