This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 62e08b8798f40c41508efb27381bdacad812af4c Author: Michael Smith <[email protected]> AuthorDate: Tue Jun 18 15:37:40 2024 -0700 IMPALA-13166: Improve lookup of existing slot descriptors Improves lookup for existing SlotDescriptors created by another Analyzer by maintaining a map of Path to SlotDescriptor in TupleDescriptor. Only adds entries that are also inserted into slotPathMap_. Moves expensive lower-case slot path precondition to happen only when adding new slot descriptors. Speeds up PlannerTest#testManyExpressionPerformance by 1-2s (5-10%) in local testing. Change-Id: Id904ee8a641fb06ebca95b53ea29e78d120a2288 Reviewed-on: http://gerrit.cloudera.org:8080/21531 Reviewed-by: Michael Smith <[email protected]> Tested-by: Michael Smith <[email protected]> --- .../java/org/apache/impala/analysis/Analyzer.java | 25 +++++++--------------- .../apache/impala/analysis/TupleDescriptor.java | 19 ++++++++++++++++ 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java index 4655c4091..270857332 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java +++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java @@ -663,10 +663,6 @@ public class Analyzer { // Shows how many zipping unnests were in the query; public int numZippingUnnests = 0; - // Ids of (collection-typed) SlotDescriptors that were registered with - // 'duplicateIfCollections=true' in 'Analyzer.registerSlotRef()'. - public Set<SlotId> duplicateCollectionSlots = new HashSet<>(); - public GlobalState(StmtTableCache stmtTableCache, TQueryCtx queryCtx, AuthorizationFactory authzFactory, AuthorizationContext authzCtx) { this.stmtTableCache = stmtTableCache; @@ -1785,20 +1781,19 @@ public class Analyzer { // Register a new slot descriptor for collection types. The BE currently // relies on this behavior for setting unnested collection slots to NULL. SlotDescriptor res = createAndRegisterRawSlotDesc(slotPath, false); - globalState_.duplicateCollectionSlots.add(res.getId()); return res; } - // SlotRefs with scalar or struct types are registered against the slot's - // fully-qualified lowercase path. - List<String> key = slotPath.getFullyQualifiedRawPath(); - Preconditions.checkState(key.stream().allMatch(s -> s.equals(s.toLowerCase())), - "Slot paths should be lower case: " + key); SlotDescriptor existingSlotDesc = slotPathMap_.get(slotPath); if (existingSlotDesc != null) return existingSlotDesc; SlotDescriptor existingInTuple = findPathInCurrentTuple(slotPath); if (existingInTuple != null) return existingInTuple; + // SlotRefs with scalar or struct types are registered against the slot's + // fully-qualified lowercase path. + List<String> key = slotPath.getFullyQualifiedRawPath(); + Preconditions.checkState(key.stream().allMatch(s -> s.equals(s.toLowerCase())), + "Slot paths should be lower case: " + key); return createAndRegisterSlotDesc(slotPath); } } @@ -1817,13 +1812,7 @@ public class Analyzer { TupleDescriptor currentTupleDesc = tupleStack_.peek(); Preconditions.checkNotNull(currentTupleDesc); - for (SlotDescriptor slotDesc : currentTupleDesc.getSlots()) { - if (slotPath.equals(slotDesc.getPath()) && - !globalState_.duplicateCollectionSlots.contains(slotDesc.getId())) { - return slotDesc; - } - } - return null; + return currentTupleDesc.getUniqueSlotDescriptor(slotPath); } private SlotDescriptor createAndRegisterSlotDesc(Path slotPath) @@ -1865,6 +1854,8 @@ public class Analyzer { desc.setPath(slotPath); if (insertIntoSlotPathMap) { slotPathMap_.put(slotPath, desc); + // Ensure parent TupleDescriptor can find desc by path. + desc.getParent().registerUniqueSlotDescriptorPath(desc); } registerColumnPrivReq(desc); } diff --git a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java index bbc73964d..c996abf8f 100644 --- a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java +++ b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java @@ -81,6 +81,7 @@ public class TupleDescriptor { private final TupleId id_; private final String debugName_; // debug-only private final List<SlotDescriptor> slots_ = new ArrayList<>(); + private final Map<Path, SlotDescriptor> slotByPath_ = new HashMap<>(); // Resolved path to the collection corresponding to this tuple descriptor, if any, // Only set for materialized tuples. @@ -136,9 +137,27 @@ public class TupleDescriptor { slots_.add(desc); } + /** + * Marks SlotDescriptor as unique and makes it accessible by getUniqueSlotDescriptor. + */ + public void registerUniqueSlotDescriptorPath(SlotDescriptor desc) { + Preconditions.checkState(desc.getPath() != null); + Preconditions.checkState(desc.getParent() == this); + SlotDescriptor oldDesc = slotByPath_.put(desc.getPath(), desc); + Preconditions.checkState(oldDesc == null, "SlotDescriptor must be unique"); + } + public TupleId getId() { return id_; } public List<SlotDescriptor> getSlots() { return slots_; } + /** + * Returns a unique slot descriptor for a given Path. For collections that can have + * multiple descriptors for a Path, returns null. + */ + public SlotDescriptor getUniqueSlotDescriptor(Path p) { + return slotByPath_.get(p); + } + /** * Returns the slots in this 'TupleDescriptor' and if any slot is a struct slot, returns * their slots as well, recursively. Does not descend into collections, only structs.
