This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new ca9b08372 IMPALA-11685: Slot memory sharing between struct and field 
not working if the field is also a struct
ca9b08372 is described below

commit ca9b08372556ac2010348b7c978c350e538b7b2c
Author: Daniel Becker <[email protected]>
AuthorDate: Tue Oct 25 17:28:37 2022 +0200

    IMPALA-11685: Slot memory sharing between struct and field not working
    if the field is also a struct
    
    IMPALA-10838 introduced that if a struct and one of its fields are both
    present in the select list, no extra slot is generated in the row for
    the struct field but the memory of the struct is reused, i.e. the row
    size is the same as when only the struct is queried. It works when the
    struct field is a primitive type:
    
    explain select id, outer_struct from
    functional_orc_def.complextypes_nested_structs;
    row-size=64B
    
    explain select id, outer_struct, outer_struct.str from
    functional_orc_def.complextypes_nested_structs;
    row-size=64B
    
    However, it does not if the child is itself a struct:
    
    explain select id, outer_struct, outer_struct.inner_struct3 from
    functional_orc_def.complextypes_nested_structs;
    row-size=80B
    
    This is because struct slot descriptors are registered before others so
    that it is easier to reuse the slot memory of the struct fields, but
    struct slot descriptors among themselves are sorted in the wrong order
    (see
    
https://github.com/apache/impala/blob/c12ac6c27b2df1eae693b44c157d65499f491d21/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java#L340).
    
    Also, because struct children are also inserted into the Analyzer's
    slotPathMap_ when the struct is registered, there is no need for the
    mechanism that tries to find ancestors of fields to be able to share
    their slot memory - they can be retrieved from the slotPathMap_. This
    change deletes the code that dealt with that.
    
    Testing:
     - PlannerTest#testStructFieldSlotSharedWithStruct has been updated to
       include queries where the struct field is also a struct; the elements
       in the select list are permutated to make sure the order in which
       they are listed does not matter.
    
    Change-Id: I6d4dee3941fb2d285fbd3836ea5712c859db8848
    Reviewed-on: http://gerrit.cloudera.org:8080/19167
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../java/org/apache/impala/analysis/Analyzer.java  | 124 ---------------------
 .../main/java/org/apache/impala/analysis/Path.java |  20 ----
 .../org/apache/impala/analysis/SelectStmt.java     |   5 +-
 .../org/apache/impala/planner/PlannerTest.java     |  39 ++++---
 4 files changed, 27 insertions(+), 161 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java 
b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 831d35d73..d85218be8 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -1474,13 +1474,6 @@ public class Analyzer {
       return result;
     }
 
-    final int highestAncestorDistance = 
getHighestLevelAncestorDistance(slotPath);
-    if (highestAncestorDistance > 0 && isOnlyWithinStructs(slotPath)) {
-      // 'slotPath' is within a struct that is also needed in the query.
-      return registerSlotRefWithinStruct(slotPath, highestAncestorDistance);
-    }
-
-    Preconditions.checkState(highestAncestorDistance == 0);
     if (slotPath.destType().isStructType()) {
       // 'slotPath' refers to a struct that has no ancestor that is also 
needed in the
       // query. We also create the child slot descriptors.
@@ -1493,87 +1486,6 @@ public class Analyzer {
     return result;
   }
 
-  // Returns whether all ancestors of 'slotPath' are structs (and for example 
not arrays).
-  private static boolean isOnlyWithinStructs(Path slotPath) {
-    List<Type> matchedTypes = slotPath.getMatchedTypes();
-    List<Type> ancestorTypes = matchedTypes.subList(0, matchedTypes.size() - 
1);
-    for (Type ancestorType : ancestorTypes) {
-      if (!ancestorType.isStructType()) return false;
-    }
-    return true;
-  }
-
-  /**
-   * If there is
-   *   a) a struct expression and
-   *   b) another expression that is a (possibly multiply) embedded field in 
the
-   *      struct expresssion
-   * in the select list or any other part of the query (WHERE clause, sorting 
etc.), we
-   * would like the tuple memory corresponding to the embedded expression to 
be shared
-   * between the struct expression and the embedded expression. Therefore, if 
we get the
-   * path of the embedded expression, we should return a slot descriptor that 
is within
-   * the tree of the struct expression.
-   *
-   * This is easy if we encounter the path of the struct expression first and 
the path of
-   * the embedded expression later: when we encounter the path of the embedded 
expression
-   * we simply traverse the tree of slot and tuple descriptors belonging to 
the struct
-   * expression and return the 'SlotDescriptor' corresponding to the embedded 
expression.
-   *
-   * If the order was reversed, the 'SlotDescriptor' for the ancestor struct 
wouldn't yet
-   * exist at the time we encountered the embedded expression. Therefore, in 
the
-   * 'SelectStmt' class we make sure that all struct paths are registered in 
descending
-   * order of raw path length (meaning the number of path elements, not string 
length):
-   * see 'SelectStmt.registerStructSlotRefPathsWithAnalyzer()'.
-   *
-   * If we encounter a path that is within a struct, we find the highest level 
enclosing
-   * struct that is needed in the query. This is not necessarily the top level 
struct
-   * enclosing the embedded field as that may not be used in the query - take 
the
-   * following example:
-   *     outer: struct<middle: struct<inner: struct<field: int>>>
-   * and suppose that 'outer' is not present in the query but 'field' and 
'middle' are
-   * both in the select list. The highest level enclosing struct for 'field' 
that is
-   * present in the query is 'middle' in this case.
-   *
-   * We traverse the tree of the highest level enclosing struct expression 
('middle' in
-   * the example) to find the 'SlotDescriptor' corresponding to the original 
path ('field'
-   * in the example) and return it.
-   *
-   * The parameter 'slotPath' is the path that we want to register and return 
the
-   * 'SlotDescriptor' for; 'highestAncestorDistance' is the distance in the 
expression
-   * tree from 'slotPath' to the highest level ancestor present in the query: 
in the
-   * example it is the distance from 'field' to 'middle', which is 1.
-   */
-  private SlotDescriptor registerSlotRefWithinStruct(Path slotPath,
-      int highestAncestorDistance) throws AnalysisException {
-    Preconditions.checkState(highestAncestorDistance > 0);
-
-    final List<String> rawPath = slotPath.getRawPath();
-    final int topStructPathLen = rawPath.size() - highestAncestorDistance;
-    Preconditions.checkState(topStructPathLen > 0);
-    final Path topStructPath = new Path(slotPath.getRootDesc(),
-        rawPath.subList(0, topStructPathLen));
-    Path topStructResolvedPath = null;
-    try {
-      topStructResolvedPath = 
resolvePathWithMasking(topStructPath.getRawPath(),
-          PathType.SLOT_REF);
-    } catch (TableLoadingException e) {
-      // Should never happen because we only check registered table aliases.
-      Preconditions.checkState(false);
-    }
-    Preconditions.checkState(topStructResolvedPath != null);
-    Preconditions.checkState(topStructResolvedPath.isResolved());
-    List<String> topStructKey = 
topStructResolvedPath.getFullyQualifiedRawPath();
-    SlotDescriptor topStructSlotDesc = slotPathMap_.get(topStructKey);
-
-    // Structs should already be registered when their members are registered.
-    Preconditions.checkNotNull(topStructSlotDesc);
-
-    List<String> remainingPath = rawPath.subList(topStructPathLen, 
rawPath.size());
-    SlotDescriptor childSlotRef = findChildSlotDesc(topStructSlotDesc, 
remainingPath);
-    Preconditions.checkState(childSlotRef != null);
-    return childSlotRef;
-  }
-
   private SlotDescriptor createAndRegisterSlotDesc(Path slotPath,
       boolean insertIntoSlotPathMap) {
     Preconditions.checkState(slotPath.isRootedAtTuple());
@@ -1586,23 +1498,6 @@ public class Analyzer {
     return result;
   }
 
-  private int getHighestLevelAncestorDistance(Path slotPath) {
-    Optional<Integer> greatestDistanceAncestor = 
slotPathMap_.entrySet().stream()
-      .filter(entry -> entry.getValue().getType().isStructType()) // only 
structs
-      .map(entry -> entry.getKey()) // take only the paths, not the 
SlotDescriptors
-      .map(parentPath -> 
Path.getRawPathWithoutPrefix(slotPath.getFullyQualifiedRawPath(),
-            parentPath)) // null for non-ancestors
-      .filter(remainingPath -> remainingPath != null) // only ancestors remain
-      .map(remainingPath -> remainingPath.size()) // distance from ancestor
-      .max(java.util.Comparator.naturalOrder());
-
-    if (greatestDistanceAncestor.isPresent()) {
-      return greatestDistanceAncestor.get();
-    } else {
-      return 0; // There is no ancestor, this is a top level path within the 
query.
-    }
-  }
-
   public void createStructTuplesAndSlotDescs(SlotDescriptor desc)
       throws AnalysisException {
     Preconditions.checkState(desc.getType().isStructType());
@@ -1633,25 +1528,6 @@ public class Analyzer {
     }
   }
 
-  private SlotDescriptor findChildSlotDesc(SlotDescriptor parent, List<String> 
path) {
-    if (path.isEmpty()) return parent;
-
-    final TupleDescriptor tuple = parent.getItemTupleDesc();
-    if (tuple == null) return null;
-
-    final int parentPathLen = parent.getPath().getRawPath().size();
-    SlotDescriptor childSlotDesc = null;
-    for (SlotDescriptor desc : tuple.getSlots()) {
-      if (desc.getPath().getRawPath().get(parentPathLen).equals(path.get(0))) {
-        childSlotDesc = desc;
-        break;
-      }
-    }
-
-    if (childSlotDesc == null) return null;
-    return findChildSlotDesc(childSlotDesc, path.subList(1, path.size()));
-  }
-
   /**
    * Registers a collection and its descendants.
    * Creates a CollectionTableRef for all collections on the path.
diff --git a/fe/src/main/java/org/apache/impala/analysis/Path.java 
b/fe/src/main/java/org/apache/impala/analysis/Path.java
index 7cf702b7c..2dfe01032 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Path.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Path.java
@@ -508,26 +508,6 @@ public class Path {
     return absolutePath_;
   }
 
-  /**
-   * If 'prefix' is a prefix of this path, returns a list with the elements of 
this path
-   * that would remain after removing the prefix; otherwise returns null.
-   *
-   * Uses 'getFullyQualifiedRawPath()' to obtain the elements
-   * of the paths.
-   */
-  public List<String> getRawPathWithoutPrefix(Path prefix) {
-    final List<String> prefixPath = prefix.getFullyQualifiedRawPath();
-    final List<String> thisPath = this.getFullyQualifiedRawPath();
-    return getRawPathWithoutPrefix(thisPath, prefixPath);
-  }
-
-  public static List<String> getRawPathWithoutPrefix(List<String> path,
-      List<String> prefix) {
-    if (prefix.size() > path.size()) return null;
-    if (!prefix.equals(path.subList(0, prefix.size()))) return null;
-    return path.subList(prefix.size(), path.size());
-  }
-
   /**
    * Converts table schema path to file schema path. Well, it's actually 
somewhere between
    * the two because the first column is offsetted with the number of 
partitions.
diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java 
b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index 4164f21ec..b94ff2494 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -355,11 +355,10 @@ public class SelectStmt extends QueryStmt {
           .filter(path -> path != null)
           .filter(path -> path.destType().isStructType())
           .collect(Collectors.toList());
-      // Sort paths by length in descending order so that structs that contain 
other
+      // Sort paths by length in ascending order so that structs that contain 
other
       // structs come before their children.
       Collections.sort(paths,
-          Comparator.<Path>comparingInt(path -> path.getMatchedTypes().size())
-          .reversed());
+          Comparator.<Path>comparingInt(path -> 
path.getMatchedTypes().size()));
       for (Path p : paths) {
         analyzer_.registerSlotRef(p);
       }
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java 
b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index fc5650900..06080b1e4 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -20,6 +20,8 @@ package org.apache.impala.planner;
 import static org.junit.Assert.assertEquals;
 
 import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
 
 import org.apache.impala.catalog.Catalog;
 import org.apache.impala.catalog.ColumnStats;
@@ -46,6 +48,7 @@ import org.junit.BeforeClass;
 import org.junit.Test;
 
 import com.google.common.base.Preconditions;
+import com.google.common.collect.Collections2;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
 
@@ -848,25 +851,33 @@ public class PlannerTest extends PlannerTestBase {
 
   @Test
   public void testStructFieldSlotSharedWithStruct() throws ImpalaException {
-    // Tests that in the case where a struct and one of its fields are both 
present in the
-    // select list, no extra slot is generated in the row for the struct field 
but the
-    // memory of the struct is reused, i.e. the row size is the same as when 
only the
+    // Tests that in the case where both a struct and some of its fields are 
present in
+    // the select list, no extra slots are generated in the row for the struct 
fields but
+    // the memory of the struct is reused, i.e. the row size is the same as 
when only the
     // struct is queried.
 
-    // For comlex types in the select list, we have to turn codegen off.
+    // For complex types in the select list, we have to turn codegen off.
     TQueryOptions queryOpts = defaultQueryOptions();
     queryOpts.setDisable_codegen(true);
 
-    String queryWithoutField =
-        "select id, outer_struct from 
functional_orc_def.complextypes_nested_structs";
-    int rowSizeWithoutField = getRowSize(queryWithoutField, queryOpts);
-
-    String queryWithField =
-        "select id, outer_struct, outer_struct.str " +
-        "from functional_orc_def.complextypes_nested_structs";
-    int rowSizeWithField = getRowSize(queryWithField, queryOpts);
-
-    Assert.assertEquals(rowSizeWithoutField, rowSizeWithField);
+    String queryTemplate =
+        "select %s from functional_orc_def.complextypes_nested_structs";
+
+    // The base case is when the top-level struct is selected.
+    String queryWithoutFields =
+        String.format(queryTemplate, "outer_struct");
+    int rowSizeWithoutFields = getRowSize(queryWithoutFields, queryOpts);
+
+    // Try permutations of (nested) fields of the top-level struct.
+    String[] fields = {"outer_struct", "outer_struct.str", 
"outer_struct.inner_struct3",
+      "outer_struct.inner_struct3.s"};
+    Collection<List<String>> permutations =
+      Collections2.permutations(java.util.Arrays.asList(fields));
+    for (List<String> permutation : permutations) {
+      String query = String.format(queryTemplate, String.join(", ", 
permutation));
+      int rowSize = getRowSize(query, queryOpts);
+      Assert.assertEquals(rowSizeWithoutFields, rowSize);
+    }
   }
 
   @Test

Reply via email to