This is an automated email from the ASF dual-hosted git repository.

dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit efa426453a8af3728bc272b9158f5564ce37e0ea
Author: Daniel Becker <[email protected]>
AuthorDate: Fri Oct 28 16:59:20 2022 +0200

    IMPALA-11692: Struct slot memory sharing involving select * not working 
properly
    
    With EXPAND_COMPLEX_TYPES=1, if there are structs coming from the star
    expansion and members of the structs are also given explicitly, slot
    memory sharing does not work in some cases:
    
    explain select * from functional_orc_def.complextypes_nested_structs;
    row-size=64B
    
    explain select *, outer_struct.inner_struct1 from
    functional_orc_def.complextypes_nested_structs;
    row-size=80B
    
    The row size should be the same in both cases as
    outer_struct.inner_struct1 is part of outer_struct which is included in
    the star.
    
    This change modifies how star select list items are analysed. First,
    before 'SelectAnalyzer.analyzeSelectClause()' is called, all star items
    are expanded to paths which are stored in the 'starExpandedPaths_' map.
    The function 'SelectAnalyzer.registerStructSlotRefPathsWithAnalyzer()',
    which makes struct slot memory sharing possible, takes these star
    expanded paths into account. Then in
    'SelectAnalyzer.analyzeSelectClause()' the paths expanded from the star
    items are retrieved and and normal analysis takes place.
    
    Testing:
     - Added the test function
       PlannerTest.testStructFieldSlotSharedWithStructFromStarExpansion()
       that verifies that struct slot memory sharing takes place.
    
    Change-Id: I346c2808c1aa5e77e3cdf3593f7f48ac96516c00
    Reviewed-on: http://gerrit.cloudera.org:8080/19190
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../org/apache/impala/analysis/SelectStmt.java     | 269 ++++++++++++++-------
 .../org/apache/impala/planner/PlannerTest.java     |  34 +++
 2 files changed, 222 insertions(+), 81 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java 
b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index b75b86da9..19db3caa0 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -21,8 +21,9 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.Iterator;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
-import java.util.HashSet;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -283,6 +284,41 @@ public class SelectStmt extends QueryStmt {
     private List<FunctionCallExpr> aggExprs_;
     private ExprSubstitutionMap countAllMap_;
 
+    private class StarExpandedPathInfo {
+      public StarExpandedPathInfo(Path expandedPath, Path originalRootPath) {
+        expandedPath_ = expandedPath;
+        originalRootPath_ = originalRootPath;
+      }
+
+      public Path getExpandedPath() { return expandedPath_; }
+      public boolean shouldRegisterForColumnMasking() {
+        // Empty matched types means this is expanded from star of a catalog 
table. For
+        // star of complex types, e.g. my_struct.*, my_array.*, my_map.*, the 
matched
+        // types will have the complex type so it's not empty.
+        // TODO: IMPALA-11712: We should sort out column masking and complex 
types. The
+        // above comment may not always be true: in the query
+        //   select a.* from mix_struct_array t, t.struct_in_arr a;
+        // getMatchedTypes() returns an empty list for the star path even 
though it is not
+        // from a catalog table.
+        // We should also find out whether we can determine from the expanded 
path alone
+        // (and not from the path of the star item) whether we need to 
register it for
+        // column masking, for example by checking if it is within a complex 
type.
+        return originalRootPath_.getMatchedTypes().isEmpty();
+      }
+
+      // The path expanded from a star select list item.
+      private final Path expandedPath_;
+
+      // The original path of the star select list item from which 
'expandedPath_' was
+      // expanded.
+      // Can be the path of a table, a struct or a collection.
+      private final Path originalRootPath_;
+    }
+
+    // A map from star 'SelectListItem's to the paths to which they are 
expanded.
+    private final Map<SelectListItem, List<StarExpandedPathInfo>> 
starExpandedPaths_
+        = new HashMap<>();
+
     private SelectAnalyzer(Analyzer analyzer) {
       this.analyzer_ = analyzer;
     }
@@ -291,6 +327,10 @@ public class SelectStmt extends QueryStmt {
       // Start out with table refs to establish aliases.
       fromClause_.analyze(analyzer_);
 
+      // Register struct paths (including those expanded from star 
expressions) before
+      // analyzeSelectClause() to guarantee tuple memory sharing between 
structs and
+      // struct members. See registerStructSlotRefPathsWithAnalyzer().
+      collectStarExpandedPaths();
       registerStructSlotRefPathsWithAnalyzer();
 
       analyzeSelectClause();
@@ -333,14 +373,43 @@ public class SelectStmt extends QueryStmt {
       analyzer_.setSimpleLimitStatus(checkSimpleLimitStmt());
     }
 
-    /* Register paths that point to structs. This is to unify expressions and 
allow
-     * embedded (struct member) expressions to share the tuple memory of their 
enclosing
-     * expressions.
+    /** Ensure that embedded (struct member) expressions share the tuple 
memory of their
+     * enclosing struct expressions by registering struct paths before 
analysis of select
+     * list items. Struct paths are registered in order of increasing number 
of path
+     * elements - this guarantees that when a struct member path is 
registered, its
+     * enclosing struct has already been registered, so 
Analyzer.registerSlotRef() can
+     * return the SlotDescriptor already created for the struct member within 
the struct,
+     * instead of creating a new SlotDescriptor for the struct member outside 
of the
+     * struct.
+     *
+     * Note that struct members can themselves be structs: this is the reason 
that
+     * ordering by increasing path element number (increasing embedding depth) 
is
+     * necessary and simply registering struct paths before other paths is not 
enough.
      */
     private void registerStructSlotRefPathsWithAnalyzer() throws 
AnalysisException {
+      Stream<Path> nonStarPaths = collectNonStarPaths();
+      Stream<Path> starExpandedPaths = starExpandedPaths_.values().stream()
+          // Get a flat list of paths (and booleans) belonging to all star 
items.
+          .flatMap(pathList -> pathList.stream())
+          // Discard the booleans and keep only the actual paths.
+          .map((StarExpandedPathInfo pathInfo)  -> pathInfo.getExpandedPath());
+
+      Stream<Path> allPaths = Stream.concat(nonStarPaths, starExpandedPaths);
+      List<Path> structPaths = allPaths
+          .filter(path -> path.destType().isStructType())
+          .collect(Collectors.toList());
+
+      // Sort paths by length in ascending order so that structs that contain 
other
+      // structs come before their children.
+      Collections.sort(structPaths,
+          Comparator.<Path>comparingInt(path -> 
path.getMatchedTypes().size()));
+      for (Path p : structPaths) {
+        analyzer_.registerSlotRef(p);
+      }
+    }
+
+    private Stream<Path> collectNonStarPaths() {
       Preconditions.checkNotNull(selectList_);
-      // Note: if we in the future include complex types in star expressions, 
we will have
-      // to expand star expressions here.
       Stream<Expr> selectListExprs = selectList_.getItems().stream()
         .filter(elem -> !elem.isStar())
         .map(elem -> elem.getExpr());
@@ -348,20 +417,13 @@ public class SelectStmt extends QueryStmt {
 
       Stream<Expr> exprs = Stream.concat(selectListExprs, nonSelectListExprs);
 
-      HashSet<SlotRef> slotRefs = new HashSet<>();
+      // Use a LinkedHashSet for deterministic iteration order.
+      LinkedHashSet<SlotRef> slotRefs = new LinkedHashSet<>();
       exprs.forEach(expr -> expr.collect(SlotRef.class, slotRefs));
 
-      List<Path> paths = slotRefs.stream().map(this::slotRefToResolvedPath)
-          .filter(path -> path != null)
-          .filter(path -> path.destType().isStructType())
-          .collect(Collectors.toList());
-      // Sort paths by length in ascending order so that structs that contain 
other
-      // structs come before their children.
-      Collections.sort(paths,
-          Comparator.<Path>comparingInt(path -> 
path.getMatchedTypes().size()));
-      for (Path p : paths) {
-        analyzer_.registerSlotRef(p);
-      }
+      return slotRefs.stream()
+          .map(this::slotRefToResolvedPath)
+          .filter(path -> path != null);
     }
 
     private Stream<Expr> collectExprsOutsideSelectList() {
@@ -419,48 +481,9 @@ public class SelectStmt extends QueryStmt {
       for (int i = 0; i < selectList_.getItems().size(); ++i) {
         SelectListItem item = selectList_.getItems().get(i);
         if (item.isStar()) {
-          if (item.getRawPath() != null) {
-            Path resolvedPath = analyzeStarPath(item.getRawPath(), analyzer_);
-            expandStar(resolvedPath);
-          } else {
-            expandStar();
-          }
+          analyzeStarItem(item);
         } else {
-          // Analyze the resultExpr before generating a label to ensure 
enforcement
-          // of expr child and depth limits (toColumn() label may call 
toSql()).
-          item.getExpr().analyze(analyzer_);
-          // Check for scalar subquery types which are not supported
-          List<Subquery> subqueryExprs = new ArrayList<>();
-          item.getExpr().collect(Subquery.class, subqueryExprs);
-          for (Subquery s : subqueryExprs) {
-            Preconditions.checkState(s.getStatement() instanceof SelectStmt);
-            if (!s.returnsScalarColumn()) {
-              throw new AnalysisException("A non-scalar subquery is not 
supported in "
-                  + "the expression: " + item.getExpr().toSql());
-            }
-            if (s.getStatement().isRuntimeScalar()) {
-              throw new AnalysisException(
-                  "A subquery which may return more than one row is not 
supported in "
-                  + "the expression: " + item.getExpr().toSql());
-            }
-            if (!((SelectStmt)s.getStatement()).returnsAtMostOneRow()) {
-              throw new AnalysisException("Only subqueries that are guaranteed 
to return "
-                   + "a single row are supported: " + item.getExpr().toSql());
-            }
-          }
-          resultExprs_.add(item.getExpr());
-          String label = item.toColumnLabel(i, analyzer_.useHiveColLabels());
-          SlotRef aliasRef = new SlotRef(label);
-          Expr existingAliasExpr = existingAliasExprs.get(label);
-          if (existingAliasExpr != null && 
!existingAliasExpr.equals(item.getExpr())) {
-            // If we have already seen this alias, it refers to more than one 
column and
-            // therefore is ambiguous.
-            ambiguousAliasList_.add(aliasRef);
-          } else {
-            existingAliasExprs.put(label, item.getExpr());
-          }
-          aliasSmap_.put(aliasRef, item.getExpr().clone());
-          colLabels_.add(label);
+          analyzeNonStarItem(item, existingAliasExprs, i);
         }
       }
       if (LOG.isTraceEnabled()) {
@@ -468,6 +491,62 @@ public class SelectStmt extends QueryStmt {
       }
     }
 
+    private void analyzeStarItem(SelectListItem item) throws AnalysisException 
{
+      Preconditions.checkState(item.isStar());
+      List<StarExpandedPathInfo> starExpandedPathInfos = 
starExpandedPaths_.get(item);
+      // If complex types are not expanded, a star item may expand to zero 
items, in which
+      // case starExpandedPaths_ does not have a value for it.
+      if (starExpandedPathInfos == null) {
+        Preconditions.checkState(
+            
!analyzer_.getQueryCtx().client_request.query_options.expand_complex_types);
+        return;
+      }
+
+      for (StarExpandedPathInfo pathInfo : starExpandedPathInfos) {
+        addStarExpandedPathResultExpr(pathInfo);
+      }
+    }
+
+    private void analyzeNonStarItem(SelectListItem item,
+        Map<String, Expr> existingAliasExprs, int selectListPos)
+        throws AnalysisException {
+      // Analyze the resultExpr before generating a label to ensure enforcement
+      // of expr child and depth limits (toColumn() label may call toSql()).
+      item.getExpr().analyze(analyzer_);
+      // Check for scalar subquery types which are not supported
+      List<Subquery> subqueryExprs = new ArrayList<>();
+      item.getExpr().collect(Subquery.class, subqueryExprs);
+      for (Subquery s : subqueryExprs) {
+        Preconditions.checkState(s.getStatement() instanceof SelectStmt);
+        if (!s.returnsScalarColumn()) {
+          throw new AnalysisException("A non-scalar subquery is not supported 
in "
+              + "the expression: " + item.getExpr().toSql());
+        }
+        if (s.getStatement().isRuntimeScalar()) {
+          throw new AnalysisException(
+              "A subquery which may return more than one row is not supported 
in "
+              + "the expression: " + item.getExpr().toSql());
+        }
+        if (!((SelectStmt)s.getStatement()).returnsAtMostOneRow()) {
+          throw new AnalysisException("Only subqueries that are guaranteed to 
return "
+              + "a single row are supported: " + item.getExpr().toSql());
+        }
+      }
+      resultExprs_.add(item.getExpr());
+      String label = item.toColumnLabel(selectListPos, 
analyzer_.useHiveColLabels());
+      SlotRef aliasRef = new SlotRef(label);
+      Expr existingAliasExpr = existingAliasExprs.get(label);
+      if (existingAliasExpr != null && 
!existingAliasExpr.equals(item.getExpr())) {
+        // If we have already seen this alias, it refers to more than one 
column and
+        // therefore is ambiguous.
+        ambiguousAliasList_.add(aliasRef);
+      } else {
+        existingAliasExprs.put(label, item.getExpr());
+      }
+      aliasSmap_.put(aliasRef, item.getExpr().clone());
+      colLabels_.add(label);
+    }
+
     private void verifyResultExprs() throws AnalysisException {
       // Star exprs only expand to the scalar-typed columns/fields, so
       // the resultExprs_ could be empty.
@@ -718,7 +797,7 @@ public class SelectStmt extends QueryStmt {
      * complex-typed fields for backwards compatibility unless 
EXPAND_COMPLEX_TYPES is set
      * to true.
      */
-    private void expandStar() throws AnalysisException {
+    private void expandStar(SelectListItem selectListItem) throws 
AnalysisException {
       if (fromClause_.isEmpty()) {
         throw new AnalysisException(
             "'*' expression in select list requires FROM clause.");
@@ -730,7 +809,7 @@ public class SelectStmt extends QueryStmt {
         Path resolvedPath = new Path(tableRef.getDesc(),
             Collections.<String>emptyList());
         Preconditions.checkState(resolvedPath.resolve());
-        expandStar(resolvedPath);
+        expandStar(selectListItem, resolvedPath);
       }
     }
 
@@ -738,7 +817,7 @@ public class SelectStmt extends QueryStmt {
      * Expand "path.*" from a resolved path, ignoring complex-typed fields for 
backwards
      * compatibility unless EXPAND_COMPLEX_TYPES is set to true.
      */
-    private void expandStar(Path resolvedPath)
+    private void expandStar(SelectListItem selectListItem, Path resolvedPath)
         throws AnalysisException {
       Preconditions.checkState(resolvedPath.isResolved());
       if (resolvedPath.destTupleDesc() != null &&
@@ -749,7 +828,7 @@ public class SelectStmt extends QueryStmt {
         TupleDescriptor tupleDesc = resolvedPath.destTupleDesc();
         FeTable table = tupleDesc.getTable();
         for (Column c: table.getColumnsInHiveOrder()) {
-          addStarResultExpr(resolvedPath, c.getName());
+          addStarExpandedPath(selectListItem, resolvedPath, c.getName());
         }
       } else {
         // The resolved path does not target the descriptor of a catalog table.
@@ -767,54 +846,82 @@ public class SelectStmt extends QueryStmt {
         if (structType instanceof CollectionStructType) {
           CollectionStructType cst = (CollectionStructType) structType;
           if (cst.isMapStruct()) {
-            addStarResultExpr(resolvedPath, Path.MAP_KEY_FIELD_NAME);
+            addStarExpandedPath(selectListItem, resolvedPath, 
Path.MAP_KEY_FIELD_NAME);
           }
           if (cst.getOptionalField().getType().isStructType()) {
             structType = (StructType) cst.getOptionalField().getType();
             for (StructField f: structType.getFields()) {
-              addStarResultExpr(
-                  resolvedPath, cst.getOptionalField().getName(), f.getName());
+              addStarExpandedPath(selectListItem, resolvedPath,
+                  cst.getOptionalField().getName(), f.getName());
             }
           } else if (cst.isMapStruct()) {
-            addStarResultExpr(resolvedPath, Path.MAP_VALUE_FIELD_NAME);
+            addStarExpandedPath(selectListItem, resolvedPath, 
Path.MAP_VALUE_FIELD_NAME);
           } else {
-            addStarResultExpr(resolvedPath, Path.ARRAY_ITEM_FIELD_NAME);
+            addStarExpandedPath(selectListItem, resolvedPath, 
Path.ARRAY_ITEM_FIELD_NAME);
           }
         } else {
           // Default star expansion.
           for (StructField f: structType.getFields()) {
             if (f.isHidden()) continue;
-            addStarResultExpr(resolvedPath, f.getName());
+            addStarExpandedPath(selectListItem, resolvedPath, f.getName());
           }
         }
       }
     }
 
     /**
-     * Helper function used during star expansion to add a single result expr
-     * based on a given raw path to be resolved relative to an existing path.
+     * Expand star items to paths and store them in 'starExpandedPaths_'.
      */
-    private void addStarResultExpr(Path resolvedPath,
+    private void collectStarExpandedPaths() throws AnalysisException {
+      for (SelectListItem item : selectList_.getItems()) {
+        if (item.isStar()) {
+          if (item.getRawPath() != null) {
+            Path resolvedPath = analyzeStarPath(item.getRawPath(), analyzer_);
+            expandStar(item, resolvedPath);
+          } else {
+            expandStar(item);
+          }
+        }
+      }
+    }
+
+    /**
+     * Helper function used during star expansion to add a single expanded 
path based on a
+     * given raw path to be resolved relative to an existing path.
+     */
+    private void addStarExpandedPath(SelectListItem selectListItem, Path 
resolvedRootPath,
         String... relRawPath) throws AnalysisException {
-      Path p = Path.createRelPath(resolvedPath, relRawPath);
-      Preconditions.checkState(p.resolve());
-      if (p.destType().isComplexType() &&
+      Path starExpandedPath = Path.createRelPath(resolvedRootPath, relRawPath);
+      Preconditions.checkState(starExpandedPath.resolve());
+      if (starExpandedPath.destType().isComplexType() &&
           
!analyzer_.getQueryCtx().client_request.query_options.expand_complex_types) {
         return;
       }
-      SlotDescriptor slotDesc = analyzer_.registerSlotRef(p, false);
+
+      if (!starExpandedPaths_.containsKey(selectListItem)) {
+        starExpandedPaths_.put(selectListItem, new ArrayList<>());
+      }
+      List<StarExpandedPathInfo> pathsOfStarItem = 
starExpandedPaths_.get(selectListItem);
+      pathsOfStarItem.add(new StarExpandedPathInfo(starExpandedPath, 
resolvedRootPath));
+    }
+
+    private void addStarExpandedPathResultExpr(StarExpandedPathInfo 
starExpandedPathInfo)
+        throws AnalysisException {
+      
Preconditions.checkState(starExpandedPathInfo.getExpandedPath().isResolved());
+
+      SlotDescriptor slotDesc = analyzer_.registerSlotRef(
+          starExpandedPathInfo.getExpandedPath(), false);
       SlotRef slotRef = new SlotRef(slotDesc);
       Preconditions.checkState(slotRef.isAnalyzed(),
           "Analysis should be done in constructor");
 
-      // Empty matched types means this is expanded from star of a catalog 
table.
-      // For star of complex types, e.g. my_struct.*, my_array.*, my_map.*, 
the matched
-      // types will have the complex type so it's not empty.
-      if (resolvedPath.getMatchedTypes().isEmpty()) {
+      if (starExpandedPathInfo.shouldRegisterForColumnMasking()) {
         analyzer_.registerColumnForMasking(slotDesc);
       }
       resultExprs_.add(slotRef);
-      colLabels_.add(relRawPath[relRawPath.length - 1]);
+      final List<String> starExpandedRawPath = starExpandedPathInfo
+        .getExpandedPath().getRawPath();
+      colLabels_.add(starExpandedRawPath.get(starExpandedRawPath.size() - 1));
     }
 
     /**
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java 
b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index 06080b1e4..5f2383e48 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -857,6 +857,7 @@ public class PlannerTest extends PlannerTestBase {
     // struct is queried.
 
     // For complex types in the select list, we have to turn codegen off.
+    // TODO: Remove this when IMPALA-10851 is fixed.
     TQueryOptions queryOpts = defaultQueryOptions();
     queryOpts.setDisable_codegen(true);
 
@@ -880,6 +881,39 @@ public class PlannerTest extends PlannerTestBase {
     }
   }
 
+  @Test
+  public void testStructFieldSlotSharedWithStructFromStarExpansion()
+      throws ImpalaException {
+    // Like testStructFieldSlotSharedWithStruct(), but involving structs that 
come from a
+    // star expansion.
+
+    TQueryOptions queryOpts = defaultQueryOptions();
+    // For complex types in the select list, we have to turn codegen off.
+    // TODO: Remove this when IMPALA-10851 is fixed.
+    queryOpts.setDisable_codegen(true);
+    // Enable star-expandion of complex types.
+    queryOpts.setExpand_complex_types(true);
+
+    String queryTemplate =
+        "select %s from functional_orc_def.complextypes_nested_structs";
+
+    // The base case is when only the star is given in the select list.
+    String queryWithoutFields =
+        String.format(queryTemplate, "*");
+    int rowSizeWithoutFields = getRowSize(queryWithoutFields, queryOpts);
+
+    // Try permutations of (nested) fields of the top-level struct.
+    String[] fields = {"*", "outer_struct", "outer_struct.inner_struct1",
+      "outer_struct.inner_struct1.str"};
+    Collection<List<String>> permutations =
+      Collections2.permutations(java.util.Arrays.asList(fields));
+    for (List<String> permutation : permutations) {
+      String query = String.format(queryTemplate, String.join(", ", 
permutation));
+      int rowSize = getRowSize(query, queryOpts);
+      Assert.assertEquals(rowSizeWithoutFields, rowSize);
+    }
+  }
+
   @Test
   public void testResourceRequirements() {
     // Tests the resource requirement computation from the planner.

Reply via email to