This is an automated email from the ASF dual-hosted git repository.

laszlog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 98b584a45f95380a30cddc1736973d91ba29542f
Author: Steve Carlin <[email protected]>
AuthorDate: Wed Oct 23 08:34:49 2024 -0700

    IMPALA-13481: Add support for various agg and analytic functions
    
    Various functions were added.  There were several issues for
    these functions:
    
    1) The Calcite parser and/or validator was generating SqlNodes
    that weren't compatible with Impala. To fix this, the parsing
    had to be removed from the Parser.jj file and the functions were
    marked to use the ImpalaOperator rather than the Calcite operator.
    These functions include:
    
    trim, extract, regr*, regexp*, localtime, group_concat
    
    2) The ntile, cume_dist, and percent_rank functions undergo a
    transformation in AnalyticExpr. To make this more clean for Calcite,
    the transformation now happens in the RewriteRexOverRule.
    
    3) The "negative" operator had to be added to the custom operator table.
    The subtract was already added there, and all "-" operators need
    to be in the same table.
    
    4) Various functions were added to function resolver where the Calcite
    function name was different from the Impala function name.
    
    Also added the test mentioned in IMPALA-13688 for cume_dist with
    duplicates.
    
    Change-Id: I57c69a60c63872b2964688f395b662a85698555e
    Reviewed-on: http://gerrit.cloudera.org:8080/21976
    Reviewed-by: Joe McDonnell <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 java/calcite-planner/src/main/codegen/config.fmpp  |  13 +
 .../src/main/codegen/templates/Parser.jj           |  81 +-----
 .../impala/calcite/functions/FunctionResolver.java |  22 ++
 .../operators/ImpalaCustomOperatorTable.java       |  14 +
 .../calcite/operators/ImpalaOperatorTable.java     |   6 +-
 .../impala/calcite/rules/RewriteRexOverRule.java   | 288 +++++++++++++++++++++
 .../impala/calcite/service/CalciteOptimizer.java   |   2 +
 .../queries/QueryTest/analytic-fns.test            | 132 ++++++++++
 8 files changed, 482 insertions(+), 76 deletions(-)

diff --git a/java/calcite-planner/src/main/codegen/config.fmpp 
b/java/calcite-planner/src/main/codegen/config.fmpp
index 9cd56e6c3..2c44b224e 100644
--- a/java/calcite-planner/src/main/codegen/config.fmpp
+++ b/java/calcite-planner/src/main/codegen/config.fmpp
@@ -87,6 +87,7 @@ parser: {
     "CONSTRUCTOR"
     "CONTAINS_SUBSTR"
     "CONTINUE"
+    "CORR"
     "CURSOR_NAME"
     "DATA"
     "DATABASE"
@@ -126,6 +127,7 @@ parser: {
     "EXCEPTION"
     "EXCLUDE"
     "EXCLUDING"
+    "EXTRACT"
     "FINAL"
     "FIRST"
     "FOLLOWING"
@@ -171,6 +173,7 @@ parser: {
     "LENGTH"
     "LEVEL"
     "LIBRARY"
+    "LOCALTIME"
     "LOCATOR"
     "M"
     "MAP"
@@ -230,6 +233,15 @@ parser: {
     "QUARTER"
     "QUARTERS"
     "READ"
+    "REGR_AVGX"
+    "REGR_AVGY"
+    "REGR_COUNT"
+    "REGR_INTERCEPT"
+    "REGR_R2"
+    "REGR_SLOPE"
+    "REGR_SXX"
+    "REGR_SXY"
+    "REGR_SYY"
     "RELATIVE"
     "REPEATABLE"
     "REPLACE"
@@ -346,6 +358,7 @@ parser: {
     "TRIGGER_CATALOG"
     "TRIGGER_NAME"
     "TRIGGER_SCHEMA"
+    "TRIM"
     "TUMBLE"
     "TYPE"
     "UNBOUNDED"
diff --git a/java/calcite-planner/src/main/codegen/templates/Parser.jj 
b/java/calcite-planner/src/main/codegen/templates/Parser.jj
index d7c558812..439ff2446 100644
--- a/java/calcite-planner/src/main/codegen/templates/Parser.jj
+++ b/java/calcite-planner/src/main/codegen/templates/Parser.jj
@@ -3970,7 +3970,6 @@ SqlCall StringAggFunctionCall() :
     (
         <ARRAY_AGG> { s = span(); op = SqlLibraryOperators.ARRAY_AGG; }
     |   <ARRAY_CONCAT_AGG> { s = span(); op = 
SqlLibraryOperators.ARRAY_CONCAT_AGG; }
-    |   <GROUP_CONCAT> { s = span(); op = SqlLibraryOperators.GROUP_CONCAT; }
     |   <STRING_AGG> { s = span(); op = SqlLibraryOperators.STRING_AGG; }
     )
     <LPAREN>
@@ -6177,16 +6176,6 @@ SqlNode BuiltinFunctionCall() :
         <RPAREN> {
             return f.createCall(s.end(this), args);
         }
-    |
-        <EXTRACT> { s = span(); }
-        <LPAREN> unit = TimeUnitOrName() {
-            args.add(unit);
-        }
-        <FROM>
-        AddExpression(args, ExprContext.ACCEPT_SUB_QUERY)
-        <RPAREN> {
-            return SqlStdOperatorTable.EXTRACT.createCall(s.end(this), args);
-        }
     |
         <POSITION> { s = span(); }
         <LPAREN>
@@ -6294,65 +6283,6 @@ SqlNode BuiltinFunctionCall() :
             return SqlStdOperatorTable.SUBSTRING.createCall(
                 s.end(this), args);
         }
-    |
-        <TRIM> {
-            SqlLiteral flag = null;
-            SqlNode trimChars = null;
-            s = span();
-        }
-        <LPAREN>
-        [
-            LOOKAHEAD(2)
-            [
-                <BOTH> {
-                    s.add(this);
-                    flag = SqlTrimFunction.Flag.BOTH.symbol(getPos());
-                }
-            |
-                <TRAILING> {
-                    s.add(this);
-                    flag = SqlTrimFunction.Flag.TRAILING.symbol(getPos());
-                }
-            |
-                <LEADING> {
-                    s.add(this);
-                    flag = SqlTrimFunction.Flag.LEADING.symbol(getPos());
-                }
-            ]
-            [ trimChars = Expression(ExprContext.ACCEPT_SUB_QUERY) ]
-            (
-                <FROM> {
-                    if (null == flag && null == trimChars) {
-                        throw SqlUtil.newContextException(getPos(),
-                            RESOURCE.illegalFromEmpty());
-                    }
-                }
-            |
-                <RPAREN> {
-                    // This is to handle the case of TRIM(x)
-                    // (FRG-191).
-                    if (flag == null) {
-                        flag = 
SqlTrimFunction.Flag.BOTH.symbol(SqlParserPos.ZERO);
-                    }
-                    args.add(flag);
-                    args.add(null); // no trim chars
-                    args.add(trimChars); // reinterpret trimChars as source
-                    return SqlStdOperatorTable.TRIM.createCall(s.end(this),
-                        args);
-                }
-            )
-        ]
-        e = Expression(ExprContext.ACCEPT_SUB_QUERY) {
-            if (flag == null) {
-                flag = SqlTrimFunction.Flag.BOTH.symbol(SqlParserPos.ZERO);
-            }
-            args.add(flag);
-            args.add(trimChars);
-            args.add(e);
-        }
-        <RPAREN> {
-            return SqlStdOperatorTable.TRIM.createCall(s.end(this), args);
-        }
     |
         node = ContainsSubstrFunctionCall() { return node; }
     |
@@ -7535,7 +7465,6 @@ SqlIdentifier ReservedFunctionName() :
     |   <LEFT>
     |   <LAST_VALUE>
     |   <LN>
-    |   <LOCALTIME>
     |   <LOCALTIMESTAMP>
     |   <LOWER>
     |   <MAX>
@@ -7552,9 +7481,6 @@ SqlIdentifier ReservedFunctionName() :
     |   <PERCENT_RANK>
     |   <POWER>
     |   <RANK>
-    |   <REGR_COUNT>
-    |   <REGR_SXX>
-    |   <REGR_SYY>
     |   <RIGHT>
     |   <ROW_NUMBER>
     |   <SECOND>
@@ -7589,7 +7515,6 @@ SqlIdentifier ContextVariable() :
     |   <CURRENT_TIME>
     |   <CURRENT_TIMESTAMP>
     |   <CURRENT_USER>
-    |   <LOCALTIME>
     |   <LOCALTIMESTAMP>
     |   <SESSION_USER>
     |   <SYSTEM_USER>
@@ -7808,6 +7733,10 @@ SqlBinaryOperator BinaryRowOperator() :
 |   <LE> { return SqlStdOperatorTable.LESS_THAN_OR_EQUAL; }
 |   <GE> { return SqlStdOperatorTable.GREATER_THAN_OR_EQUAL; }
 |   <NE> { return SqlStdOperatorTable.NOT_EQUALS; }
+|   <REGEXP> { return SqlStdOperatorTable.POSIX_REGEX_CASE_SENSITIVE; }
+|   LOOKAHEAD(2) <NOT> <REGEXP> { return 
SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_SENSITIVE; }
+|   <IREGEXP> { return SqlStdOperatorTable.POSIX_REGEX_CASE_INSENSITIVE; }
+|   LOOKAHEAD(2) <NOT> <IREGEXP> { return 
SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_INSENSITIVE; }
 |   <NE2> {
         if (!this.conformance.isBangEqualAllowed()) {
             throw SqlUtil.newContextException(getPos(), 
RESOURCE.bangEqualNotAllowed());
@@ -8822,6 +8751,8 @@ void NonReservedKeyWord2of3() :
 |   < VERTICAL_BAR: "|" >
 |   < CARET: "^" >
 |   < DOLLAR: "$" >
+|   < REGEXP : "REGEXP" >
+|   < IREGEXP : "IREGEXP" >
 <#list (parser.binaryOperatorsTokens!default.parser.binaryOperatorsTokens) as 
operator>
 |   ${operator}
 </#list>
diff --git 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/FunctionResolver.java
 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/FunctionResolver.java
index 1e88f18c5..b812a283a 100644
--- 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/FunctionResolver.java
+++ 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/FunctionResolver.java
@@ -57,8 +57,12 @@ public class FunctionResolver {
   public static Map<SqlKind, String> CALCITE_KIND_TO_IMPALA_FUNC =
       ImmutableMap.<SqlKind, String> builder()
       .put(SqlKind.EQUALS, "eq")
+      .put(SqlKind.IS_FALSE, "isfalse")
+      .put(SqlKind.IS_NOT_FALSE, "isnotfalse")
       .put(SqlKind.IS_NOT_NULL, "is_not_null_pred")
+      .put(SqlKind.IS_NOT_TRUE, "isnottrue")
       .put(SqlKind.IS_NULL, "is_null_pred")
+      .put(SqlKind.IS_TRUE, "istrue")
       .put(SqlKind.GREATER_THAN, "gt")
       .put(SqlKind.GREATER_THAN_OR_EQUAL, "ge")
       .put(SqlKind.LESS_THAN, "lt")
@@ -66,9 +70,12 @@ public class FunctionResolver {
       .put(SqlKind.NOT_EQUALS, "ne")
       .put(SqlKind.PLUS, "add")
       .put(SqlKind.MINUS, "subtract")
+      .put(SqlKind.MINUS_PREFIX, "negative")
       .put(SqlKind.TIMES, "multiply")
       .put(SqlKind.DIVIDE, "divide")
       .put(SqlKind.SUM0, "sum_init_zero")
+      .put(SqlKind.POSIX_REGEX_CASE_SENSITIVE, "regexp")
+      .put(SqlKind.POSIX_REGEX_CASE_INSENSITIVE, "iregexp")
       .build();
 
   // Map of Calcite names to an Impala function name when the names are 
different
@@ -90,6 +97,7 @@ public class FunctionResolver {
   public static Set<String> SPECIAL_PROCESSING_FUNCTIONS =
       ImmutableSet.<String> builder()
       .add("grouping_id")
+      .add("count")
       .build();
 
   public static Function getSupertypeFunction(RexCall call) {
@@ -182,6 +190,20 @@ public class FunctionResolver {
           lowercaseName, impalaArgTypes, Type.BIGINT, true, false, true);
     }
 
+    // Hack.  The count function can have more than one parameter when it is
+    // of the form "count(distinct c1, c2)"  However, the function resolver 
only
+    // contains count functions with one parameter. It is only later on in
+    // the compilation that the AggregateInfo class changes the multiple
+    // parameters into one parameter. But we still have to deal with resolving
+    // count here.  So we just grab the first parameter so that it resolves
+    // properly.
+    if (lowercaseName.equals("count")) {
+      if (impalaArgTypes.size() > 1) {
+        impalaArgTypes = Lists.newArrayList(impalaArgTypes.get(0));
+      }
+      return getImpalaFunction(lowercaseName, impalaArgTypes, exactMatch);
+    }
+
     throw new RuntimeException("Special function not found: " + lowercaseName);
   }
 
diff --git 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaCustomOperatorTable.java
 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaCustomOperatorTable.java
index 9bd41e538..b0ab24ddd 100644
--- 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaCustomOperatorTable.java
+++ 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaCustomOperatorTable.java
@@ -25,11 +25,13 @@ import org.apache.calcite.sql.SqlBinaryOperator;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.SqlOperatorBinding;
 import org.apache.calcite.sql.SqlSetOperator;
+import org.apache.calcite.sql.SqlPrefixOperator;
 import org.apache.calcite.sql.fun.ImpalaGroupingFunction;
 import org.apache.calcite.sql.fun.SqlMonotonicBinaryOperator;
 import org.apache.calcite.sql.fun.SqlCountAggFunction;
 import org.apache.calcite.sql.type.InferTypes;
 import org.apache.calcite.sql.type.OperandTypes;
+import org.apache.calcite.sql.type.ReturnTypes;
 import org.apache.calcite.sql.type.SqlReturnTypeInference;
 import org.apache.calcite.sql.type.SqlTypeName;
 import org.apache.calcite.sql.type.SqlTypeUtil;
@@ -165,6 +167,18 @@ public class ImpalaCustomOperatorTable extends 
ReflectiveSqlOperatorTable {
           InferTypes.FIRST_KNOWN,
           OperandTypes.DIVISION_OPERATOR);
 
+  // UNARY_MINUS is the same as the one in Calcite. We need it in
+  // our custom operators because "subtract" is here, and all
+  // operators with "-" need to be in the same operator table.
+  public static final SqlPrefixOperator UNARY_MINUS =
+      new SqlPrefixOperator(
+          "-",
+          SqlKind.MINUS_PREFIX,
+          80,
+          ReturnTypes.ARG0,
+          InferTypes.RETURN_TYPE,
+          OperandTypes.NUMERIC_OR_INTERVAL);
+
   public static final SqlBinaryOperator PERCENT_REMAINDER =
       new SqlBinaryOperator(
           "%",
diff --git 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaOperatorTable.java
 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaOperatorTable.java
index 354e33b19..e13b43948 100644
--- 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaOperatorTable.java
+++ 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaOperatorTable.java
@@ -79,6 +79,10 @@ public class ImpalaOperatorTable extends 
ReflectiveSqlOperatorTable {
       .add("covar_samp")
       .add("coalesce")
       .add("lag")
+      .add("trim")
+      .add("extract")
+      .add("regr_count")
+      .add("localtime")
       .build();
 
   private static ImpalaOperatorTable INSTANCE;
@@ -94,7 +98,7 @@ public class ImpalaOperatorTable extends 
ReflectiveSqlOperatorTable {
     ImpalaCustomOperatorTable.instance().lookupOperatorOverloads(opName, 
category, syntax,
         operatorList, nameMatcher);
 
-    if (operatorList.size() == 1) {
+    if (operatorList.size() >= 1) {
       return;
     }
 
diff --git 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rules/RewriteRexOverRule.java
 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rules/RewriteRexOverRule.java
new file mode 100644
index 000000000..907027c44
--- /dev/null
+++ 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rules/RewriteRexOverRule.java
@@ -0,0 +1,288 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.impala.calcite.rules;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.core.RelFactories.ProjectFactory;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexFieldCollation;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexOver;
+import org.apache.calcite.rex.RexShuttle;
+import org.apache.calcite.sql.SqlFunctionCategory;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.impala.calcite.operators.ImpalaAggOperator;
+import org.apache.impala.calcite.operators.ImpalaCustomOperatorTable;
+import org.apache.impala.calcite.operators.ImpalaOperatorTable;
+import org.apache.impala.calcite.operators.ImpalaOperator;
+import org.apache.impala.calcite.type.ImpalaTypeSystemImpl;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Lists;
+
+/**
+ * Rule to replace unsupported RexOver with supported syntax. For instance,
+ * the percent_rank function isn't supported directly, but it can be calculated
+ * through the rank() and count() functions which are supported.
+ */
+public class RewriteRexOverRule extends RelOptRule {
+  private final ProjectFactory projectFactory;
+
+  public static final RewriteRexOverRule INSTANCE =
+      new RewriteRexOverRule();
+
+  private RewriteRexOverRule() {
+    super(operand(Project.class, any()));
+    this.projectFactory = RelFactories.DEFAULT_PROJECT_FACTORY;
+  }
+
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    final Project project = call.rel(0);
+    final RelNode input = project.getInput();
+    final RexBuilder rexBuilder = project.getCluster().getRexBuilder();
+
+    RexOverReplacer replacer = new RexOverReplacer(rexBuilder);
+
+    List<RexNode> exprs = new ArrayList<>();
+    for (RexNode r : project.getProjects()) {
+      exprs.add(replacer.apply(r));
+    }
+
+    if (!replacer.replacedValue) {
+      return;
+    }
+
+    RelNode newProject = projectFactory.createProject(
+        input, Collections.emptyList(), exprs, 
project.getRowType().getFieldNames());
+
+    call.transformTo(newProject);
+  }
+
+  private static class RexOverReplacer extends RexShuttle {
+    public boolean replacedValue;
+    private final RexBuilder rexBuilder;
+
+    public RexOverReplacer(RexBuilder rexBuilder) {
+      this.rexBuilder = rexBuilder;
+    }
+
+    @Override
+    public RexNode visitOver(RexOver over) {
+      if (over.getOperator().getName().toLowerCase().equals("percent_rank")) {
+        replacedValue = true;
+        return replacePercentRank(over);
+      }
+      if (over.getOperator().getName().toLowerCase().equals("cume_dist")) {
+        replacedValue = true;
+        return replaceCumeDist(over);
+      }
+      if (over.getOperator().getName().toLowerCase().equals("ntile")) {
+        replacedValue = true;
+        return replaceNTile(over);
+      }
+      return super.visitOver(over);
+    }
+
+    /**
+     * Rewrite cume_dist() to the following:
+     *
+     * cume_dist() over([partition by clause] order by clause)
+     *    = ((Count - Rank) + 1)/Count
+     * where,
+     *  Rank = rank() over([partition by clause] order by clause DESC)
+     *  Count = count() over([partition by clause])
+     */
+    public RexNode replaceCumeDist(RexOver over) {
+      ImmutableList<RexFieldCollation> reversedCollation =
+          reverseCollation(over.getWindow().orderKeys);
+      RelDataType bigintType =
+          rexBuilder.getTypeFactory().createSqlType(SqlTypeName.BIGINT);
+
+      // create RexNode for  rank() over([partition by clause] order by clause 
DESC)
+      RexNode rankNode = rexBuilder.makeOver(bigintType, 
SqlStdOperatorTable.RANK,
+          over.getOperands(), over.getWindow().partitionKeys, 
reversedCollation,
+          over.getWindow().getLowerBound(), over.getWindow().getUpperBound(),
+          over.getWindow().isRows(), true, false, over.isDistinct(), 
over.ignoreNulls());
+
+      // create RexNode for count() over([partition by clause])
+      RexNode countNode = rexBuilder.makeOver(bigintType,
+          ImpalaCustomOperatorTable.COUNT, over.getOperands(),
+          over.getWindow().partitionKeys, ImmutableList.of(),
+          over.getWindow().getLowerBound(), over.getWindow().getUpperBound(),
+          over.getWindow().isRows(), true, false, over.isDistinct(), 
over.ignoreNulls());
+
+      RexNode one = rexBuilder.makeCast(bigintType, 
rexBuilder.makeExactLiteral(
+          BigDecimal.valueOf(1)));
+      RexNode countMinusRank = 
rexBuilder.makeCall(ImpalaCustomOperatorTable.MINUS,
+          countNode, rankNode);
+
+      // create RexNode for((Count - Rank) + 1)
+      RexNode countMinusRankPlusOne = 
rexBuilder.makeCall(ImpalaCustomOperatorTable.PLUS,
+          countMinusRank, one);
+      RexNode numeratorDouble = rexBuilder.makeCast(over.type, 
countMinusRankPlusOne);
+
+      RexNode denominatorDouble = rexBuilder.makeCast(over.type, countNode);
+      return rexBuilder.makeCall(ImpalaCustomOperatorTable.DIVIDE, 
numeratorDouble,
+          denominatorDouble);
+    }
+
+    /**
+     * Rewrite percent_rank() to the following:
+     *
+     * percent_rank() over([partition by clause] order by clause)
+     *    = case
+     *        when Count = 1 then 0
+     *        else (Rank - 1)/(Count - 1)
+     * where,
+     *  Rank = rank() over([partition by clause] order by clause)
+     *  Count = count() over([partition by clause])
+     */
+    public RexNode replacePercentRank(RexOver over) {
+      RelDataType bigintType =
+          rexBuilder.getTypeFactory().createSqlType(SqlTypeName.BIGINT);
+
+      // create RexNode for rank() over ([partition by clause] order by clause)
+      RexNode rankNode = rexBuilder.makeOver(bigintType, 
SqlStdOperatorTable.RANK,
+          over.getOperands(), over.getWindow().partitionKeys, 
over.getWindow().orderKeys,
+          over.getWindow().getLowerBound(), over.getWindow().getUpperBound(),
+          over.getWindow().isRows(), true, false, over.isDistinct(), 
over.ignoreNulls());
+
+      // create RexNode for literal "1"
+      RexNode one = rexBuilder.makeCast(bigintType, 
rexBuilder.makeExactLiteral(
+          BigDecimal.valueOf(1)));
+
+      // create RexNode for (Rank - 1)
+      RexNode rankNodeMinusOne = 
rexBuilder.makeCall(ImpalaCustomOperatorTable.MINUS,
+          rankNode, one);
+      RexNode rankDouble = rexBuilder.makeCast(over.type, rankNodeMinusOne);
+
+      // create RexNode for count() over([partition by clause])
+      RexNode countNode = rexBuilder.makeOver(bigintType, 
ImpalaCustomOperatorTable.COUNT,
+          over.getOperands(), over.getWindow().partitionKeys, 
ImmutableList.of(),
+          over.getWindow().getLowerBound(), over.getWindow().getUpperBound(),
+          over.getWindow().isRows(), true, false, over.isDistinct(), 
over.ignoreNulls());
+
+      // create RexNode for (Count - 1)
+      RexNode countNodeMinusOne = 
rexBuilder.makeCall(ImpalaCustomOperatorTable.MINUS,
+          countNode, one);
+      RexNode countDouble = rexBuilder.makeCast(over.type, countNodeMinusOne);
+
+      // create else RexNode for (Rank - 1)/(Count - 1)
+      RexNode rankDivCount = 
rexBuilder.makeCall(ImpalaCustomOperatorTable.DIVIDE,
+          rankDouble, countDouble);
+
+      // create when clause for case statement: Count = 1
+      RexNode caseCountOne = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, 
countNode,
+          one);
+      // create then clause for case statement: 0
+      RexNode zeroDouble = rexBuilder.makeCast(over.type, 
rexBuilder.makeExactLiteral(
+          BigDecimal.valueOf(0)));
+      return rexBuilder.makeCall(SqlStdOperatorTable.CASE, caseCountOne, 
zeroDouble,
+          rankDivCount);
+    }
+
+    /**
+     * Rewrite ntile() to the following:
+     *
+     * ntile(B) over([partition by clause] order by clause)
+     *    = floor(min(Count, B) * (RowNumber - 1)/Count) + 1
+     * where,
+     *  RowNumber = row_number() over([partition by clause] order by clause)
+     *  Count = count() over([partition by clause])
+     */
+    public RexNode replaceNTile(RexOver over) {
+      RelDataType bigintType =
+          rexBuilder.getTypeFactory().createSqlType(SqlTypeName.BIGINT);
+
+      // create RowNumber: row_number() over([partition by clause] order by 
clause)
+      RexNode rowNumberNode = rexBuilder.makeOver(bigintType,
+          SqlStdOperatorTable.ROW_NUMBER,
+          new ArrayList<>(), over.getWindow().partitionKeys, 
over.getWindow().orderKeys,
+          over.getWindow().getLowerBound(), over.getWindow().getUpperBound(),
+          over.getWindow().isRows(), true, false, over.isDistinct(), 
over.ignoreNulls());
+
+      ImmutableList<RexFieldCollation> orderKeys = ImmutableList.of();
+
+      // create Count: count() over([partition by clause])
+      RexNode countNode = rexBuilder.makeOver(bigintType, 
ImpalaCustomOperatorTable.COUNT,
+          new ArrayList<>(), over.getWindow().partitionKeys, orderKeys,
+          over.getWindow().getLowerBound(), over.getWindow().getUpperBound(),
+          over.getWindow().isRows(), true, false, over.isDistinct(), 
over.ignoreNulls());
+
+      RexNode one = rexBuilder.makeCast(bigintType, 
rexBuilder.makeExactLiteral(
+          BigDecimal.valueOf(1)));
+      RexNode zero = rexBuilder.makeCast(bigintType, 
rexBuilder.makeExactLiteral(
+          BigDecimal.valueOf(0)));
+
+      // create min: min(Count, B)
+      RexNode minCountNTile = rexBuilder.makeCall(new 
ImpalaAggOperator("least"),
+          countNode, over.getOperands().get(0));
+
+      // create RowNumber - 1
+      RexNode rowNumberMinusOne = 
rexBuilder.makeCall(ImpalaCustomOperatorTable.MINUS,
+          rowNumberNode, one);
+
+      // create min(Count, B) * (RowNumber - 1)
+      RexNode numeratorInt = 
rexBuilder.makeCall(ImpalaCustomOperatorTable.MULTIPLY,
+          minCountNTile, rowNumberMinusOne);
+
+      // create floor(min(Count, B) * (RowNumber - 1)/Count)
+      RexNode NTileMinusOne  = rexBuilder.makeCall(new 
ImpalaOperator("int_divide"),
+          numeratorInt, countNode);
+
+      // create floor(min(Count, B) * (RowNumber - 1)/Count) + 1
+      return rexBuilder.makeCall(over.getType(), 
ImpalaCustomOperatorTable.PLUS,
+          ImmutableList.of(NTileMinusOne, one));
+    }
+
+    private static ImmutableList<RexFieldCollation> reverseCollation(
+        List<RexFieldCollation> collationList) {
+      ImmutableList.Builder<RexFieldCollation> builder = 
ImmutableList.builder();
+      for (RexFieldCollation collation : collationList) {
+        ImmutableSet.Builder<SqlKind> directionBuilder = 
ImmutableSet.builder();
+        switch (collation.getDirection()) {
+          case ASCENDING:
+          case STRICTLY_ASCENDING:
+            directionBuilder.add(SqlKind.DESCENDING);
+            break;
+        }
+        if (collation.getNullDirection() == 
RelFieldCollation.NullDirection.FIRST) {
+          directionBuilder.add(SqlKind.NULLS_LAST);
+        }
+        builder.add(new RexFieldCollation(collation.getKey(), 
directionBuilder.build()));
+      }
+      return builder.build();
+    }
+  }
+}
diff --git 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java
 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java
index 0b3aaef6c..d00182cdb 100644
--- 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java
+++ 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java
@@ -36,6 +36,7 @@ import org.apache.impala.calcite.rel.node.ImpalaPlanRel;
 import org.apache.impala.calcite.rules.ConvertToCNFRules;
 import org.apache.impala.calcite.rules.ExtractLiteralAgg;
 import org.apache.impala.calcite.rules.ImpalaMinusToDistinctRule;
+import org.apache.impala.calcite.rules.RewriteRexOverRule;
 import org.apache.impala.common.ImpalaException;
 
 import org.slf4j.Logger;
@@ -147,6 +148,7 @@ public class CalciteOptimizer implements CompilerStep {
     RelNode retRelNode = plan;
     builder.addMatchOrder(HepMatchOrder.BOTTOM_UP);
     builder.addRuleCollection(ImmutableList.of(
+        RewriteRexOverRule.INSTANCE,
         new ExtractLiteralAgg()
         ));
 
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test 
b/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
index 4c0162f2d..8ee703ec0 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
@@ -1388,6 +1388,44 @@ order by t.year, t.month, t.id;
 INT, INT, INT, DOUBLE, DOUBLE
 ====
 ---- QUERY
+# Test percent_rank() desc with multiple partition exprs and multiple order by 
exprs and
+# compare with the rewrite target query
+# This is a copy of the previous percent_rank test with the order flipped.
+select t.year, t.month, t.id, t.pr, (t.r-1)/(t.c-1)
+from (select year, month, id, rank() over (partition by year, month order by 
id desc) as r,
+count() over(partition by year, month) as c,
+percent_rank() over(partition by year, month order by id desc) as pr
+from alltypes where tinyint_col < 2 and month < 3 and (id % 100 < 10)) as t
+order by t.year, t.month, t.id;
+---- RESULTS
+2009,1,0,1,1
+2009,1,1,0.8571428571428571,0.8571428571428571
+2009,1,100,0.7142857142857143,0.7142857142857143
+2009,1,101,0.5714285714285714,0.5714285714285714
+2009,1,200,0.4285714285714285,0.4285714285714285
+2009,1,201,0.2857142857142857,0.2857142857142857
+2009,1,300,0.1428571428571428,0.1428571428571428
+2009,1,301,0,0
+2009,2,400,1,1
+2009,2,401,0.6666666666666666,0.6666666666666666
+2009,2,500,0.3333333333333333,0.3333333333333333
+2009,2,501,0,0
+2010,1,3700,1,1
+2010,1,3701,0.8,0.8
+2010,1,3800,0.6,0.6
+2010,1,3801,0.4,0.4
+2010,1,3900,0.2,0.2
+2010,1,3901,0,0
+2010,2,4000,1,1
+2010,2,4001,0.8,0.8
+2010,2,4100,0.6,0.6
+2010,2,4101,0.4,0.4
+2010,2,4200,0.2,0.2
+2010,2,4201,0,0
+---- TYPES
+INT, INT, INT, DOUBLE, DOUBLE
+====
+---- QUERY
 # Test cume_dist() with multiple partition exprs and multiple order by exprs
 # and compare with the rewrite query
 select t.year, t.month, t.id, t.cd, ((t.c-t.r)+1)/t.c
@@ -1426,6 +1464,45 @@ order by t.year, t.month, t.id;
 INT, INT, INT, DOUBLE, DOUBLE
 ====
 ---- QUERY
+# Test cume_dist() desc with multiple partition exprs and multiple order by 
exprs
+# and compare with the rewrite query
+# This is a copy of the previous percent_rank test with the order flipped.
+select t.year, t.month, t.id, t.cd, ((t.c-t.r)+1)/t.c
+from (select year, month, id,
+rank() over (partition by year, month order by id) as r,
+count() over(partition by year, month) as c,
+cume_dist() over(partition by year, month order by id desc) as cd
+from alltypes where tinyint_col < 2 and month < 3 and (id % 100 < 10)) as t
+order by t.year, t.month, t.id;
+---- RESULTS
+2009,1,0,1,1
+2009,1,1,0.875,0.875
+2009,1,100,0.75,0.75
+2009,1,101,0.625,0.625
+2009,1,200,0.5,0.5
+2009,1,201,0.375,0.375
+2009,1,300,0.25,0.25
+2009,1,301,0.125,0.125
+2009,2,400,1,1
+2009,2,401,0.75,0.75
+2009,2,500,0.5,0.5
+2009,2,501,0.25,0.25
+2010,1,3700,1,1
+2010,1,3701,0.8333333333333334,0.8333333333333334
+2010,1,3800,0.6666666666666666,0.6666666666666666
+2010,1,3801,0.5,0.5
+2010,1,3900,0.3333333333333333,0.3333333333333333
+2010,1,3901,0.1666666666666667,0.1666666666666667
+2010,2,4000,1,1
+2010,2,4001,0.8333333333333334,0.8333333333333334
+2010,2,4100,0.6666666666666666,0.6666666666666666
+2010,2,4101,0.5,0.5
+2010,2,4200,0.3333333333333333,0.3333333333333333
+2010,2,4201,0.1666666666666667,0.1666666666666667
+---- TYPES
+INT, INT, INT, DOUBLE, DOUBLE
+====
+---- QUERY
 # Test ntile() with multiple partition exprs and multiple order by exprs and 
compare
 # with the rewrite query
 select t.year, t.month, t.id, t.nt, floor((if(t.c>3, 3, 
t.c)*(t.rownum-1))/t.c) + 1
@@ -1464,6 +1541,45 @@ order by t.year, t.month, t.id;
 INT, INT, INT, BIGINT, DOUBLE
 ====
 ---- QUERY
+# Test ntile() desc with multiple partition exprs and multiple order by exprs 
and compare
+# with the rewrite query
+# This is a copy of the previous percent_rank test with the order flipped.
+select t.year, t.month, t.id, t.nt, floor((if(t.c>3, 3, 
t.c)*(t.rownum-1))/t.c) + 1
+from (select year, month, id,
+row_number() over (partition by year, month order by id desc) as rownum,
+count() over(partition by year, month) as c,
+ntile(3) over(partition by year, month order by id desc) as nt
+from alltypes where tinyint_col < 2 and month < 3 and (id % 100 < 10)) as t
+order by t.year, t.month, t.id;
+---- RESULTS
+2009,1,0,3,3
+2009,1,1,3,3
+2009,1,100,2,2
+2009,1,101,2,2
+2009,1,200,2,2
+2009,1,201,1,1
+2009,1,300,1,1
+2009,1,301,1,1
+2009,2,400,3,3
+2009,2,401,2,2
+2009,2,500,1,1
+2009,2,501,1,1
+2010,1,3700,3,3
+2010,1,3701,3,3
+2010,1,3800,2,2
+2010,1,3801,2,2
+2010,1,3900,1,1
+2010,1,3901,1,1
+2010,2,4000,3,3
+2010,2,4001,3,3
+2010,2,4100,2,2
+2010,2,4101,2,2
+2010,2,4200,1,1
+2010,2,4201,1,1
+---- TYPES
+INT, INT, INT, BIGINT, DOUBLE
+====
+---- QUERY
 # Test for ntile when the argument is larger than number of rows
 select year, month, id, ntile(1000) over(order by year, month, id) from 
alltypes where
 tinyint_col < 2 and month < 3 and (id % 100 < 10) order by year, month, id
@@ -1565,6 +1681,22 @@ order by percent_rank() over (order by id)
 INT, DOUBLE
 ====
 ---- QUERY
+# IMPALA-13688: a cume_dist test that checks if we handle duplicates properly
+select month, cume_dist() over (order by month)
+from functional.alltypestiny order by month;
+---- RESULTS
+1,0.25
+1,0.25
+2,0.5
+2,0.5
+3,0.75
+3,0.75
+4,1
+4,1
+---- TYPES
+INT, DOUBLE
+====
+---- QUERY
 # Tests IMPALA-2158. Check that analytic functions with a limit succeed.
 select id, rank() over(order by id) from alltypesagg limit 10
 ---- RESULTS

Reply via email to