This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 420e357b9 IMPALA-13695: Calcite planner: fix for ndv with 2 args
420e357b9 is described below
commit 420e357b9586b45fbf4d08265fa57dd616989377
Author: Steve Carlin <[email protected]>
AuthorDate: Fri Sep 5 15:33:15 2025 -0700
IMPALA-13695: Calcite planner: fix for ndv with 2 args
The NDV function was crashing when called with the "scale" arg. This
requires special processing which exists in FunctionCallExpr.
The validation for this is now done in ImpalaNdvFunction
and the special calculation is done within ImpalaAggRel
This also fixes ndv for varchar types. The aggregation call
within CoerceNodes was not differentiating between varchar
and string. A cast to string function is needed in order
to run the ndv function on a varchar column.
Change-Id: I82419f77e043e9975865a042ffb8db75a26931f7
Reviewed-on: http://gerrit.cloudera.org:8080/23513
Reviewed-by: Riza Suminto <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
.../apache/impala/analysis/FunctionCallExpr.java | 4 +-
.../impala/calcite/coercenodes/CoerceNodes.java | 8 +++
.../operators/ImpalaCustomOperatorTable.java | 2 +
.../calcite/operators/ImpalaNdvFunction.java | 57 ++++++++++++++++++++++
.../impala/calcite/rel/node/ImpalaAggRel.java | 38 ++++++++++++---
.../queries/QueryTest/calcite.test | 20 ++++++++
6 files changed, 122 insertions(+), 7 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
b/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
index e6e303ba5..54ea6c513 100644
--- a/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
@@ -622,7 +622,9 @@ public class FunctionCallExpr extends Expr {
// the needed memory for that precision value which is 2^precision.
// This method must be identical to function ComputeHllLengthFromScale()
// defined in aggregate-functions-ir.cc.
- private int ComputeHllLengthFromScale(int scale) { return 1 << (scale + 8); }
+ public static int ComputeHllLengthFromScale(int scale) {
+ return 1 << (scale + 8);
+ }
@Override
protected void analyzeImpl(Analyzer analyzer) throws AnalysisException {
diff --git
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/coercenodes/CoerceNodes.java
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/coercenodes/CoerceNodes.java
index 0f6846943..5712cb295 100644
---
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/coercenodes/CoerceNodes.java
+++
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/coercenodes/CoerceNodes.java
@@ -644,6 +644,14 @@ public class CoerceNodes{
}
private static boolean areSqlTypesEqual(RelDataType r1, RelDataType r2) {
+ if (r1.getSqlTypeName().equals(SqlTypeName.VARCHAR) &&
+ r2.getSqlTypeName().equals(SqlTypeName.VARCHAR)) {
+ // if both precisions are Integer.MAX_VALUE, they are both strings
+ // if both precisions are not INteger.MAX_VALUE, they are both varchars
+ int maxVal = Integer.MAX_VALUE;
+ return (r1.getPrecision() == maxVal && r2.getPrecision() == maxVal) ||
+ (r1.getPrecision() != maxVal && r2.getPrecision() != maxVal);
+ }
return r1.getSqlTypeName().equals(r2.getSqlTypeName());
}
diff --git
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaCustomOperatorTable.java
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaCustomOperatorTable.java
index 10e4b0716..9f498b795 100644
---
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaCustomOperatorTable.java
+++
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaCustomOperatorTable.java
@@ -252,6 +252,8 @@ public class ImpalaCustomOperatorTable extends
ReflectiveSqlOperatorTable {
public static final SqlSetOperator INTERSECT_ALL =
new SqlSetOperator("INTERSECT ALL", SqlKind.INTERSECT, 12, true);
+ public static final ImpalaNdvFunction NDV = new ImpalaNdvFunction();
+
public static ImpalaCustomOperatorTable instance() {
return INSTANCE.get();
}
diff --git
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaNdvFunction.java
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaNdvFunction.java
new file mode 100644
index 000000000..e4683d2b6
--- /dev/null
+++
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaNdvFunction.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.impala.calcite.operators;
+
+import org.apache.calcite.sql.SqlCallBinding;
+
+import java.math.BigDecimal;
+
+/**
+ * Implementation of NDV that handles special validation logic specific
+ * to Impala.
+ */
+public class ImpalaNdvFunction extends ImpalaAggOperator {
+ public ImpalaNdvFunction() {
+ super("NDV");
+ }
+
+ @Override
+ public boolean checkOperandTypes(SqlCallBinding callBinding, boolean
throwOnFailure) {
+ int operandCount = callBinding.getOperandCount();
+ if (operandCount < 1 || operandCount > 2) {
+ throw new IllegalArgumentException("Error in NDV function, " +
+ "must contain 1 or 2 parameters.");
+ }
+
+ if (operandCount == 1) {
+ return true;
+ }
+
+ BigDecimal bd = callBinding.getOperandLiteralValue(1, BigDecimal.class);
+ if (bd == null) {
+ throw new IllegalArgumentException("Error in NDV function, " +
+ "second parameter needs to be an integer.");
+ }
+
+ if (bd.intValue() < 1 || bd.intValue() > 10) {
+ throw new IllegalArgumentException("Error in NDV function, " +
+ "second parameter needs to be between 1 and 10.");
+ }
+
+ return true;
+ }
+}
diff --git
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaAggRel.java
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaAggRel.java
index 8d48b0ed2..d0c3e7925 100644
---
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaAggRel.java
+++
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaAggRel.java
@@ -40,14 +40,17 @@ import
org.apache.impala.calcite.rel.util.ExprConjunctsConverter;
import org.apache.impala.calcite.type.ImpalaTypeConverter;
import org.apache.impala.analysis.AggregateInfo;
import org.apache.impala.analysis.Analyzer;
+import org.apache.impala.analysis.CastExpr;
import org.apache.impala.analysis.Expr;
import org.apache.impala.analysis.ExprSubstitutionMap;
import org.apache.impala.analysis.FunctionCallExpr;
import org.apache.impala.analysis.FunctionParams;
import org.apache.impala.analysis.MultiAggregateInfo;
+import org.apache.impala.analysis.NumericLiteral;
import org.apache.impala.analysis.SlotDescriptor;
import org.apache.impala.calcite.util.SimplifiedAnalyzer;
import org.apache.impala.catalog.AggregateFunction;
+import org.apache.impala.catalog.BuiltinsDb;
import org.apache.impala.catalog.Function;
import org.apache.impala.catalog.Type;
import org.apache.impala.common.AnalysisException;
@@ -277,7 +280,7 @@ public class ImpalaAggRel extends Aggregate
*/
public boolean hasDistinctOnly() throws ImpalaException {
for (AggregateCall aggCall : getAggCallList()) {
- Function fn = getFunction(aggCall);
+ Function fn = getFunction(aggCall, ImmutableList.of());
if (fn == null) {
return false;
}
@@ -326,7 +329,7 @@ public class ImpalaAggRel extends Aggregate
.map(t -> inputExprs.get(t))
.collect(Collectors.toList());
- Function fn = getFunction(aggCall);
+ Function fn = getFunction(aggCall, inputExprs);
Preconditions.checkState(fn != null, "Could not find the Impala function
for " +
aggCall.getAggregation().getName());
@@ -342,8 +345,8 @@ public class ImpalaAggRel extends Aggregate
return exprs;
}
- private Function getFunction(AggregateCall aggCall)
- throws ImpalaException {
+ private Function getFunction(AggregateCall aggCall,
+ List<Expr> inputExprs) throws ImpalaException {
RelDataType retType = aggCall.getType();
SqlAggFunction aggFunction = aggCall.getAggregation();
List<RelDataType> operandTypes = Lists.newArrayList();
@@ -352,8 +355,31 @@ public class ImpalaAggRel extends Aggregate
RelDataType relDataType =
input.getRowType().getFieldList().get(i).getType();
operandTypes.add(relDataType);
}
- return FunctionResolver.getExactFunction(aggFunction.getName(),
aggFunction.getKind(),
- operandTypes);
+ Function fn = FunctionResolver.getExactFunction(aggFunction.getName(),
+ aggFunction.getKind(), operandTypes);
+ // special case for ndv which needs a little extra resolving
+ return aggFunction.getName().toLowerCase().equals("ndv")
+ ? getNdvFunction(fn, aggCall, inputExprs)
+ : fn;
+ }
+
+ private Function getNdvFunction(Function fn, AggregateCall aggCall,
+ List<Expr> inputExprs) {
+ if (aggCall.getArgList().size() < 2 || inputExprs.size() == 0) {
+ return fn;
+ }
+
+ // The second argument is the scale. It might be stuffed within
+ // some cast expressions, but there should be a literal value there
+ // which is checked at validation time.
+ Expr literalExpr = inputExprs.get(aggCall.getArgList().get(1));
+ while (literalExpr instanceof CastExpr) {
+ literalExpr = literalExpr.getChild(0);
+ }
+ int scale = ((NumericLiteral) literalExpr).getIntValue();
+ int size = FunctionCallExpr.ComputeHllLengthFromScale(scale);
+ return ((BuiltinsDb)BuiltinsDb.getInstance()).resolveNdvIntermediateType(
+ (AggregateFunction) fn, size);
}
private boolean isCardinalityCheckRelNode() {
diff --git a/testdata/workloads/functional-query/queries/QueryTest/calcite.test
b/testdata/workloads/functional-query/queries/QueryTest/calcite.test
index 3013d7965..d464ad74f 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/calcite.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/calcite.test
@@ -1077,3 +1077,23 @@ where 10.1 not in (tinyint_col, smallint_col, int_col,
bigint_col, float_col, do
---- TYPES
bigint
====
+---- QUERY
+# Test for varchar ndv with both 1 and 2 parameters
+select ndv(varchar_col), ndv(varchar_col, 1) from functional.chars_medium;
+---- RESULTS
+963,1030
+---- TYPES
+bigint, bigint
+====
+---- QUERY
+# Test for varchar ndv with out of range ndv for second parameter
+select ndv(varchar_col, 11) from functional.chars_medium;
+---- CATCH
+Error in NDV function, second parameter needs to be between 1 and 10.
+====
+---- QUERY
+# Test for varchar ndv with out of range ndv for second parameter
+select ndv(varchar_col, 0) from functional.chars_medium;
+---- CATCH
+Error in NDV function, second parameter needs to be between 1 and 10.
+====