This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 3574fe89b0cd583265805f7052cac1075c6845cf Author: Steve Carlin <[email protected]> AuthorDate: Tue Nov 5 09:40:16 2024 -0800 IMPALA-13513: Support decode function The decode operator is similar to the case operator. The tricky part for supporting the decode is that some of the operands are search parameters and some are return parameters. The search parameters all need to be compatible with each other. And the return parameters need to be compatible with each other as well. Much of the code deals with casting these parameters to compatible types. Change-Id: Ia3b68fda7cfa14799a41428e35d5bbc5984a801a Reviewed-on: http://gerrit.cloudera.org:8080/22031 Reviewed-by: Michael Smith <[email protected]> Reviewed-by: Joe McDonnell <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../calcite/coercenodes/CoerceOperandShuttle.java | 70 +++++++++++- .../impala/calcite/functions/FunctionResolver.java | 27 ++++- .../impala/calcite/functions/RexCallConverter.java | 14 +++ .../operators/ImpalaCustomOperatorTable.java | 2 + .../calcite/operators/ImpalaDecodeFunction.java | 127 +++++++++++++++++++++ 5 files changed, 233 insertions(+), 7 deletions(-) diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/coercenodes/CoerceOperandShuttle.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/coercenodes/CoerceOperandShuttle.java index 28e1f84f1..67aac1060 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/coercenodes/CoerceOperandShuttle.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/coercenodes/CoerceOperandShuttle.java @@ -43,6 +43,7 @@ import org.apache.impala.catalog.ScalarType; import org.apache.impala.catalog.Type; import org.apache.impala.calcite.functions.FunctionResolver; import org.apache.impala.calcite.functions.ImplicitTypeChecker; +import org.apache.impala.calcite.operators.ImpalaDecodeFunction; import org.apache.impala.calcite.type.ImpalaTypeConverter; import java.math.BigDecimal; @@ -109,6 +110,12 @@ public class CoerceOperandShuttle extends RexShuttle { return castedOperandsCall; } + // had to special case decode since the casting is different from common + // functions. + if (castedOperandsCall.getOperator().getName().equals("DECODE")) { + return castDecodedFunction(castedOperandsCall, factory, rexBuilder); + } + Function fn = FunctionResolver.getSupertypeFunction(castedOperandsCall); if (fn == null) { @@ -324,17 +331,23 @@ public class CoerceOperandShuttle extends RexShuttle { return fn.functionName().equals("case"); } + private static RexNode castOperand(RexNode node, Type toImpalaType, + RelDataTypeFactory factory, RexBuilder rexBuilder) { + + RelDataType toType = ImpalaTypeConverter.getRelDataType(toImpalaType); + return castOperand(node, toType, factory, rexBuilder); + } + /** * castOperand takes a RexNode and a toType and returns the RexNode * with a potential cast wrapper. If the types match, then the * original RexNode is returned. If the toType is an incompatible * type, this method returns null. */ - private static RexNode castOperand(RexNode node, Type toImpalaType, + private static RexNode castOperand(RexNode node, RelDataType toType, RelDataTypeFactory factory, RexBuilder rexBuilder) { RelDataType fromType = node.getType(); - RelDataType toType = ImpalaTypeConverter.getRelDataType(toImpalaType); // No need to cast if types are the same if (fromType.getSqlTypeName().equals(toType.getSqlTypeName())) { @@ -359,4 +372,57 @@ public class CoerceOperandShuttle extends RexShuttle { } return rexBuilder.makeCast(toType, node); } + + /** + * Special casting function for the decode operator. While the case operator + * has its operands as boolean types, the operands within the decode operator + * can be any compatible type with the search operator. Therefore, all operands + * need to be checked to see if they need to be cast to a compatible type, not + * just the return parameters. + */ + private static RexNode castDecodedFunction(RexCall decodeCall, + RelDataTypeFactory factory, RexBuilder rexBuilder) { + List<RexNode> operands = decodeCall.getOperands(); + List<RelDataType> argTypes = + Util.transform(operands, RexNode::getType); + List<RexNode> newOperands = new ArrayList<>(operands.size()); + + // Grab the compatible search operand from all the search operands + RelDataType searchOperand = + ImpalaDecodeFunction.getCompatibleSearchOperand(argTypes, factory); + // Grab the compatible return operand from all the return operands + RelDataType returnType = + ImpalaDecodeFunction.getCompatibleReturnType(argTypes, factory); + + boolean hasElse = (argTypes.size() % 2 == 0); + int numNonElseParams = hasElse ? argTypes.size() - 1 : argTypes.size(); + + + // start by potentially casting all the search operands. Check the first one outside + // of the loop. + newOperands.add(castOperand(operands.get(0), searchOperand, factory, rexBuilder)); + + // Now we loop through all the operands, alternating between search type + // and return type. We don't include the last one in case there is an + // else parameter + for (int i = 1; i < numNonElseParams; ++i) { + // Alternate between return and search + RelDataType toType = (i % 2 == 0) ? returnType : searchOperand; + newOperands.add(castOperand(operands.get(i), toType, factory, rexBuilder)); + } + + // Check the else param for casting + if (hasElse) { + int elseParam = operands.size() - 1; + newOperands.add(castOperand(operands.get(elseParam), returnType, factory, + rexBuilder)); + } + + // If any operand was casted, create the new RexNode, otherwise, just send back + // the existing one. + return operands.equals(newOperands) + ? decodeCall + : (RexCall) rexBuilder.makeCall(returnType, decodeCall.getOperator(), + newOperands); + } } diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/FunctionResolver.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/FunctionResolver.java index 40852e477..1e88f18c5 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/FunctionResolver.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/FunctionResolver.java @@ -209,21 +209,38 @@ public class FunctionResolver { // return value as its only operand. This operand needs to be calculated based // on its parameters (see getCaseArgs method comment) if we are looking for a // non-exact match (the best case function that would work if casting the operands). - if (name.equals("case")) { + // The decode statement is also special in that the 3rd parameter is going to + // contain the return value + int caseOperandNum = getCaseOperandNum(name); + if (caseOperandNum != -1) { return exactMatch - ? Lists.newArrayList(ImpalaTypeConverter.createImpalaType(argTypes.get(1))) - : getCaseArgs(argTypes); + ? Lists.newArrayList(ImpalaTypeConverter.createImpalaType( + argTypes.get(caseOperandNum))) + : getCaseArgs(name, argTypes); } return ImpalaTypeConverter.getNormalizedImpalaTypes(argTypes); } + private static int getCaseOperandNum(String name) { + if (name.equals("case")) { + return 1; + } + if (name.equals("decode")) { + return 2; + } + return -1; + } + /** * getCaseArgs needs to calculate the proper operand. It walks through * all the "then" clauses (and the default clause) and calculated the * most compatible type which it will use as its return value. */ - private static List<Type> getCaseArgs(List<RelDataType> argTypes) { + private static List<Type> getCaseArgs(String name, List<RelDataType> argTypes) { + // Only handles the "case" function, not the "decode". + Preconditions.checkState(name.equals("case")); + int numOperands = argTypes.size(); Type compatibleType = Type.NULL; for (int i = 0; i < numOperands; ++i) { @@ -240,7 +257,7 @@ public class FunctionResolver { /** * shouldSkipOperand checks to see if we should skip this operand from casting. * Currently it only checks for CASE. CASE will have some boolean operands which - * will never need casting. TODO: support DECODE + * will never need casting. */ public static boolean shouldSkipOperandForCase(int numOperands, int i) { // The even number parameters are the boolean checks, but the last parameter diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/RexCallConverter.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/RexCallConverter.java index 4e8354acc..586ffe238 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/RexCallConverter.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/RexCallConverter.java @@ -33,6 +33,7 @@ import org.apache.impala.analysis.BinaryPredicate; import org.apache.impala.analysis.CaseWhenClause; import org.apache.impala.analysis.CompoundPredicate; import org.apache.impala.analysis.Expr; +import org.apache.impala.analysis.FunctionCallExpr; import org.apache.impala.analysis.TimestampArithmeticExpr; import org.apache.impala.calcite.type.ImpalaTypeConverter; import org.apache.impala.catalog.Function; @@ -110,6 +111,10 @@ public class RexCallConverter { impalaRetType); } + if (fn.functionName().equals("decode")) { + return createDecodeExpr(fn, params, impalaRetType); + } + switch (rexCall.getOperator().getKind()) { case CASE: return createCaseExpr(fn, params, impalaRetType); @@ -170,6 +175,15 @@ public class RexCallConverter { return new AnalyzedCastExpr(impalaRetType, params.get(0)); } + private static Expr createDecodeExpr(Function fn, List<Expr> params, + Type impalaRetType) throws ImpalaException { + // case expression is wrapped with a function, which is how the Impala + // analyzer Expr code expects to handle the decode function. + FunctionCallExpr decodeExpr = + new AnalyzedFunctionCallExpr(fn, params, impalaRetType); + return new AnalyzedCaseExpr(fn, impalaRetType, decodeExpr); + } + private static Expr createCaseExpr(Function fn, List<Expr> params, Type retType) { List<CaseWhenClause> caseWhenClauses = new ArrayList<>(); Expr whenParam = null; diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaCustomOperatorTable.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaCustomOperatorTable.java index 2a1a2a8a7..d3e3b45e7 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaCustomOperatorTable.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaCustomOperatorTable.java @@ -222,6 +222,8 @@ public class ImpalaCustomOperatorTable extends ReflectiveSqlOperatorTable { // validator needs to search for the explicit_cast function while validating. public static final ImpalaCastFunction EXPLICIT_CAST = ImpalaCastFunction.INSTANCE; + public static final ImpalaDecodeFunction DECODE = ImpalaDecodeFunction.INSTANCE; + public static ImpalaCustomOperatorTable instance() { return INSTANCE.get(); } diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaDecodeFunction.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaDecodeFunction.java new file mode 100644 index 000000000..2f8987f99 --- /dev/null +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaDecodeFunction.java @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.impala.calcite.operators; + +import com.google.common.base.Preconditions; +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.sql.SqlCallBinding; +import org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperandCountRange; +import org.apache.calcite.sql.SqlOperatorBinding; +import org.apache.calcite.sql.SqlSyntax; +import org.apache.calcite.sql.type.SqlOperandCountRanges; +import org.apache.impala.calcite.type.ImpalaTypeConverter; +import org.apache.impala.calcite.type.ImpalaTypeSystemImpl; + +import java.util.List; + +/** + * Special operator for Decode. + * + * The decode operator has to infer its return type off of a compatible + * parameter from all the choices of return parameters. These choices + * are the third parameter, the fifth parameter, the seventh, etc... + * If there are an even number of parameters, the last parameter is part + * of an else clause, so that is also a return parameter. + * + * This class also ensures that the search parameters are all compatible + * as well. These are all the parameters not mentioned; The first, second, + * fourth, sixth, and every other parameter except the last one. + * + * An example of a decode function (in the documentation) is: + * DECODE(day_of_week, 1, "Monday", 2, "Tuesday", 3, "Wednesday", + * 4, "Thursday", 5, "Friday", 6, "Saturday", 7, "Sunday", "Unknown day") + * The return values here are of type string, and the search parameters are of + * some numeric type for day_of_week and the choices for day of the week. + * + */ +public class ImpalaDecodeFunction extends ImpalaOperator { + + public static ImpalaDecodeFunction INSTANCE = new ImpalaDecodeFunction(); + + private ImpalaDecodeFunction() { + super("DECODE"); + } + + + @Override + public RelDataType inferReturnType(SqlOperatorBinding opBinding) { + List<RelDataType> operandTypes = CommonOperatorFunctions.getOperandTypes(opBinding); + + RexBuilder rexBuilder = + new RexBuilder(new JavaTypeFactoryImpl(new ImpalaTypeSystemImpl())); + RelDataTypeFactory factory = rexBuilder.getTypeFactory(); + + // No need to capture the return value, but an exception will be thrown + // if the parameters are not compatible. + getCompatibleSearchOperand(operandTypes, factory); + + return getCompatibleReturnType(operandTypes, factory); + } + + public static RelDataType getCompatibleSearchOperand( + List<RelDataType> operandTypes, RelDataTypeFactory factory) { + // check search operands are all compatible, grab the first. + RelDataType commonSearchOperand = operandTypes.get(0); + int searchOperandsToCheck = operandTypes.size()/2; + // Check that the second, fourth, sixth, etc... parameter is compatible + // but don't check the last one. + for (int i = 1; i < operandTypes.size() - 1; i += 2) { + commonSearchOperand = ImpalaTypeConverter.getCompatibleType(commonSearchOperand, + operandTypes.get(i), factory); + if (commonSearchOperand == null) { + throw new IllegalArgumentException("Decode function has incompatible " + + "types with search argument and argument number " + i); + } + } + return commonSearchOperand; + } + + public static RelDataType getCompatibleReturnType( + List<RelDataType> operandTypes, RelDataTypeFactory factory) { + // Initialize with the third parameter + RelDataType returnType = operandTypes.get(2); + returnType = factory.createTypeWithNullability(returnType, true); + // Skip over every other parameter starting with the fifth one. + for (int i = 4; i < operandTypes.size(); i += 2) { + returnType = ImpalaTypeConverter.getCompatibleType(returnType, + operandTypes.get(i), factory); + if (returnType == null) { + throw new IllegalArgumentException("Decode function has incompatible " + + "return type (argument number) " + i); + } + } + // If there are an even number of parameters, the last one is the else + // clause, so check that one too. + if ((operandTypes.size() % 2) == 0) { + returnType = ImpalaTypeConverter.getCompatibleType(returnType, + operandTypes.get(operandTypes.size() - 1), factory); + } + return returnType; + } + + @Override + public SqlOperandCountRange getOperandCountRange() { + return SqlOperandCountRanges.from(3); + } +}
