This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new c67b19daf IMPALA-14405: Labels for Calcite expressions not matching
original planner
c67b19daf is described below
commit c67b19daf6b47ac380058ea28de71f5d9a7bfe6f
Author: Steve Carlin <[email protected]>
AuthorDate: Fri Aug 29 08:58:03 2025 -0700
IMPALA-14405: Labels for Calcite expressions not matching original planner
Calcite sets literal expressions to EXPR$<x> which did not match
expressions given by the Impala planner. For literal expressions
such as "select 1 + 1", Impala creates the column name as "1 + 1".
The field names can be found in the abstract syntax tree, so
they are not set within the CalciteRelNodeConverter before the
logical tree is created.
A small test was added to calcite.test for a basic sanity check,
but more comprehensive tests will be run in the tests/shell module
(e.g. in test_shell_commandline.py and test_shell_interactive) which
contain tests for labels.
Change-Id: Ibd3e6366a284f53807b4b2c42efafa279249c1ea
Reviewed-on: http://gerrit.cloudera.org:8080/23516
Reviewed-by: Steve Carlin <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
.../calcite/service/CalciteRelNodeConverter.java | 60 +++++++++++++++++++++-
.../calcite/service/CalciteSingleNodePlanner.java | 6 ++-
.../queries/QueryTest/calcite.test | 24 +++++++++
3 files changed, 87 insertions(+), 3 deletions(-)
diff --git
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteRelNodeConverter.java
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteRelNodeConverter.java
index e8fde8bee..6d90281e3 100644
---
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteRelNodeConverter.java
+++
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteRelNodeConverter.java
@@ -17,7 +17,7 @@
package org.apache.impala.calcite.service;
-import org.apache.calcite.rel.type.RelDataTypeFactory;
+import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import org.apache.calcite.avatica.util.Quoting;
import org.apache.calcite.plan.ConventionTraitDef;
@@ -37,13 +37,20 @@ import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelRoot;
import org.apache.calcite.rel.core.RelFactories;
import org.apache.calcite.rel.rules.CoreRules;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.sql.parser.SqlParser;
+import org.apache.calcite.sql.SqlBasicCall;
import org.apache.calcite.sql.SqlExplainFormat;
import org.apache.calcite.sql.SqlExplainLevel;
+import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlNode;
+import org.apache.calcite.sql.SqlSelect;
+import org.apache.calcite.sql.SqlWith;
+import org.apache.calcite.sql.dialect.MysqlSqlDialect;
import org.apache.calcite.sql.validate.SqlValidator;
+import org.apache.calcite.sql.validate.SqlValidatorUtil;
import org.apache.calcite.sql2rel.RelDecorrelator;
import org.apache.calcite.sql2rel.SqlToRelConverter;
import org.apache.calcite.sql2rel.StandardConvertletTable;
@@ -159,6 +166,57 @@ public class CalciteRelNodeConverter implements
CompilerStep {
return decorrelatedPlan;
}
+ /**
+ * Get the field names given the root level of an AST tree. Calcite creates
some
+ * literal expressions like "1 + 1" as "$EXPR0" whereas Impala sets the
field name
+ * label as "1 + 1", so this method changes the field name appropriately.
+ */
+ public List<String> getFieldNames(SqlNode validatedNode) {
+ ImmutableList.Builder<String> fieldNamesBuilder = new
ImmutableList.Builder();
+
+ for (SqlNode selectItem : getSelectList(validatedNode)) {
+ String fieldName = SqlValidatorUtil.alias(selectItem, 0);
+ if (fieldName.startsWith("EXPR$")) {
+ // If it's a Calcite generated field name, it will be of the form
"EXPR$"
+ // We get the actual SQL expression using the toSqlString method. There
+ // is no Impala Dialect yet, so using MySql dialect to get the field
+ // name. The language chosen is irrelevant because we only are using it
+ // to grab the expression as/is to use for the label.
+ fieldName = selectItem.toSqlString(MysqlSqlDialect.DEFAULT).getSql();
+ }
+ fieldNamesBuilder.add(fieldName.toLowerCase());
+ }
+ return fieldNamesBuilder.build();
+ }
+
+ /**
+ * Retrieve the first select list found from the root node.
+ */
+ public List<SqlNode> getSelectList(SqlNode validatedNode) {
+ // If a with clause exists, it will be on top and we need to
+ // get its child.
+ SqlNode firstSelectNode = (validatedNode instanceof SqlWith)
+ ? ((SqlWith) validatedNode).body
+ : validatedNode;
+
+ // Top level could be some kind of "except/intersect" call. Need to
+ // traverse the tree until we either find a "values" (ROW kind) node
+ // or a select node.
+ if (firstSelectNode instanceof SqlBasicCall) {
+ SqlBasicCall basicCall = (SqlBasicCall) firstSelectNode;
+ // if it's a "values" clause, there is no select list and
+ // we just return the values list.
+ if (basicCall.getOperator().getKind().equals(SqlKind.ROW)) {
+ return basicCall.getOperandList();
+ }
+ // grab the first parameter for the field list. Since it could be
+ // a "with", we call this method recursively
+ return getSelectList(basicCall.operand(0));
+ }
+ Preconditions.checkState(firstSelectNode instanceof SqlSelect);
+ return ((SqlSelect)firstSelectNode).getSelectList();
+ }
+
public RelOptCluster getCluster() {
return cluster_;
}
diff --git
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteSingleNodePlanner.java
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteSingleNodePlanner.java
index 97ea69669..6dbc6aa7d 100644
---
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteSingleNodePlanner.java
+++
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteSingleNodePlanner.java
@@ -52,6 +52,7 @@ public class CalciteSingleNodePlanner implements
SingleNodePlannerIntf {
private final PlannerContext ctx_;
private final CalciteAnalysisResult analysisResult_;
private NodeWithExprs rootNode_;
+ private List<String> fieldNames_;
public CalciteSingleNodePlanner(PlannerContext ctx) {
ctx_ = ctx;
@@ -63,6 +64,7 @@ public class CalciteSingleNodePlanner implements
SingleNodePlannerIntf {
CalciteRelNodeConverter relNodeConverter =
new CalciteRelNodeConverter(analysisResult_);
RelNode logicalPlan =
relNodeConverter.convert(analysisResult_.getValidatedNode());
+ fieldNames_ =
relNodeConverter.getFieldNames(analysisResult_.getValidatedNode());
// Optimize the query
CalciteOptimizer optimizer =
@@ -89,7 +91,7 @@ public class CalciteSingleNodePlanner implements
SingleNodePlannerIntf {
@Override
public List<String> getColLabels() {
- return rootNode_.fieldNames_;
+ return fieldNames_;
}
@Override
@@ -97,7 +99,7 @@ public class CalciteSingleNodePlanner implements
SingleNodePlannerIntf {
TResultSetMetadata metadata = new TResultSetMetadata();
int colCnt = rootNode_.outputExprs_.size();
for (int i = 0; i < colCnt; ++i) {
- TColumn colDesc = new TColumn(rootNode_.fieldNames_.get(i).toLowerCase(),
+ TColumn colDesc = new TColumn(fieldNames_.get(i),
rootNode_.outputExprs_.get(i).getType().toThrift());
metadata.addToColumns(colDesc);
}
diff --git a/testdata/workloads/functional-query/queries/QueryTest/calcite.test
b/testdata/workloads/functional-query/queries/QueryTest/calcite.test
index d464ad74f..a514be751 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/calcite.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/calcite.test
@@ -1097,3 +1097,27 @@ select ndv(varchar_col, 0) from functional.chars_medium;
---- CATCH
Error in NDV function, second parameter needs to be between 1 and 10.
====
+---- QUERY
+# Labels test
+select 2, 1 + 1;
+---- LABELS
+2,1 + 1
+---- RESULTS
+2,2
+---- TYPES
+tinyint,smallint
+---- RUNTIME_PROFILE
+row_regex: .*PlannerType: CalcitePlanner.*
+====
+---- QUERY
+# Labels test
+select length('hello')
+---- LABELS
+length('hello')
+---- RESULTS
+5
+---- TYPES
+int
+---- RUNTIME_PROFILE
+row_regex: .*PlannerType: CalcitePlanner.*
+====