This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 9008e1bfcbcc40e02285f20503e19e4f38026c65 Author: Steve Carlin <[email protected]> AuthorDate: Wed Oct 16 15:47:09 2024 -0700 IMPALA-13459: Handle duplicate table in same query WHen there are 2 references of the same table in a query, there needs to be a unique alias name used within the TableRef object. Code has been added to generate an alias. IMPALA-13460 has been filed because we should use the user provided alias name rather than a generated alias name. This is a little more difficult to implement because Calcite has a limitation in that their table object at validation time is equivalent to a FeTable in that there is only one object for the multiple tables. In order to fix IMPALA-13460, there is a Calcite bug that has to be fixed. We'd have to generate our own TableScan object underneath their LogicalTableScan that would hold an alias. This TableScan can be generated through their RelBuilder Factory object. But the current code creates the LogicalTableScan directly rather than go through a factory, so that would need to be fixed first. There are no unit tests attached to this Jira, but there are some tpcds queries that will start working when this gets committed. Change-Id: Ib9997bc642c320c2e26294d7d02a05bccbba6a0d Reviewed-on: http://gerrit.cloudera.org:8080/21945 Tested-by: Impala Public Jenkins <[email protected]> Reviewed-by: Michael Smith <[email protected]> --- .../impala/calcite/rel/node/ImpalaHdfsScanRel.java | 3 +- .../calcite/rel/util/ImpalaBaseTableRef.java | 46 ++++++++++++++++++++++ .../apache/impala/calcite/schema/CalciteTable.java | 6 ++- .../impala/calcite/util/SimplifiedAnalyzer.java | 41 +++++++++++++++++++ 4 files changed, 93 insertions(+), 3 deletions(-) diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaHdfsScanRel.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaHdfsScanRel.java index 3a294cf2d..18be43898 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaHdfsScanRel.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaHdfsScanRel.java @@ -28,6 +28,7 @@ import org.apache.impala.analysis.TupleDescriptor; import org.apache.impala.calcite.rel.phys.ImpalaHdfsScanNode; import org.apache.impala.calcite.rel.util.ExprConjunctsConverter; import org.apache.impala.calcite.schema.CalciteTable; +import org.apache.impala.calcite.util.SimplifiedAnalyzer; import org.apache.impala.catalog.FeFsPartition; import org.apache.impala.catalog.HdfsTable; import org.apache.impala.common.ImpalaException; @@ -54,7 +55,7 @@ public class ImpalaHdfsScanRel extends TableScan CalciteTable table = (CalciteTable) getTable(); BaseTableRef baseTblRef = - table.createBaseTableRef(context.ctx_.getRootAnalyzer()); + table.createBaseTableRef((SimplifiedAnalyzer) context.ctx_.getRootAnalyzer()); // Create the Tuple Descriptor which will contain only the relevant columns // from the table needed for the query. diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/util/ImpalaBaseTableRef.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/util/ImpalaBaseTableRef.java new file mode 100644 index 000000000..766d3d2e4 --- /dev/null +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/util/ImpalaBaseTableRef.java @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.impala.calcite.rel.util; + +import com.google.common.base.Preconditions; +import org.apache.impala.analysis.BaseTableRef; +import org.apache.impala.analysis.Path; +import org.apache.impala.analysis.TableRef; +import org.apache.impala.calcite.util.SimplifiedAnalyzer; +import org.apache.impala.common.ImpalaException; + +/** + * The ImpalaBaseTableRef derived class allows us the ability to + * override the alias naming of the BaseTableRef + */ +public class ImpalaBaseTableRef extends BaseTableRef { + + public ImpalaBaseTableRef(TableRef tableRef, Path resolvedPath, + SimplifiedAnalyzer basicAnalyzer) throws ImpalaException { + super(tableRef, resolvedPath); + // Impala's table uniqueAlias is within the scope of each Analyzer. + // Since Impala uses a separate Analyzer instance for each query block + // it can maintain the uniqueness. However, since the Calcite planner + // uses a single SimplifiedAnalyzer for entire query and there are no + // longer separate query blocks (they have already been unnested), it needs + // to make the alias globally unique. + Preconditions.checkState(aliases_.length > 0); + aliases_[0] = basicAnalyzer.getUniqueTableAlias(getUniqueAlias()); + } + +} diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java index 7fc45efdc..9f86b0ac0 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java @@ -52,8 +52,10 @@ import org.apache.impala.catalog.FeFsPartition; import org.apache.impala.catalog.FeTable; import org.apache.impala.catalog.HdfsTable; import org.apache.impala.catalog.IcebergTable; +import org.apache.impala.calcite.rel.util.ImpalaBaseTableRef; import org.apache.impala.calcite.type.ImpalaTypeConverter; import org.apache.impala.calcite.type.ImpalaTypeSystemImpl; +import org.apache.impala.calcite.util.SimplifiedAnalyzer; import org.apache.impala.planner.HdfsPartitionPruner; import org.apache.impala.common.AnalysisException; import org.apache.impala.common.ImpalaException; @@ -99,14 +101,14 @@ public class CalciteTable extends RelOptAbstractTable return builder.build(); } - public BaseTableRef createBaseTableRef(Analyzer analyzer + public BaseTableRef createBaseTableRef(SimplifiedAnalyzer analyzer ) throws ImpalaException { TableRef tblRef = new TableRef(qualifiedTableName_, null); Path resolvedPath = analyzer.resolvePath(tblRef.getPath(), Path.PathType.TABLE_REF); - BaseTableRef baseTblRef = new BaseTableRef(tblRef, resolvedPath); + BaseTableRef baseTblRef = new ImpalaBaseTableRef(tblRef, resolvedPath, analyzer); baseTblRef.analyze(analyzer); return baseTblRef; } diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/util/SimplifiedAnalyzer.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/util/SimplifiedAnalyzer.java index 001819521..80230f5cb 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/util/SimplifiedAnalyzer.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/util/SimplifiedAnalyzer.java @@ -27,9 +27,12 @@ import org.apache.impala.analysis.StmtMetadataLoader; import org.apache.impala.analysis.TupleDescriptor; import org.apache.impala.analysis.TupleId; import org.apache.impala.authorization.AuthorizationFactory; +import org.apache.impala.common.AnalysisException; +import org.apache.impala.common.ImpalaException; import org.apache.impala.thrift.TNetworkAddress; import org.apache.impala.thrift.TQueryCtx; +import java.util.HashSet; import java.util.List; import java.util.Set; @@ -41,10 +44,13 @@ import java.util.Set; * methods. */ public class SimplifiedAnalyzer extends Analyzer { + public static final int MAX_IDENTIFIER_LENGTH = 128; // List of temporary filter expressions while initializing an Impala plan node. private List<Expr> unassignedConjuncts_ = Lists.newArrayList(); + private Set<String> uniqueTableAlias_ = new HashSet<>(); + public SimplifiedAnalyzer(StmtMetadataLoader.StmtTableCache stmtTableCache, TQueryCtx queryCtx, AuthorizationFactory authzFactory, List<TNetworkAddress> hostLocations) { @@ -125,4 +131,39 @@ public class SimplifiedAnalyzer extends Analyzer { result.setIsMaterialized(true); return result; } + + /** + * Given an alias name, check if it is unique based on previously + * cached names. If not, create a unique name by concatenating + * it with an integer sequence counter. + * + * TODO: IMPALA-13460: Generating a unique alias is a problem because + * the user provided alias in the query should be used in the explain + * plan rather than this alias. There is a Calcite bug that has to be + * fixed. We'd have to generate our own TableScan object underneath their + * LogicalTableScan that would hold an alias. This TableScan can be + * generated through their RelBuilder Factory object. But the current + * code creates the LogicalTableScan directly rather than go through + * a factory, so that would need to be fixed first. + */ + public String getUniqueTableAlias(String alias) throws ImpalaException { + final String base = alias; + String newAlias = base; + int i = 0; + while (uniqueTableAlias_.contains(newAlias)) { + newAlias = base + "_" + i++; + } + + // final alias name should conform to the max identifer length + if (newAlias.length() > MAX_IDENTIFIER_LENGTH) { + newAlias = newAlias.substring(0, MAX_IDENTIFIER_LENGTH); + if (uniqueTableAlias_.contains(newAlias)) { + throw new AnalysisException ("Cannot create a unique identifier since it " + + "exceeds maximum allowed length"); + } + } + + uniqueTableAlias_.add(newAlias); + return newAlias; + } }
