This is an automated email from the ASF dual-hosted git repository.
dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new c9e9f86c0 IMPALA-13571: Calcite Planner: Fix join parsing errors.
c9e9f86c0 is described below
commit c9e9f86c0536f0991f70613675bbcfbbccc6a324
Author: Steve Carlin <[email protected]>
AuthorDate: Thu Nov 7 18:46:37 2024 -0800
IMPALA-13571: Calcite Planner: Fix join parsing errors.
This commit fixes two parsing errors related to joins.
1) The straight_join hint is now parsed correctly. The hint, however,
will be ignored. IMPALA-13574 has been filed for this feature.
2) The anti-join and semi-join will no longer throw parse exceptions
but will now throw unsupported exceptions. The jiras IMPALA-13572
and IMPALA-13573 have been filed for these features.
IMPALA-13708: Throw unsupported message for complex column syntax.
There is special complex column syntax where the column is treated
like a table and looks like a table name when parsed. If we get an
analysis error for an unfound table, we can see if it is actually
a complex column and throw an "unsupported" error instead of giving
a cryptic "table not found" message.
Also made the support for types of tables to be a bit more generic
and check if it's not an HdfsTable rather than only throw an
unsupported exception if it's an Iceberg table.
Change-Id: Icd0f68441c84b090ed2cb45de96ccee1054deef5
Reviewed-on: http://gerrit.cloudera.org:8080/22412
Reviewed-by: Aman Sinha <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
.../src/main/codegen/templates/Parser.jj | 4 +
.../apache/impala/calcite/schema/CalciteTable.java | 18 ++++-
.../impala/calcite/service/CalciteJniFrontend.java | 73 ++++++++++++++++++-
.../calcite/service/CalciteMetadataHandler.java | 85 ++++++++++++++++++++--
4 files changed, 167 insertions(+), 13 deletions(-)
diff --git a/java/calcite-planner/src/main/codegen/templates/Parser.jj
b/java/calcite-planner/src/main/codegen/templates/Parser.jj
index eb72f12f2..09dacd69c 100644
--- a/java/calcite-planner/src/main/codegen/templates/Parser.jj
+++ b/java/calcite-planner/src/main/codegen/templates/Parser.jj
@@ -1306,6 +1306,8 @@ SqlSelect SqlSelect() :
}
{
<SELECT> { s = span(); }
+ ( <CLUSTERED> )?
+ ( <STRAIGHT_JOIN> )?
[ <HINT_BEG> AddHint(hints) ( <COMMA> AddHint(hints) )* <COMMENT_END> ]
SqlSelectKeywords(keywords)
(
@@ -7901,6 +7903,7 @@ SqlPostfixOperator PostfixRowOperator() :
| < CLASS_ORIGIN: "CLASS_ORIGIN" >
| < CLOB: "CLOB" >
| < CLOSE: "CLOSE" >
+| < CLUSTERED: "CLUSTERED" >
| < COALESCE: "COALESCE" >
| < COBOL: "COBOL" >
| < COLLATE: "COLLATE" >
@@ -8427,6 +8430,7 @@ SqlPostfixOperator PostfixRowOperator() :
| < STATIC: "STATIC" >
| < STDDEV_POP: "STDDEV_POP" >
| < STDDEV_SAMP: "STDDEV_SAMP" >
+| < STRAIGHT_JOIN: "STRAIGHT_JOIN" >
| < STREAM: "STREAM" >
| < STRING_AGG: "STRING_AGG" >
| < STRUCTURE: "STRUCTURE" >
diff --git
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java
index 5f70405c8..53f4fd243 100644
---
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java
+++
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java
@@ -50,6 +50,7 @@ import org.apache.impala.analysis.TupleDescriptor;
import org.apache.impala.catalog.Column;
import org.apache.impala.catalog.FeFsPartition;
import org.apache.impala.catalog.FeTable;
+import org.apache.impala.catalog.FeView;
import org.apache.impala.catalog.HdfsTable;
import org.apache.impala.catalog.IcebergTable;
import org.apache.impala.calcite.rel.util.ImpalaBaseTableRef;
@@ -82,10 +83,7 @@ public class CalciteTable extends RelOptAbstractTable
this.table_ = (HdfsTable) table;
this.qualifiedTableName_ = table.getTableName().toPath();
- if (table instanceof IcebergTable) {
- throw new UnsupportedFeatureException("Calcite parser does not support "
+
- "Iceberg tables yet.");
- }
+ checkIfTableIsSupported(table);
}
private static RelDataType buildColumnsForRelDataType(FeTable table)
@@ -106,6 +104,18 @@ public class CalciteTable extends RelOptAbstractTable
return builder.build();
}
+ private void checkIfTableIsSupported(FeTable table) throws ImpalaException {
+ if (table instanceof FeView) {
+ throw new UnsupportedFeatureException("Views are not supported yet.");
+ }
+
+ if (!(table instanceof HdfsTable)) {
+ String tableType = table.getClass().getSimpleName().replace("Table", "");
+ throw new UnsupportedFeatureException(tableType + " tables are not
supported yet.");
+ }
+
+ }
+
public BaseTableRef createBaseTableRef(SimplifiedAnalyzer analyzer
) throws ImpalaException {
diff --git
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteJniFrontend.java
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteJniFrontend.java
index e5ab7e4ab..6d75a90cb 100644
---
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteJniFrontend.java
+++
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteJniFrontend.java
@@ -26,6 +26,7 @@ import org.apache.calcite.sql.SqlExplain;
import org.apache.calcite.sql.SqlNode;
import org.apache.impala.analysis.Parser;
import org.apache.impala.analysis.SelectStmt;
+import org.apache.impala.analysis.StmtMetadataLoader.StmtTableCache;
import org.apache.impala.calcite.functions.FunctionResolver;
import org.apache.impala.calcite.operators.ImpalaOperatorTable;
import org.apache.impala.calcite.rel.node.NodeWithExprs;
@@ -34,6 +35,7 @@ import org.apache.impala.catalog.BuiltinsDb;
import org.apache.impala.common.ImpalaException;
import org.apache.impala.common.JniUtil;
import org.apache.impala.common.ParseException;
+import org.apache.impala.common.UnsupportedFeatureException;
import org.apache.impala.service.Frontend;
import org.apache.impala.service.FrontendProfile;
import org.apache.impala.service.JniFrontend;
@@ -45,6 +47,7 @@ import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import java.util.List;
+import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
@@ -60,12 +63,30 @@ public class CalciteJniFrontend extends JniFrontend {
protected static final Logger LOG =
LoggerFactory.getLogger(CalciteJniFrontend.class.getName());
- private static Pattern SEMI_JOIN = Pattern.compile("\\bsemi\\sjoin\\b",
+ private static Pattern LEFT_SEMI = Pattern.compile(".*\\bleft\\ssemi\\b.*",
Pattern.CASE_INSENSITIVE);
- private static Pattern ANTI_JOIN = Pattern.compile("\\banti\\sjoin\\b",
+ private static Pattern RIGHT_SEMI = Pattern.compile(".*\\bright\\ssemi\\b.*",
Pattern.CASE_INSENSITIVE);
+ private static Pattern LEFT_ANTI = Pattern.compile(".*\\bleft\\santi\\b.*",
+ Pattern.CASE_INSENSITIVE);
+
+ private static Pattern RIGHT_ANTI = Pattern.compile(".*\\bright\\santi\\b.*",
+ Pattern.CASE_INSENSITIVE);
+
+ private static Pattern INPUT_FILE_NAME =
Pattern.compile(".*\\binput__file__name\\b.*",
+ Pattern.CASE_INSENSITIVE);
+
+ private static Pattern FILE_POSITION =
Pattern.compile(".*\\bfile__position\\b.*",
+ Pattern.CASE_INSENSITIVE);
+
+ private static Pattern TABLE_NOT_FOUND =
+ Pattern.compile(".*\\bTable '(.*)' not found\\b.*",
Pattern.CASE_INSENSITIVE);
+
+ private static Pattern COLUMN_NOT_FOUND =
+ Pattern.compile(".*\\bColumn '(.*)' not found\\b.*",
Pattern.CASE_INSENSITIVE);
+
public CalciteJniFrontend(byte[] thriftBackendConfig, boolean isBackendTest)
throws ImpalaException, TException {
super(thriftBackendConfig, isBackendTest);
@@ -84,6 +105,8 @@ public class CalciteJniFrontend extends JniFrontend {
QueryContext queryCtx = new QueryContext(thriftQueryContext,
getFrontend());
+ CalciteMetadataHandler mdHandler = null;
+
if (!optionSupportedInCalcite(queryCtx)) {
return runThroughOriginalPlanner(thriftQueryContext, queryCtx);
}
@@ -99,8 +122,7 @@ public class CalciteJniFrontend extends JniFrontend {
markEvent(queryParser, parsedSqlNode, queryCtx, "Parsed query");
// Make sure the metadata cache has all the info for the query.
- CalciteMetadataHandler mdHandler =
- new CalciteMetadataHandler(parsedSqlNode, queryCtx);
+ mdHandler = new CalciteMetadataHandler(parsedSqlNode, queryCtx);
markEvent(mdHandler, null, queryCtx, "Loaded tables");
boolean isExplain = false;
@@ -140,6 +162,7 @@ public class CalciteJniFrontend extends JniFrontend {
return serializedRequest;
} catch (ParseException e) {
+ throwUnsupportedIfKnownException(e);
// do a quick parse just to make sure it's not a select stmt. If it is
// a select statement, we fail the query since all select statements
// should be run through the Calcite Planner.
@@ -149,6 +172,13 @@ public class CalciteJniFrontend extends JniFrontend {
LOG.info("Calcite planner failed to parse query: " + queryCtx.getStmt());
LOG.info("Going to use original Impala planner.");
return runThroughOriginalPlanner(thriftQueryContext, queryCtx);
+ } catch (ImpalaException e) {
+ if (mdHandler != null) {
+ throwUnsupportedIfKnownException(e, mdHandler.getStmtTableCache());
+ }
+ throw e;
+ } catch (Exception e) {
+ throw e;
}
}
@@ -188,6 +218,41 @@ public class CalciteJniFrontend extends JniFrontend {
ImpalaOperatorTable.create(BuiltinsDb.getInstance());
}
+ private static void throwUnsupportedIfKnownException(Exception e)
+ throws ImpalaException {
+ String s = e.toString().replace("\n"," ");;
+ if (LEFT_ANTI.matcher(s).matches() || RIGHT_ANTI.matcher(s).matches()) {
+ throw new UnsupportedFeatureException("Anti joins not supported.");
+ }
+ if (LEFT_SEMI.matcher(s).matches() || RIGHT_SEMI.matcher(s).matches()) {
+ throw new UnsupportedFeatureException("Semi joins not supported.");
+ }
+ if (INPUT_FILE_NAME.matcher(s).matches() ||
FILE_POSITION.matcher(s).matches()) {
+ throw new UnsupportedFeatureException("Virtual columns not supported.");
+ }
+ }
+
+ public static void throwUnsupportedIfKnownException(ImpalaException e,
+ StmtTableCache stmtTableCache) throws ImpalaException {
+ throwUnsupportedIfKnownException(e);
+ String s = e.toString().replace("\n"," ");;
+ Matcher m = TABLE_NOT_FOUND.matcher(s);
+ if (m.matches()) {
+ if (CalciteMetadataHandler.anyTableContainsColumn(stmtTableCache,
m.group(1))) {
+ throw new UnsupportedFeatureException(
+ "Complex column " + m.group(1) + " not supported.");
+ }
+ }
+
+ m = COLUMN_NOT_FOUND.matcher(s);
+ if (m.matches()) {
+ if (CalciteMetadataHandler.anyTableContainsColumn(stmtTableCache,
m.group(1))) {
+ throw new UnsupportedFeatureException(
+ "Complex column " + m.group(1) + " not supported.");
+ }
+ }
+ }
+
public static class QueryContext {
private final TQueryCtx queryCtx_;
private final String stmt_;
diff --git
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java
index 13ff2f678..49fda4bc1 100644
---
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java
+++
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java
@@ -47,6 +47,7 @@ import org.apache.impala.catalog.FeView;
import org.apache.impala.catalog.HdfsTable;
import org.apache.impala.common.ImpalaException;
import org.apache.impala.common.UnsupportedFeatureException;
+import com.google.common.collect.Sets;
import java.util.ArrayList;
import java.util.Collections;
@@ -96,8 +97,10 @@ public class CalciteMetadataHandler implements CompilerStep {
// populate calcite schema. This step needs to be done after the loader
because the
// schema needs to contain the columns in the table for validation, which
cannot
// be done when it's an IncompleteTable
- populateCalciteSchema(reader_, queryCtx.getFrontend().getCatalog(),
- tableVisitor.tableNames_);
+ List<String> errorTables = populateCalciteSchema(reader_,
+ queryCtx.getFrontend().getCatalog(), tableVisitor.tableNames_);
+
+ tableVisitor.checkForComplexTable(stmtTableCache_, errorTables, queryCtx);
}
/**
@@ -119,22 +122,26 @@ public class CalciteMetadataHandler implements
CompilerStep {
}
/**
- * Populate the CalciteSchema with tables being used by this query.
+ * Populate the CalciteSchema with tables being used by this query. Returns a
+ * list of tables in the query that are not found in the database.
*/
- private void populateCalciteSchema(CalciteCatalogReader reader,
+ private static List<String> populateCalciteSchema(CalciteCatalogReader
reader,
FeCatalog catalog, Set<TableName> tableNames) throws ImpalaException {
+ List<String> notFoundTables = new ArrayList<>();
CalciteSchema rootSchema = reader.getRootSchema();
Map<String, CalciteDb.Builder> dbSchemas = new HashMap<>();
for (TableName tableName : tableNames) {
FeDb db = catalog.getDb(tableName.getDb());
// db is not found, this will probably fail in the validation step
if (db == null) {
+ notFoundTables.add(tableName.toString());
continue;
}
// table is not found, this will probably fail in the validation step
FeTable feTable = db.getTable(tableName.getTbl());
if (feTable == null) {
+ notFoundTables.add(tableName.toString());
continue;
}
if (!(feTable instanceof HdfsTable)) {
@@ -156,6 +163,7 @@ public class CalciteMetadataHandler implements CompilerStep
{
for (String dbName : dbSchemas.keySet()) {
rootSchema.add(dbName, dbSchemas.get(dbName.toLowerCase()).build());
}
+ return notFoundTables;
}
public StmtMetadataLoader.StmtTableCache getStmtTableCache() {
@@ -174,6 +182,11 @@ public class CalciteMetadataHandler implements
CompilerStep {
private final String currentDb_;
public final Set<TableName> tableNames_ = new HashSet<>();
+ // Error condition for now. Complex tables are not yet supported
+ // so if we see a table name that has more than 2 parts, this variable
+ // will contain that table.
+ public final List<String> errorTables_ = new ArrayList<>();
+
public TableVisitor(String currentDb) {
this.currentDb_ = currentDb.toLowerCase();
}
@@ -194,13 +207,15 @@ public class CalciteMetadataHandler implements
CompilerStep {
if (fromNode instanceof SqlIdentifier) {
String tableName = fromNode.toString();
List<String> parts = Splitter.on('.').splitToList(tableName);
- // TODO: 'complex' tables ignored for now
if (parts.size() == 1) {
localTableNames.add(new TableName(
currentDb_.toLowerCase(), parts.get(0).toLowerCase()));
} else if (parts.size() == 2) {
localTableNames.add(
new TableName(parts.get(0).toLowerCase(),
parts.get(1).toLowerCase()));
+ } else {
+ errorTables_.add(tableName);
+ return localTableNames;
}
}
@@ -219,6 +234,66 @@ public class CalciteMetadataHandler implements
CompilerStep {
}
return localTableNames;
}
+
+ /**
+ * Check if the error table is actually a table with a complex column.
There is Impala
+ * syntax where a complex column uses the same syntax in the FROM clause
as a table.
+ * This method is passed in all the tables that are not found and checks
to see if
+ * the table turned out to be a complex column rather than an actual
table. If so,
+ * this throws an unsupported feature exception (for the time being). If
it's not
+ * a table with a complex column, a table not found error will eventually
be thrown
+ * in a different place.
+ */
+ public void checkForComplexTable(StmtMetadataLoader.StmtTableCache
stmtTableCache,
+ List<String> errorTables, CalciteJniFrontend.QueryContext queryCtx)
+ throws ImpalaException {
+ List<String> allErrorTables = new ArrayList<>();
+ allErrorTables.addAll(errorTables_);
+ allErrorTables.addAll(errorTables);
+ for (String errorTable : allErrorTables) {
+ List<String> parts = Splitter.on('.').splitToList(errorTable);
+ // if there are 3 parts, then it has to be db.table.column and must be
a
+ // complex column.
+ if (parts.size() > 2) {
+ throw new UnsupportedFeatureException("Complex column " +
+ errorTable + " not supported.");
+ }
+ // if there are 2 parts, then it is either a missing db.table or a
+ // table.column. We look to see if the column can be found in any
+ // of the tables, in which case, it is a complex column being
referenced.
+ if (parts.size() == 2) {
+ // first check the already existing cache for the error table.
+ if (anyTableContainsColumn(stmtTableCache, parts.get(1))) {
+ throw new UnsupportedFeatureException("Complex column " +
+ errorTable + " not supported.");
+ }
+ // it's possible that the table wasn't loaded yet because this
method is
+ // only called when there is an error finding a table. Try loading
the table
+ // from catalogd just in case, and check to see if it's a complex
column.
+ TableName potentialComplexTable = new TableName(
+ currentDb_.toLowerCase(), parts.get(0).toLowerCase());
+ StmtMetadataLoader errorLoader = new StmtMetadataLoader(
+ queryCtx.getFrontend(), queryCtx.getCurrentDb(),
queryCtx.getTimeline());
+ StmtMetadataLoader.StmtTableCache errorCache =
+ errorLoader.loadTables(Sets.newHashSet(potentialComplexTable));
+ if (anyTableContainsColumn(errorCache, parts.get(1))) {
+ throw new UnsupportedFeatureException("Complex column " +
+ errorTable + " not supported.");
+ }
+ }
+ }
+ }
+ }
+
+ public static boolean anyTableContainsColumn(
+ StmtMetadataLoader.StmtTableCache stmtTableCache, String columnName) {
+ String onlyColumnPart = columnName.split("\\.")[0];
+ for (FeTable table : stmtTableCache.tables.values()) {
+ if (table.getColumn(onlyColumnPart) != null) {
+ return true;
+ }
+ }
+ return false;
}
@Override