tnakama created CALCITE-7614:
--------------------------------
Summary: UNNEST of an unqualified struct-rooted array path fails
validation: "Column 's.s' not found"
Key: CALCITE-7614
URL: https://issues.apache.org/jira/browse/CALCITE-7614
Project: Calcite
Issue Type: Bug
Components: core
Affects Versions: 1.40.0
Reporter: tnakama
A table column `s` is a STRUCT (StructKind.PEEK_FIELDS) containing an array
field `arr ARRAY<VARCHAR>`. UNNEST of that array via an *unqualified* path
fails validation, while the table-qualified path validates. The struct-column
name is duplicated during qualification (note the doubled 's.s').
Schema (Frameworks, default conformance): t(s STRUCT<arr ARRAY<VARCHAR>>),
where the struct type of s is created with StructKind.PEEK_FIELDS.
(1) SELECT s.arr FROM t -> OK
(2) SELECT * FROM t AS r CROSS JOIN UNNEST(r.s.arr) AS x -> OK
(3) SELECT * FROM t CROSS JOIN UNNEST(s.arr) AS x -> FAILS:
org.apache.calcite.sql.validate.SqlValidatorException:
Column 's.s' not found in table 't'
Analysis:
UnnestNamespace.getTable() and UnnestNamespace.getColumnUnnestedFrom() both
call scope.fullyQualify(operand) on the UNNEST operand. When the operand is an
unqualified identifier whose leading component is a PEEK_FIELDS struct column,
DelegatingScope.fullyQualify() duplicates the struct-column segment (hence the
's.s' in the error message), and resolution then throws columnNotFoundInTable
from the suffix-shortening loop in DelegatingScope. The table-qualified form
(r.s.arr) disambiguates the path and does not trigger the failure.
Reproduction Code
```
import java.util.List;
import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.schema.impl.AbstractTable;
import org.apache.calcite.avatica.util.Casing;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.parser.SqlParser;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.tools.FrameworkConfig;
import org.apache.calcite.tools.Frameworks;
import org.apache.calcite.tools.Planner;
import org.apache.calcite.rel.type.StructKind;
import org.junit.jupiter.api.Test;
/**
* PROBE (throwaway): pure-Calcite reproduction, with NO Mila SqlConverter,
operator table, or
* custom \{@code MilaSqlConformance}. Uses \{@link Frameworks} default config
(default conformance).
*
* <p>Schema: {@code nested_struct_tbl(user_id VARCHAR, purchase
STRUCT<order_id VARCHAR, items
* ARRAY<STRUCT<name VARCHAR, price BIGINT>>>)} with \{@link
StructKind#PEEK_FIELDS} so
* that compound dotted access into the struct is allowed by identifier
resolution.
*
* <p>Goal: determine whether \{@code UNNEST(purchase.items)} (unqualified)
fails in vanilla Calcite,
* which would confirm a Calcite-core bug rather than a Mila type/conformance
interaction.
*/
class VanillaCalciteUnnestStructReproTest {
private static FrameworkConfig config() {
RelDataTypeFactory tf = new JavaTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
RelDataType varchar = tf.createSqlType(SqlTypeName.VARCHAR);
RelDataType bigint = tf.createSqlType(SqlTypeName.BIGINT);
// STRUCT<name VARCHAR, price BIGINT> with PEEK_FIELDS
RelDataType itemStruct =
tf.createStructType(
StructKind.PEEK_FIELDS, List.of(varchar, bigint), List.of("name",
"price"));
RelDataType itemsArray = tf.createArrayType(itemStruct, -1);
// STRUCT<order_id VARCHAR, items ARRAY<...>> with PEEK_FIELDS
RelDataType purchaseStruct =
tf.createStructType(
StructKind.PEEK_FIELDS, List.of(varchar, itemsArray),
List.of("order_id", "items"));
RelDataType rowType =
tf.createStructType(
StructKind.FULLY_QUALIFIED,
List.of(varchar, purchaseStruct),
List.of("user_id", "purchase"));
AbstractTable table =
new AbstractTable() {
@Override
public RelDataType getRowType(RelDataTypeFactory factory) {
return rowType;
}
};
SchemaPlus root = Frameworks.createRootSchema(true);
root.add("nested_struct_tbl", table);
SqlParser.Config parserConfig =
SqlParser.config().withUnquotedCasing(Casing.UNCHANGED).withCaseSensitive(false);
return
Frameworks.newConfigBuilder().parserConfig(parserConfig).defaultSchema(root).build();
}
private void run(String label, String sql) {
Planner planner = Frameworks.getPlanner(config());
try {
SqlNode parsed = planner.parse(sql);
planner.validate(parsed);
System.out.println("VANILLA[" + label + "]: OK (validated)");
} catch (Throwable t) {
System.out.println("VANILLA[" + label + "]: FAILED");
for (Throwable c = t; c != null && c != c.getCause(); c = c.getCause()) {
System.out.println(" cause: " + c.getClass().getName() + " : " +
c.getMessage());
}
} finally {
planner.close();
}
}
/** Minimal schema: \{@code s STRUCT<arr ARRAY<VARCHAR>>}; tightest possible
repro. */
private static FrameworkConfig minimalConfig() {
RelDataTypeFactory tf = new JavaTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
RelDataType varchar = tf.createSqlType(SqlTypeName.VARCHAR);
RelDataType arr = tf.createArrayType(varchar, -1);
RelDataType s =
tf.createStructType(StructKind.PEEK_FIELDS, List.of(arr),
List.of("arr"));
RelDataType rowType =
tf.createStructType(StructKind.FULLY_QUALIFIED, List.of(s),
List.of("s"));
AbstractTable table =
new AbstractTable() {
@Override
public RelDataType getRowType(RelDataTypeFactory factory) {
return rowType;
}
};
SchemaPlus root = Frameworks.createRootSchema(true);
root.add("t", table);
SqlParser.Config parserConfig =
SqlParser.config().withUnquotedCasing(Casing.UNCHANGED).withCaseSensitive(false);
return
Frameworks.newConfigBuilder().parserConfig(parserConfig).defaultSchema(root).build();
}
private void runMinimal(String label, String sql) {
Planner planner = Frameworks.getPlanner(minimalConfig());
try {
planner.validate(planner.parse(sql));
System.out.println("MINIMAL[" + label + "]: OK (validated)");
} catch (Throwable t) {
System.out.println("MINIMAL[" + label + "]: FAILED");
for (Throwable c = t; c != null && c != c.getCause(); c = c.getCause()) {
System.out.println(" cause: " + c.getClass().getName() + " : " +
c.getMessage());
}
} finally {
planner.close();
}
}
@Test
void minimalMatrix() {
runMinimal("aliased", "SELECT * FROM t AS r CROSS JOIN UNNEST(r.s.arr) AS
x");
runMinimal("unqualified", "SELECT * FROM t CROSS JOIN UNNEST(s.arr) AS x");
}
@Test
void vanillaMatrix() {
// Controls — expected OK in vanilla Calcite.
run("normal-aliased", "SELECT t.purchase.order_id FROM nested_struct_tbl AS
t");
run("normal-unqualified", "SELECT purchase.order_id FROM
nested_struct_tbl");
run(
"unnest-aliased",
"SELECT item.name FROM nested_struct_tbl AS t "
+ "CROSS JOIN UNNEST(t.purchase.items) AS item");
// The suspect case.
run(
"unnest-unqualified",
"SELECT item.name FROM nested_struct_tbl "
+ "CROSS JOIN UNNEST(purchase.items) AS item");
}
}
```
--
This message was sent by Atlassian Jira
(v8.20.10#820010)