tnakama created CALCITE-7614:
--------------------------------

             Summary: UNNEST of an unqualified struct-rooted array path fails 
validation: "Column 's.s' not found"
                 Key: CALCITE-7614
                 URL: https://issues.apache.org/jira/browse/CALCITE-7614
             Project: Calcite
          Issue Type: Bug
          Components: core
    Affects Versions: 1.40.0
            Reporter: tnakama


A table column `s` is a STRUCT (StructKind.PEEK_FIELDS) containing an array 
field `arr ARRAY<VARCHAR>`. UNNEST of that array via an *unqualified* path 
fails validation, while the table-qualified path validates. The struct-column 
name is duplicated during qualification (note the doubled 's.s').

  Schema (Frameworks, default conformance):  t(s STRUCT<arr ARRAY<VARCHAR>>)   // PEEK_FIELDS

    (1) SELECT s.arr FROM t                                         -> OK
    (2) SELECT * FROM t AS r CROSS JOIN UNNEST(r.s.arr) AS x        -> OK
    (3) SELECT * FROM t        CROSS JOIN UNNEST(s.arr)   AS x       -> FAILS:
        org.apache.calcite.sql.validate.SqlValidatorException:
          Column 's.s' not found in table 't'

  Analysis:
  UnnestNamespace.getTable() and UnnestNamespace.getColumnUnnestedFrom() both call
scope.fullyQualify(operand) on the UNNEST operand. When that operand is an
unqualified identifier whose leading component is a PEEK_FIELDS struct column,
DelegatingScope.fullyQualify() duplicates the struct-column segment, and
resolution then throws columnNotFoundInTable from the suffix-shortening loop in
DelegatingScope. The table-qualified form disambiguates the identifier and
avoids the problem.

Reproduction Code
```

import java.util.List;

import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.schema.impl.AbstractTable;
import org.apache.calcite.avatica.util.Casing;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.parser.SqlParser;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.tools.FrameworkConfig;
import org.apache.calcite.tools.Frameworks;
import org.apache.calcite.tools.Planner;
import org.apache.calcite.rel.type.StructKind;
import org.junit.jupiter.api.Test;

/**
 * PROBE (throwaway): pure-Calcite reproduction, with NO Mila SqlConverter, 
operator table, or
 * custom \{@code MilaSqlConformance}. Uses \{@link Frameworks} default config 
(default conformance).
 *
 * <p>Schema: {@code nested_struct_tbl(user_id VARCHAR, purchase 
STRUCT&lt;order_id VARCHAR, items
 * ARRAY&lt;STRUCT&lt;name VARCHAR, price BIGINT&gt;&gt;&gt;)} with \{@link 
StructKind#PEEK_FIELDS} so
 * that compound dotted access into the struct is allowed by identifier 
resolution.
 *
 * <p>Goal: determine whether \{@code UNNEST(purchase.items)} (unqualified) 
fails in vanilla Calcite,
 * which would confirm a Calcite-core bug rather than a Mila type/conformance 
interaction.
 */
class VanillaCalciteUnnestStructReproTest {

  private static FrameworkConfig config() {
    RelDataTypeFactory tf = new JavaTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
    RelDataType varchar = tf.createSqlType(SqlTypeName.VARCHAR);
    RelDataType bigint = tf.createSqlType(SqlTypeName.BIGINT);

    // STRUCT<name VARCHAR, price BIGINT> with PEEK_FIELDS
    RelDataType itemStruct =
        tf.createStructType(
            StructKind.PEEK_FIELDS, List.of(varchar, bigint), List.of("name", 
"price"));
    RelDataType itemsArray = tf.createArrayType(itemStruct, -1);

    // STRUCT<order_id VARCHAR, items ARRAY<...>> with PEEK_FIELDS
    RelDataType purchaseStruct =
        tf.createStructType(
            StructKind.PEEK_FIELDS, List.of(varchar, itemsArray), 
List.of("order_id", "items"));

    RelDataType rowType =
        tf.createStructType(
            StructKind.FULLY_QUALIFIED,
            List.of(varchar, purchaseStruct),
            List.of("user_id", "purchase"));

    AbstractTable table =
        new AbstractTable() {
          @Override
          public RelDataType getRowType(RelDataTypeFactory factory) {
            return rowType;
          }
        };

    SchemaPlus root = Frameworks.createRootSchema(true);
    root.add("nested_struct_tbl", table);
    SqlParser.Config parserConfig =
        
SqlParser.config().withUnquotedCasing(Casing.UNCHANGED).withCaseSensitive(false);
    return 
Frameworks.newConfigBuilder().parserConfig(parserConfig).defaultSchema(root).build();
  }

  private void run(String label, String sql) {
    Planner planner = Frameworks.getPlanner(config());
    try {
      SqlNode parsed = planner.parse(sql);
      planner.validate(parsed);
      System.out.println("VANILLA[" + label + "]: OK (validated)");
    } catch (Throwable t) {
      System.out.println("VANILLA[" + label + "]: FAILED");
      for (Throwable c = t; c != null && c != c.getCause(); c = c.getCause()) {
        System.out.println("   cause: " + c.getClass().getName() + " : " + 
c.getMessage());
      }
    } finally {
      planner.close();
    }
  }

  /** Minimal schema: \{@code s STRUCT<arr ARRAY<VARCHAR>>}; tightest possible 
repro. */
  private static FrameworkConfig minimalConfig() {
    RelDataTypeFactory tf = new JavaTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
    RelDataType varchar = tf.createSqlType(SqlTypeName.VARCHAR);
    RelDataType arr = tf.createArrayType(varchar, -1);
    RelDataType s =
        tf.createStructType(StructKind.PEEK_FIELDS, List.of(arr), 
List.of("arr"));
    RelDataType rowType =
        tf.createStructType(StructKind.FULLY_QUALIFIED, List.of(s), 
List.of("s"));
    AbstractTable table =
        new AbstractTable() {
          @Override
          public RelDataType getRowType(RelDataTypeFactory factory) {
            return rowType;
          }
        };
    SchemaPlus root = Frameworks.createRootSchema(true);
    root.add("t", table);
    SqlParser.Config parserConfig =
        
SqlParser.config().withUnquotedCasing(Casing.UNCHANGED).withCaseSensitive(false);
    return 
Frameworks.newConfigBuilder().parserConfig(parserConfig).defaultSchema(root).build();
  }

  private void runMinimal(String label, String sql) {
    Planner planner = Frameworks.getPlanner(minimalConfig());
    try {
      planner.validate(planner.parse(sql));
      System.out.println("MINIMAL[" + label + "]: OK (validated)");
    } catch (Throwable t) {
      System.out.println("MINIMAL[" + label + "]: FAILED");
      for (Throwable c = t; c != null && c != c.getCause(); c = c.getCause()) {
        System.out.println("   cause: " + c.getClass().getName() + " : " + 
c.getMessage());
      }
    } finally {
      planner.close();
    }
  }

  @Test
  void minimalMatrix() {
    runMinimal("aliased", "SELECT * FROM t AS r CROSS JOIN UNNEST(r.s.arr) AS 
x");
    runMinimal("unqualified", "SELECT * FROM t CROSS JOIN UNNEST(s.arr) AS x");
  }

  @Test
  void vanillaMatrix() {
    // Controls — expected OK in vanilla Calcite.
    run("normal-aliased", "SELECT t.purchase.order_id FROM nested_struct_tbl AS 
t");
    run("normal-unqualified", "SELECT purchase.order_id FROM 
nested_struct_tbl");
    run(
        "unnest-aliased",
        "SELECT item.name FROM nested_struct_tbl AS t "
            + "CROSS JOIN UNNEST(t.purchase.items) AS item");
    // The suspect case.
    run(
        "unnest-unqualified",
        "SELECT item.name FROM nested_struct_tbl "
            + "CROSS JOIN UNNEST(purchase.items) AS item");
  }
}
```



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to