This is an automated email from the ASF dual-hosted git repository.
yqm pushed a commit to branch 37.0.0
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/37.0.0 by this push:
new f785b1abb2e fix: clustered by virtual columns that depended on virtual
columns now correctly preserve these dependencies (#19262) (#19279)
f785b1abb2e is described below
commit f785b1abb2eb19f89de266bcc6b9b266f9629a96
Author: Clint Wylie <[email protected]>
AuthorDate: Fri Apr 10 13:10:15 2026 -0700
fix: clustered by virtual columns that depended on virtual columns now
correctly preserve these dependencies (#19262) (#19279)
changes:
* adds `addRequiredVirtualColumns` method to `SegmentGenerationStageSpec`
which resolves transitive virtual column dependencies for virtual columns used
by clustering, fixing a bug where these dependent virtual columns would be lost
in the shard spec and compaction state
* adds `supportsRequiredRewrite` and `rewriteRequiredColumns` to
`VirtualColumn` allowing a virtual column to rewrite its input references to
equivalent names
* adds `Expr.rewriteBindings` to rewrite identifier bindings in an `Expr`
tree
* `VirtualColumns.findEquivalent` is enhanced to transitively resolve
dependent virtual columns across naming contexts before checking equivalence,
enabling detection that e.g. `lower("v1")` ≡ `lower("v0")` when v0 and v1 are
equivalent virtual columns
* `FilterSegmentPruner` updated to use transitive equivalence when matching
shard virtual columns to query virtual columns (with Optional-based caching to
correctly handle nulls)
* `Projections.matchQueryVirtualColumn` updated similarly
* intern range shardspec dimension strings and virtual columns
---
.../embedded/compact/CompactionSupervisorTest.java | 182 +++++++++++++++++++++
.../testing/embedded/msq/MultiStageQueryTest.java | 172 +++++++++++++++++++
.../destination/SegmentGenerationStageSpec.java | 26 +++
.../main/java/org/apache/druid/math/expr/Expr.java | 21 +++
.../java/org/apache/druid/math/expr/Parser.java | 18 +-
.../druid/query/filter/FilterSegmentPruner.java | 56 ++++++-
.../org/apache/druid/segment/VirtualColumn.java | 24 ++-
.../org/apache/druid/segment/VirtualColumns.java | 43 ++++-
.../druid/segment/projections/Projections.java | 5 +-
.../segment/virtual/ExpressionVirtualColumn.java | 18 ++
.../segment/virtual/NestedFieldVirtualColumn.java | 21 +++
.../segment/virtual/NestedMergeVirtualColumn.java | 20 +++
.../segment/virtual/NestedObjectVirtualColumn.java | 24 +++
.../org/apache/druid/timeline/DataSegment.java | 5 +
.../partition/BaseDimensionRangeShardSpec.java | 14 +-
.../apache/druid/timeline/partition/ShardSpec.java | 4 +-
.../query/filter/FilterSegmentPrunerTest.java | 70 +++++++-
.../apache/druid/segment/VirtualColumnsTest.java | 43 ++++-
.../virtual/NestedMergeVirtualColumnTest.java | 1 +
19 files changed, 739 insertions(+), 28 deletions(-)
diff --git
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionSupervisorTest.java
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionSupervisorTest.java
index 97e1bde8d19..18e1a7ccb0f 100644
---
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionSupervisorTest.java
+++
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionSupervisorTest.java
@@ -67,6 +67,7 @@ import org.apache.druid.segment.metadata.IndexingStateCache;
import org.apache.druid.segment.metadata.IndexingStateFingerprintMapper;
import org.apache.druid.segment.transform.CompactionTransformSpec;
import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
+import org.apache.druid.segment.virtual.NestedFieldVirtualColumn;
import org.apache.druid.server.compaction.CompactionCandidateSearchPolicy;
import org.apache.druid.server.compaction.InlineReindexingRuleProvider;
import org.apache.druid.server.compaction.MostFragmentedIntervalFirstPolicy;
@@ -709,6 +710,187 @@ public class CompactionSupervisorTest extends
EmbeddedClusterTestBase
);
}
+ @Test
+ public void test_compaction_cluster_by_nested_virtualcolumn()
+ {
+ // Virtual Columns on nested data are only supported with MSQ compaction
engine right now.
+ CompactionEngine compactionEngine = CompactionEngine.MSQ;
+ configureCompaction(compactionEngine, null);
+
+ String jsonDataWithNestedColumn =
+ """
+ {"timestamp": "2023-01-01T00:00:00", "str":"a", "obj":{"a":
"LL"}}
+ {"timestamp": "2023-01-01T00:00:00", "str":"", "obj":{"a":
"MM"}}
+ {"timestamp": "2023-01-01T00:00:00", "str":"null", "obj":{"a":
"NN"}}
+ {"timestamp": "2023-01-01T00:00:00", "str":"b", "obj":{"a":
"OO"}}
+ {"timestamp": "2023-01-01T00:00:00", "str":"c", "obj":{"a":
"PP"}}
+ {"timestamp": "2023-01-01T00:00:00", "str":"d", "obj":{"a":
"QQ"}}
+ {"timestamp": "2023-01-01T00:00:00", "str":null, "obj":{"a":
"RR"}}
+ """;
+
+ final TaskBuilder.Index task = TaskBuilder
+ .ofTypeIndex()
+ .dataSource(dataSource)
+ .jsonInputFormat()
+ .inlineInputSourceWithData(jsonDataWithNestedColumn)
+ .isoTimestampColumn("timestamp")
+ .schemaDiscovery()
+ .granularitySpec("DAY", null, false);
+
+ cluster.callApi().runTask(task.withId(IdUtils.getRandomId()), overlord);
+ cluster.callApi().waitForAllSegmentsToBeAvailable(dataSource, coordinator,
broker);
+
+ Assertions.assertEquals(7, getTotalRowCount());
+
+ // getClusterByVirtualColumnMappings does the order 'backwards' since it
finds the column referenced by the
+ // clustered by expression and then adds its dependencies after when
collecting virtual columns. this test will
+ // fail if that ever changes (unless we do something like make equals on
VirtualColumns not care about order)
+ VirtualColumns virtualColumns = VirtualColumns.create(
+ new ExpressionVirtualColumn("v1", "lower(\"v0\")", ColumnType.STRING,
TestExprMacroTable.INSTANCE),
+ new NestedFieldVirtualColumn("obj", "$.a", "v0", ColumnType.STRING)
+ );
+
+ InlineSchemaDataSourceCompactionConfig config =
+ InlineSchemaDataSourceCompactionConfig
+ .builder()
+ .forDataSource(dataSource)
+ .withSkipOffsetFromLatest(Period.seconds(0))
+ .withTransformSpec(
+ new CompactionTransformSpec(
+ null,
+ virtualColumns
+ )
+ )
+ .withTuningConfig(
+ UserCompactionTaskQueryTuningConfig
+ .builder()
+ .partitionsSpec(new DimensionRangePartitionsSpec(4, null,
List.of("v1"), false))
+ .build()
+ )
+ .build();
+
+ runCompactionWithSpec(config);
+ waitForAllCompactionTasksToFinish();
+
+ cluster.callApi().waitForAllSegmentsToBeAvailable(dataSource, coordinator,
broker);
+
+ List<DataSegment> segments =
cluster.callApi().getVisibleUsedSegments(dataSource,
overlord).stream().toList();
+ Assertions.assertEquals(2, segments.size());
+ Assertions.assertEquals(
+ new DimensionRangeShardSpec(
+ List.of("v1"),
+ virtualColumns,
+ null,
+ StringTuple.create("oo"),
+ 0,
+ 2
+ ),
+ segments.get(0).getShardSpec()
+ );
+ Assertions.assertEquals(
+ new DimensionRangeShardSpec(
+ List.of("v1"),
+ virtualColumns,
+ StringTuple.create("oo"),
+ null,
+ 1,
+ 2
+ ),
+ segments.get(1).getShardSpec()
+ );
+ }
+
+ @Test
+ public void test_compaction_cluster_by_nested_virtualcolumn_rollup()
+ {
+ // Virtual Columns on nested data are only supported with MSQ compaction
engine right now.
+ CompactionEngine compactionEngine = CompactionEngine.MSQ;
+ configureCompaction(compactionEngine, null);
+
+ String jsonDataWithNestedColumn =
+ """
+ {"timestamp": "2023-01-01T00:00:00", "str":"a", "obj":{"a":
"LL"}}
+ {"timestamp": "2023-01-01T00:00:00", "str":"", "obj":{"a":
"MM"}}
+ {"timestamp": "2023-01-01T00:00:00", "str":"null", "obj":{"a":
"NN"}}
+ {"timestamp": "2023-01-01T00:00:00", "str":"b", "obj":{"a":
"OO"}}
+ {"timestamp": "2023-01-01T00:00:00", "str":"c", "obj":{"a":
"PP"}}
+ {"timestamp": "2023-01-01T00:00:00", "str":"d", "obj":{"a":
"QQ"}}
+ {"timestamp": "2023-01-01T00:00:00", "str":null, "obj":{"a":
"RR"}}
+ """;
+
+ final TaskBuilder.Index task = TaskBuilder
+ .ofTypeIndex()
+ .dataSource(dataSource)
+ .jsonInputFormat()
+ .inlineInputSourceWithData(jsonDataWithNestedColumn)
+ .isoTimestampColumn("timestamp")
+ .schemaDiscovery()
+ .dataSchema(builder -> builder.withAggregators(new
CountAggregatorFactory("count")))
+ .granularitySpec("DAY", "MINUTE", true);
+
+ cluster.callApi().runTask(task.withId(IdUtils.getRandomId()), overlord);
+ cluster.callApi().waitForAllSegmentsToBeAvailable(dataSource, coordinator,
broker);
+
+ Assertions.assertEquals(7, getTotalRowCount());
+
+ // getClusterByVirtualColumnMappings does the order 'backwards' since it
finds the column referenced by the
+ // clustered by expression and then adds its dependencies after when
collecting virtual columns. this test will
+ // fail if that ever changes (unless we do something like make equals on
VirtualColumns not care about order)
+ VirtualColumns virtualColumns = VirtualColumns.create(
+ new ExpressionVirtualColumn("v1", "lower(\"v0\")", ColumnType.STRING,
TestExprMacroTable.INSTANCE),
+ new NestedFieldVirtualColumn("obj", "$.a", "v0", ColumnType.STRING)
+ );
+
+ InlineSchemaDataSourceCompactionConfig config =
+ InlineSchemaDataSourceCompactionConfig
+ .builder()
+ .forDataSource(dataSource)
+ .withSkipOffsetFromLatest(Period.seconds(0))
+ .withTransformSpec(
+ new CompactionTransformSpec(
+ null,
+ virtualColumns
+ )
+ )
+ .withTuningConfig(
+ UserCompactionTaskQueryTuningConfig
+ .builder()
+ .partitionsSpec(new DimensionRangePartitionsSpec(4, null,
List.of("v1"), false))
+ .build()
+ )
+ .build();
+
+ runCompactionWithSpec(config);
+ waitForAllCompactionTasksToFinish();
+
+ cluster.callApi().waitForAllSegmentsToBeAvailable(dataSource, coordinator,
broker);
+
+ List<DataSegment> segments =
cluster.callApi().getVisibleUsedSegments(dataSource,
overlord).stream().toList();
+ Assertions.assertEquals(2, segments.size());
+ Assertions.assertEquals(
+ new DimensionRangeShardSpec(
+ List.of("v1"),
+ virtualColumns,
+ null,
+ StringTuple.create("oo"),
+ 0,
+ 2
+ ),
+ segments.get(0).getShardSpec()
+ );
+ Assertions.assertEquals(
+ new DimensionRangeShardSpec(
+ List.of("v1"),
+ virtualColumns,
+ StringTuple.create("oo"),
+ null,
+ 1,
+ 2
+ ),
+ segments.get(1).getShardSpec()
+ );
+ }
+
/**
* Tests that when a compaction task filters out all rows using a transform
spec,
* tombstones are created to properly drop the old segments. This test
covers both
diff --git
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/msq/MultiStageQueryTest.java
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/msq/MultiStageQueryTest.java
index 1d4ec613e6c..7ecf2f3efcf 100644
---
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/msq/MultiStageQueryTest.java
+++
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/msq/MultiStageQueryTest.java
@@ -46,6 +46,9 @@ import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -267,6 +270,88 @@ public class MultiStageQueryTest extends
EmbeddedClusterTestBase
assertClusterByVirtualColumnQueries();
}
+ @Test
+ public void testClusterByNestedVirtualColumn() throws IOException
+ {
+ final Path tempFile = createNestedJsonDataFile();
+ final String sqlTemplate =
+ """
+ SET rowsPerSegment = 4;
+ SET groupByEnableMultiValueUnnesting = FALSE;
+ REPLACE INTO %s OVERWRITE ALL
+ WITH "ext" AS (
+ SELECT *
+ FROM TABLE(EXTERN('{"type":"local","files":["%s"]}',
'{"type":"json"}'))
+ EXTEND(
+ "timestamp" VARCHAR,
+ "str" VARCHAR,
+ "obj" TYPE('COMPLEX<json>')
+ )
+ )
+ SELECT
+ TIME_PARSE("timestamp") AS __time,
+ str,
+ obj
+ FROM "ext"
+ PARTITIONED BY DAY
+ CLUSTERED BY LOWER(JSON_VALUE(obj, '$.a' RETURNING VARCHAR))
+ """;
+ final String sql = StringUtils.format(
+ sqlTemplate,
+ dataSource,
+ tempFile.toAbsolutePath()
+ );
+
+ final SqlTaskStatus taskStatus = msqApis.submitTaskSql(sql);
+ cluster.callApi().waitForTaskToSucceed(taskStatus.getTaskId(),
overlord.latchableEmitter());
+ cluster.callApi().waitForAllSegmentsToBeAvailable(dataSource, coordinator,
broker);
+
+ assertClusterByNestedVirtualColumnSegments();
+ assertClusterByNestedVirtualColumnQueries();
+ }
+
+ @Test
+ public void testClusterByNestedVirtualColumnRollup() throws IOException
+ {
+ final Path tempFile = createNestedJsonDataFile();
+ final String sqlTemplate =
+ """
+ SET rowsPerSegment = 4;
+ SET groupByEnableMultiValueUnnesting = FALSE;
+ REPLACE INTO %s OVERWRITE ALL
+ WITH "ext" AS (
+ SELECT *
+ FROM TABLE(EXTERN('{"type":"local","files":["%s"]}',
'{"type":"json"}'))
+ EXTEND(
+ "timestamp" VARCHAR,
+ "str" VARCHAR,
+ "obj" TYPE('COMPLEX<json>')
+ )
+ )
+ SELECT
+ TIME_PARSE("timestamp") AS __time,
+ str,
+ obj,
+ COUNT(*) AS cnt
+ FROM "ext"
+ GROUP BY TIME_PARSE("timestamp"), str, obj, LOWER(JSON_VALUE(obj,
'$.a' RETURNING VARCHAR))
+ PARTITIONED BY DAY
+ CLUSTERED BY LOWER(JSON_VALUE(obj, '$.a' RETURNING VARCHAR))
+ """;
+ final String sql = StringUtils.format(
+ sqlTemplate,
+ dataSource,
+ tempFile.toAbsolutePath()
+ );
+
+ final SqlTaskStatus taskStatus = msqApis.submitTaskSql(sql);
+ cluster.callApi().waitForTaskToSucceed(taskStatus.getTaskId(),
overlord.latchableEmitter());
+ cluster.callApi().waitForAllSegmentsToBeAvailable(dataSource, coordinator,
broker);
+
+ assertClusterByNestedVirtualColumnSegments();
+ assertClusterByNestedVirtualColumnQueries();
+ }
+
private void assertClusterByVirtualColumnSegments()
{
List<DataSegment> segments =
cluster.callApi().getVisibleUsedSegments(dataSource,
overlord).stream().toList();
@@ -331,6 +416,93 @@ public class MultiStageQueryTest extends
EmbeddedClusterTestBase
Assertions.assertEquals(2, getSegmentsScannedForDartQuery(queryId));
}
+
+ private Path createNestedJsonDataFile() throws IOException
+ {
+ final Path tempFile = Files.createTempFile("nested-data", ".json");
+ tempFile.toFile().deleteOnExit();
+ Files.writeString(tempFile,
+ """
+ {"timestamp": "2023-01-01T00:00:00", "str":"a",
"obj":{"a": "A"}}
+ {"timestamp": "2023-01-01T00:00:01", "str":"b",
"obj":{"a": "A"}}
+ {"timestamp": "2023-01-01T00:00:02", "str":"c",
"obj":{"a": "B"}}
+ {"timestamp": "2023-01-01T00:00:03", "str":"d",
"obj":{"a": "A"}}
+ {"timestamp": "2023-01-01T00:00:04", "str":"e",
"obj":{"a": "B"}}
+ {"timestamp": "2023-01-01T00:00:05", "str":"f",
"obj":{"a": "A"}}
+ {"timestamp": "2023-01-01T00:00:06", "str":"g",
"obj":{"a": "A"}}
+ """
+ );
+ return tempFile;
+ }
+
+ private void assertClusterByNestedVirtualColumnSegments()
+ {
+ // all rows in same time chunk, max rows is 4, so we expect 2 segments
with a range split on 'a' since there are
+ // 5 rows with 'A' and 2 rows with 'B'
+ List<DataSegment> segments =
cluster.callApi().getVisibleUsedSegments(dataSource,
overlord).stream().toList();
+ Assertions.assertEquals(2, segments.size());
+
+ final DimensionRangeShardSpec shardSpec0 = (DimensionRangeShardSpec)
segments.get(0).getShardSpec();
+ Assertions.assertEquals(1, shardSpec0.getDimensions().size());
+ Assertions.assertFalse(shardSpec0.getVirtualColumns().isEmpty());
+ Assertions.assertEquals(2,
shardSpec0.getVirtualColumns().getVirtualColumns().length);
+ Assertions.assertEquals(0, shardSpec0.getPartitionNum());
+
+ Assertions.assertNull(shardSpec0.getStartTuple());
+ Assertions.assertEquals(StringTuple.create("a"), shardSpec0.getEndTuple());
+
+ final DimensionRangeShardSpec shardSpec1 = (DimensionRangeShardSpec)
segments.get(1).getShardSpec();
+ Assertions.assertEquals(shardSpec0.getDimensions(),
shardSpec1.getDimensions());
+ Assertions.assertEquals(shardSpec0.getVirtualColumns(),
shardSpec1.getVirtualColumns());
+ Assertions.assertEquals(1, shardSpec1.getPartitionNum());
+
+ Assertions.assertEquals(StringTuple.create("a"),
shardSpec1.getStartTuple());
+ Assertions.assertNull(shardSpec1.getEndTuple());
+ }
+
+ private void assertClusterByNestedVirtualColumnQueries()
+ {
+ String queryId = UUID.randomUUID().toString();
+ cluster.callApi().verifySqlQuery(
+ "SET engine = 'msq-dart'; SET sqlQueryId = '" + queryId + "'; SELECT
str FROM %s ORDER BY __time",
+ dataSource,
+ """
+ a
+ b
+ c
+ d
+ e
+ f
+ g"""
+ );
+ Assertions.assertEquals(2, getSegmentsScannedForDartQuery(queryId));
+
+ queryId = UUID.randomUUID().toString();
+ cluster.callApi().verifySqlQuery(
+ "SET engine = 'msq-dart'; SET sqlQueryId = '" + queryId + "'; SELECT
str FROM %s WHERE LOWER(JSON_VALUE(obj, '$.a' RETURNING VARCHAR)) = 'b' ORDER
BY __time",
+ dataSource,
+ """
+ c
+ e"""
+ );
+ Assertions.assertEquals(1, getSegmentsScannedForDartQuery(queryId));
+
+ queryId = UUID.randomUUID().toString();
+ cluster.callApi().verifySqlQuery(
+ "SET engine = 'msq-dart'; SET sqlQueryId = '" + queryId + "'; SELECT
str FROM %s WHERE LOWER(JSON_VALUE(obj, '$.a' RETURNING VARCHAR)) <= 'b' ORDER
BY __time",
+ dataSource,
+ """
+ a
+ b
+ c
+ d
+ e
+ f
+ g"""
+ );
+ Assertions.assertEquals(2, getSegmentsScannedForDartQuery(queryId));
+ }
+
private long getSegmentsScannedForDartQuery(String sqlQueryId)
{
ChannelCounters.Snapshot segmentChannelCounters =
getDartSegmentChannelCounters(sqlQueryId);
diff --git
a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/SegmentGenerationStageSpec.java
b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/SegmentGenerationStageSpec.java
index 7898cd8ab6a..4d00d19f3fc 100644
---
a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/SegmentGenerationStageSpec.java
+++
b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/SegmentGenerationStageSpec.java
@@ -41,6 +41,7 @@ import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.segment.VirtualColumn;
+import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.indexing.DataSchema;
import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec;
@@ -148,6 +149,7 @@ public class SegmentGenerationStageSpec implements
TerminalStageSpec
final VirtualColumn vc = outputToVc.get(column.columnName());
if (vc != null) {
clusterByVirtualColumns.put(column.columnName(), vc);
+ addRequiredVirtualColumns(groupByQuery.getVirtualColumns(), vc,
clusterByVirtualColumns);
}
}
} else if (query instanceof ScanQuery scanQuery) {
@@ -155,9 +157,33 @@ public class SegmentGenerationStageSpec implements
TerminalStageSpec
final VirtualColumn vc =
scanQuery.getVirtualColumns().getVirtualColumn(column.columnName());
if (vc != null) {
clusterByVirtualColumns.put(column.columnName(), vc);
+ addRequiredVirtualColumns(scanQuery.getVirtualColumns(), vc,
clusterByVirtualColumns);
}
}
}
return clusterByVirtualColumns;
}
+
+ /**
+ * Recursively adds any {@link VirtualColumn#requiredColumns()} which are
also virtual columns. This handles cases
+ * where a cluster-by virtual column depends on other virtual columns, such
as when clustering by something like
+ * {@code LOWER(JSON_VALUE(obj, '$.path'))} which creates an
ExpressionVirtualColumn that references a
+ * NestedFieldVirtualColumn.
+ */
+ private static void addRequiredVirtualColumns(
+ VirtualColumns allVirtualColumns,
+ VirtualColumn vc,
+ Map<String, VirtualColumn> collected
+ )
+ {
+ for (String requiredColumn : vc.requiredColumns()) {
+ if (!collected.containsKey(requiredColumn)) {
+ final VirtualColumn requiredVc =
allVirtualColumns.getVirtualColumn(requiredColumn);
+ if (requiredVc != null) {
+ collected.put(requiredColumn, requiredVc);
+ addRequiredVirtualColumns(allVirtualColumns, requiredVc, collected);
+ }
+ }
+ }
+ }
}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/Expr.java
b/processing/src/main/java/org/apache/druid/math/expr/Expr.java
index 53de7eabbc2..faa122d0e70 100644
--- a/processing/src/main/java/org/apache/druid/math/expr/Expr.java
+++ b/processing/src/main/java/org/apache/druid/math/expr/Expr.java
@@ -48,6 +48,7 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
@@ -131,6 +132,26 @@ public interface Expr extends Cacheable
return null;
}
+ /**
+ * Replaces each {@link IdentifierExpr} whose {@link IdentifierExpr#binding} is
present as a key in the supplied map with
+ * an identifier bound to the corresponding map value.
+ */
+ default Expr rewriteBindings(Map<String, String> rewriteMap)
+ {
+ return visit(expr -> {
+ if (expr instanceof IdentifierExpr identifier) {
+ final String replacement = rewriteMap.get(identifier.binding);
+ if (replacement != null) {
+ if (Objects.equals(identifier.identifier, identifier.binding)) {
+ return new IdentifierExpr(replacement, replacement);
+ }
+ return new IdentifierExpr(identifier.identifier, replacement);
+ }
+ }
+ return expr;
+ });
+ }
+
/**
* Evaluate the {@link Expr} with the bindings which supply {@link
IdentifierExpr} with their values, producing an
* {@link ExprEval} with the result.
diff --git a/processing/src/main/java/org/apache/druid/math/expr/Parser.java
b/processing/src/main/java/org/apache/druid/math/expr/Parser.java
index fd42daa30fd..f061dda11b5 100644
--- a/processing/src/main/java/org/apache/druid/math/expr/Parser.java
+++ b/processing/src/main/java/org/apache/druid/math/expr/Parser.java
@@ -166,33 +166,27 @@ public class Parser
public static Expr flatten(Expr expr)
{
return expr.visit(childExpr -> {
- if (childExpr instanceof BinaryOpExprBase) {
- BinaryOpExprBase binary = (BinaryOpExprBase) childExpr;
+ if (childExpr instanceof BinaryOpExprBase binary) {
if (Evals.isAllConstants(binary.left, binary.right)) {
return childExpr.eval(InputBindings.nilBindings()).toExpr();
}
- } else if (childExpr instanceof UnaryExpr) {
- UnaryExpr unary = (UnaryExpr) childExpr;
-
+ } else if (childExpr instanceof UnaryExpr unary) {
if (unary.expr instanceof ConstantExpr) {
return childExpr.eval(InputBindings.nilBindings()).toExpr();
}
- } else if (childExpr instanceof FunctionExpr) {
- FunctionExpr functionExpr = (FunctionExpr) childExpr;
+ } else if (childExpr instanceof FunctionExpr functionExpr) {
List<Expr> args = functionExpr.args;
if (Evals.isAllConstants(args)) {
return childExpr.eval(InputBindings.nilBindings()).toExpr();
}
- } else if (childExpr instanceof ApplyFunctionExpr) {
- ApplyFunctionExpr applyFunctionExpr = (ApplyFunctionExpr) childExpr;
+ } else if (childExpr instanceof ApplyFunctionExpr applyFunctionExpr) {
List<Expr> args = applyFunctionExpr.argsExpr;
if (Evals.isAllConstants(args)) {
- if (applyFunctionExpr.analyzeInputs().getFreeVariables().size() ==
0) {
+ if (applyFunctionExpr.analyzeInputs().getFreeVariables().isEmpty()) {
return childExpr.eval(InputBindings.nilBindings()).toExpr();
}
}
- } else if (childExpr instanceof ExprMacroTable.ExprMacroFunctionExpr) {
- ExprMacroTable.ExprMacroFunctionExpr macroFn =
(ExprMacroTable.ExprMacroFunctionExpr) childExpr;
+ } else if (childExpr instanceof ExprMacroTable.ExprMacroFunctionExpr
macroFn) {
if (Evals.isAllConstants(macroFn.getArgs())) {
return childExpr.eval(InputBindings.nilBindings()).toExpr();
}
diff --git
a/processing/src/main/java/org/apache/druid/query/filter/FilterSegmentPruner.java
b/processing/src/main/java/org/apache/druid/query/filter/FilterSegmentPruner.java
index 0df393661cb..32854ac0d32 100644
---
a/processing/src/main/java/org/apache/druid/query/filter/FilterSegmentPruner.java
+++
b/processing/src/main/java/org/apache/druid/query/filter/FilterSegmentPruner.java
@@ -47,6 +47,7 @@ public class FilterSegmentPruner implements SegmentPruner
private final Set<String> filterFields;
private final VirtualColumns virtualColumns;
private final Map<String, Optional<RangeSet<String>>> rangeCache;
+ private final Map<ShardVirtualColumnCacheEntry, Optional<VirtualColumn>>
shardEquivalenceCache;
public FilterSegmentPruner(
DimFilter filter,
@@ -58,6 +59,7 @@ public class FilterSegmentPruner implements SegmentPruner
this.filterFields = filterFields == null ? filter.getRequiredColumns() :
filterFields;
this.virtualColumns = virtualColumns == null ? VirtualColumns.EMPTY :
virtualColumns;
this.rangeCache = new HashMap<>();
+ this.shardEquivalenceCache = new HashMap<>();
}
@@ -79,7 +81,10 @@ public class FilterSegmentPruner implements SegmentPruner
for (String dimension : dimensions) {
final VirtualColumn shardVirtualColumn =
shard.getDomainVirtualColumns().getVirtualColumn(dimension);
if (shardVirtualColumn != null) {
- final VirtualColumn queryEquivalent =
virtualColumns.findEquivalent(shardVirtualColumn);
+ final VirtualColumn queryEquivalent = getQueryEquivalent(
+ shard.getDomainVirtualColumns(),
+ shardVirtualColumn
+ );
if (queryEquivalent != null) {
if (filterFields == null ||
filterFields.contains(queryEquivalent.getOutputName())) {
final Optional<RangeSet<String>> optFilterRangeSet = rangeCache
@@ -161,4 +166,53 @@ public class FilterSegmentPruner implements SegmentPruner
", virtualColumns=" + virtualColumns +
'}';
}
+
+ @Nullable
+ private VirtualColumn getQueryEquivalent(VirtualColumns shardVirtualColumns,
VirtualColumn shardVirtualColumn)
+ {
+ final Optional<VirtualColumn> cached =
shardEquivalenceCache.computeIfAbsent(
+ new ShardVirtualColumnCacheEntry(shardVirtualColumn,
shardVirtualColumns),
+ virtualColumn ->
Optional.ofNullable(virtualColumns.findEquivalent(shardVirtualColumns,
virtualColumn.shardVirtualColumn))
+ );
+ return cached.orElse(null);
+ }
+
+ /**
+ * Structure to preserve the VirtualColumn 'tree' to use as a cache key so
that we can distinguish otherwise
+ * identical {@link VirtualColumn}s whose dependencies are virtual columns that
have the same name but are defined differently
+ */
+ private static final class ShardVirtualColumnCacheEntry
+ {
+ private final VirtualColumn shardVirtualColumn;
+ private final List<ShardVirtualColumnCacheEntry> dependents;
+
+ public ShardVirtualColumnCacheEntry(VirtualColumn shardVirtualColumn,
VirtualColumns shardVirtualColumns)
+ {
+ this.shardVirtualColumn = shardVirtualColumn;
+ this.dependents = new ArrayList<>();
+ for (String required : shardVirtualColumn.requiredColumns()) {
+ final VirtualColumn dependent =
shardVirtualColumns.getVirtualColumn(required);
+ if (dependent != null) {
+ dependents.add(new ShardVirtualColumnCacheEntry(dependent,
shardVirtualColumns));
+ }
+ }
+ }
+
+ @Override
+ public boolean equals(Object o)
+ {
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ ShardVirtualColumnCacheEntry that = (ShardVirtualColumnCacheEntry) o;
+ return Objects.equals(shardVirtualColumn, that.shardVirtualColumn) &&
+ Objects.equals(dependents, that.dependents);
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(shardVirtualColumn, dependents);
+ }
+ }
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java
b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java
index 389d74eedaa..5768ce4c04b 100644
--- a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java
@@ -50,6 +50,7 @@ import
org.apache.druid.segment.virtual.RegexFilteredVirtualColumn;
import javax.annotation.Nullable;
import java.util.List;
+import java.util.Map;
/**
* Virtual columns are "views" created over a {@link ColumnSelectorFactory} or
{@link ColumnSelector}. They can
@@ -472,6 +473,27 @@ public interface VirtualColumn extends Cacheable
*/
List<String> requiredColumns();
+ /**
+ * Returns true if the virtual column supports {@link
#rewriteRequiredColumns(Map)}
+ */
+ default boolean supportsRequiredRewrite()
+ {
+ return false;
+ }
+
+ /**
+ * Return a copy of this virtual column that is identical to this virtual
column except that it operates on different
+ * columns, based on a renaming map where the key is the column to be
renamed and the value is the new column. Callers
+ * should check {@link #supportsRequiredRewrite()} first to ensure that this
method is supported.
+ *
+ * @param columnRewrites Column rewrite map
+ * @return Copy of this virtual column that operates on new columns based on
the rewrite map
+ */
+ default VirtualColumn rewriteRequiredColumns(Map<String, String>
columnRewrites)
+ {
+ throw DruidException.defensive("Required column rewrite is not supported
by virtual column[%s].", getOutputName());
+ }
+
/**
* Indicates that this virtual column can be referenced with dot notation.
For example,
* a virtual column named "foo" could be referred to as "foo.bar" with the
Cursor it is
@@ -518,7 +540,7 @@ public interface VirtualColumn extends Cacheable
* virtual column, regardless of the output name. If this method returns
null, it does not participate in equivalence
* comparisons.
*
- * @see VirtualColumns#findEquivalent(VirtualColumn)
+ * @see VirtualColumns#findEquivalent(VirtualColumns, VirtualColumn)
*/
@Nullable
default EquivalenceKey getEquivalanceKey()
diff --git
a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java
b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java
index c7d5cc54cb4..089931ac792 100644
--- a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java
+++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java
@@ -199,13 +199,48 @@ public class VirtualColumns implements Cacheable
}
/**
- * Check if a virtual column is already defined which is the same as some
other virtual column, ignoring output name,
- * returning that virtual column if it exists, or null if there is no
equivalent virtual column.
+ * Check if {@link #virtualColumns} contains a virtual column which is
equivalent to some other virtual column,
+ * ignoring output name, returning it if it exists or null if there is no
equivalent virtual column.
+ * <p>
+ * If the other virtual column depends on other virtual columns (from the
supplied {@link VirtualColumns}), this
+ * method will attempt to locate equivalent entries in {@link
#virtualColumns} to build a map of equivalent output
+ * names. Then, we rewrite the inputs of the other virtual column using
+ * {@link VirtualColumn#rewriteRequiredColumns(Map)} so that differently
named inputs are normalized prior to testing
+ * for equivalence.
*/
@Nullable
- public VirtualColumn findEquivalent(VirtualColumn virtualColumn)
+ public VirtualColumn findEquivalent(VirtualColumns otherVirtualColumns,
VirtualColumn otherVirtualColumn)
{
- return equivalence.get().get(virtualColumn.getEquivalanceKey());
+ // check to see if the virtual column refers to other virtual columns to
see if we need to normalize it
+ // by rewriting its inputs first to refer to the equivalent virtual columns
+ final Map<String, String> equivalenceRewriteMap = new HashMap<>();
+ for (String column : otherVirtualColumn.requiredColumns()) {
+ final VirtualColumn dependent =
otherVirtualColumns.getVirtualColumn(column);
+ if (dependent != null) {
+ final VirtualColumn equivalentDependent =
findEquivalent(otherVirtualColumns, dependent);
+ if (equivalentDependent != null) {
+ equivalenceRewriteMap.put(dependent.getOutputName(),
equivalentDependent.getOutputName());
+ } else {
+ // missing an equivalent dependent, that means we cannot be
equivalent so just bail early
+ return null;
+ }
+ }
+ }
+
+ if (!equivalenceRewriteMap.isEmpty() &&
!otherVirtualColumn.supportsRequiredRewrite()) {
+ // cannot safely check for equivalence if the rewrite map is not empty
and rewrites are not supported
+ return null;
+ }
+
+ final VirtualColumn toCheckForEquivalence;
+ // rewrite if needed
+ if (equivalenceRewriteMap.isEmpty()) {
+ toCheckForEquivalence = otherVirtualColumn;
+ } else {
+ toCheckForEquivalence =
otherVirtualColumn.rewriteRequiredColumns(equivalenceRewriteMap);
+ }
+
+ return equivalence.get().get(toCheckForEquivalence.getEquivalanceKey());
}
/**
diff --git
a/processing/src/main/java/org/apache/druid/segment/projections/Projections.java
b/processing/src/main/java/org/apache/druid/segment/projections/Projections.java
index 23321ba0803..0609ad926db 100644
---
a/processing/src/main/java/org/apache/druid/segment/projections/Projections.java
+++
b/processing/src/main/java/org/apache/druid/segment/projections/Projections.java
@@ -405,7 +405,10 @@ public class Projections
)
{
// check to see if we have an equivalent virtual column defined in the
projection, if so we can
- final VirtualColumn projectionEquivalent =
projection.getVirtualColumns().findEquivalent(queryVirtualColumn);
+ final VirtualColumn projectionEquivalent =
projection.getVirtualColumns().findEquivalent(
+ queryCursorBuildSpec.getVirtualColumns(),
+ queryVirtualColumn
+ );
if (projectionEquivalent != null) {
final String remapColumnName;
if (Objects.equals(projectionEquivalent.getOutputName(),
projection.getTimeColumnName())) {
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
index ba0ba9a0e37..1ea737b9d35 100644
---
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
@@ -54,6 +54,7 @@ import org.apache.druid.segment.vector.VectorValueSelector;
import javax.annotation.Nullable;
import java.util.List;
+import java.util.Map;
import java.util.Objects;
public class ExpressionVirtualColumn implements VirtualColumn
@@ -336,6 +337,23 @@ public class ExpressionVirtualColumn implements
VirtualColumn
return expressionAnalysis.get().getRequiredBindingsList();
}
+ @Override
+ public boolean supportsRequiredRewrite()
+ {
+ return true;
+ }
+
+ @Override
+ public VirtualColumn rewriteRequiredColumns(Map<String, String>
columnRewrites)
+ {
+ final Expr rewritten =
expression.parsed.get().rewriteBindings(columnRewrites);
+ return new ExpressionVirtualColumn(
+ name,
+ rewritten,
+ expression.outputType
+ );
+ }
+
@Override
public boolean usesDotNotation()
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
index 8ffde9a4f66..1cbcf36871f 100644
---
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
@@ -91,6 +91,7 @@ import javax.annotation.Nullable;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.List;
+import java.util.Map;
import java.util.Objects;
import java.util.Set;
@@ -796,6 +797,26 @@ public class NestedFieldVirtualColumn implements
VirtualColumn
return Collections.singletonList(fieldSpec.columnName);
}
+ @Override
+ public boolean supportsRequiredRewrite()
+ {
+ return true;
+ }
+
+ @Override
+ public VirtualColumn rewriteRequiredColumns(Map<String, String>
columnRewrites)
+ {
+ return new NestedFieldVirtualColumn(
+ columnRewrites.getOrDefault(fieldSpec.columnName,
fieldSpec.columnName),
+ outputName,
+ fieldSpec.expectedType,
+ fieldSpec.parts,
+ fieldSpec.processFromRaw,
+ null,
+ null
+ );
+ }
+
@Override
public boolean usesDotNotation()
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedMergeVirtualColumn.java
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedMergeVirtualColumn.java
index 8b546e56e49..7c46a0bd2ef 100644
---
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedMergeVirtualColumn.java
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedMergeVirtualColumn.java
@@ -36,6 +36,7 @@ import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.NilColumnValueSelector;
+import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.ColumnType;
@@ -71,6 +72,7 @@ import java.util.stream.Collectors;
public class NestedMergeVirtualColumn extends
SpecializedExpressionVirtualColumn
{
private final List<String> columns;
+ private final ExprMacroTable macroTable;
@JsonCreator
public NestedMergeVirtualColumn(
@@ -93,6 +95,7 @@ public class NestedMergeVirtualColumn extends
SpecializedExpressionVirtualColumn
);
this.columns = columns;
+ this.macroTable = macroTable;
}
@JsonProperty
@@ -121,6 +124,23 @@ public class NestedMergeVirtualColumn extends
SpecializedExpressionVirtualColumn
};
}
+ @Override
+ public boolean supportsRequiredRewrite()
+ {
+ return true;
+ }
+
+ @Override
+ public VirtualColumn rewriteRequiredColumns(Map<String, String>
columnRewrites)
+ {
+ List<String> rewritten = columns.stream().map(x ->
columnRewrites.getOrDefault(x, x)).toList();
+ return new NestedMergeVirtualColumn(
+ delegate.getOutputName(),
+ rewritten,
+ macroTable
+ );
+ }
+
@Override
public String toString()
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumn.java
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumn.java
index ee473391226..8096b26490f 100644
---
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumn.java
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumn.java
@@ -134,6 +134,30 @@ public class NestedObjectVirtualColumn extends
SpecializedExpressionVirtualColum
}
}
+ @Override
+ public boolean supportsRequiredRewrite()
+ {
+ return true;
+ }
+
+ @Override
+ public VirtualColumn rewriteRequiredColumns(Map<String, String>
columnRewrites)
+ {
+ final Map<String, TypedExpression> rewrittenKeyExprMap = new HashMap<>();
+ for (Map.Entry<String, TypedExpression> entry : keyExprMap.entrySet()) {
+ final TypedExpression rewrittenExpr = new TypedExpression(
+ Parser.parse(entry.getValue().expression,
macroTable).rewriteBindings(columnRewrites).stringify(),
+ entry.getValue().getType()
+ );
+ rewrittenKeyExprMap.put(entry.getKey(), rewrittenExpr);
+ }
+ return new NestedObjectVirtualColumn(
+ delegate.getOutputName(),
+ rewrittenKeyExprMap,
+ macroTable
+ );
+ }
+
@Override
public String toString()
{
diff --git
a/processing/src/main/java/org/apache/druid/timeline/DataSegment.java
b/processing/src/main/java/org/apache/druid/timeline/DataSegment.java
index ad6406a1340..601ea7613c5 100644
--- a/processing/src/main/java/org/apache/druid/timeline/DataSegment.java
+++ b/processing/src/main/java/org/apache/druid/timeline/DataSegment.java
@@ -60,6 +60,11 @@ public class DataSegment implements Comparable<DataSegment>,
Overshadowable<Data
public static final String TOMBSTONE_LOADSPEC_TYPE = "tombstone";
+ public static Interner<String> stringInterner()
+ {
+ return STRING_INTERNER;
+ }
+
/*
* The difference between this class and org.apache.druid.segment.Segment is
that this class contains the segment
* metadata only, while org.apache.druid.segment.Segment represents the
actual body of segment data, queryable.
diff --git
a/processing/src/main/java/org/apache/druid/timeline/partition/BaseDimensionRangeShardSpec.java
b/processing/src/main/java/org/apache/druid/timeline/partition/BaseDimensionRangeShardSpec.java
index b1e0d01c747..d7a9d2018b0 100644
---
a/processing/src/main/java/org/apache/druid/timeline/partition/BaseDimensionRangeShardSpec.java
+++
b/processing/src/main/java/org/apache/druid/timeline/partition/BaseDimensionRangeShardSpec.java
@@ -19,12 +19,16 @@
package org.apache.druid.timeline.partition;
+import com.google.common.collect.Interner;
+import com.google.common.collect.Interners;
import com.google.common.collect.Ordering;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.StringTuple;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.guava.Comparators;
+import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.segment.VirtualColumns;
+import org.apache.druid.timeline.DataSegment;
import javax.annotation.Nullable;
import java.util.Arrays;
@@ -33,6 +37,8 @@ import java.util.List;
public abstract class BaseDimensionRangeShardSpec implements ShardSpec
{
+ private static final Interner<VirtualColumn> VIRTUAL_COLUMN_INTERNER =
Interners.newWeakInterner();
+
protected final List<String> dimensions;
protected final VirtualColumns virtualColumns;
@Nullable
@@ -47,8 +53,12 @@ public abstract class BaseDimensionRangeShardSpec implements
ShardSpec
@Nullable StringTuple end
)
{
- this.dimensions = dimensions;
- this.virtualColumns = virtualColumns == null ? VirtualColumns.EMPTY :
virtualColumns;
+ this.dimensions =
dimensions.stream().map(DataSegment.stringInterner()::intern).toList();
+ this.virtualColumns = virtualColumns == null
+ ? VirtualColumns.EMPTY
+ :
VirtualColumns.create(Arrays.stream(virtualColumns.getVirtualColumns())
+
.map(VIRTUAL_COLUMN_INTERNER::intern)
+ .toList());
this.start = start;
this.end = end;
}
diff --git
a/processing/src/main/java/org/apache/druid/timeline/partition/ShardSpec.java
b/processing/src/main/java/org/apache/druid/timeline/partition/ShardSpec.java
index da1c046465b..e881b32484f 100644
---
a/processing/src/main/java/org/apache/druid/timeline/partition/ShardSpec.java
+++
b/processing/src/main/java/org/apache/druid/timeline/partition/ShardSpec.java
@@ -149,8 +149,8 @@ public interface ShardSpec
/**
* If any of the columns in {@link #getDomainDimensions()} was computed with
an expression and was not stored, the
- * {@link org.apache.druid.segment.VirtualColumn} which computes it is
stored here. This allows matching ranges even
- * when the value is not stored in the shard so long as {@link
VirtualColumns#findEquivalent(VirtualColumn)} exists.
+ * {@link VirtualColumn} which computes it is stored here. This allows
matching ranges even when the value is not
+ * stored in the shard so long as {@link
VirtualColumns#findEquivalent(VirtualColumns, VirtualColumn)} finds an equivalent.
*
* @return {@link VirtualColumns} associated with columns listed in {@link
#getDomainDimensions()}.
*/
diff --git
a/processing/src/test/java/org/apache/druid/query/filter/FilterSegmentPrunerTest.java
b/processing/src/test/java/org/apache/druid/query/filter/FilterSegmentPrunerTest.java
index 8d7628ad7e6..63da8751544 100644
---
a/processing/src/test/java/org/apache/druid/query/filter/FilterSegmentPrunerTest.java
+++
b/processing/src/test/java/org/apache/druid/query/filter/FilterSegmentPrunerTest.java
@@ -27,6 +27,7 @@ import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
+import org.apache.druid.segment.virtual.NestedFieldVirtualColumn;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.SegmentId;
import org.apache.druid.timeline.partition.DimensionRangeShardSpec;
@@ -212,10 +213,77 @@ class FilterSegmentPrunerTest
Assertions.assertInstanceOf(CompositeSegmentPruner.class, combined);
}
+ @Test
+ void testPruneVirtualColumnWithDependentVirtualColumn()
+ {
+ VirtualColumns shardVirtualColumns = VirtualColumns.create(
+ new NestedFieldVirtualColumn("obj", "$.a", "n0", ColumnType.STRING),
+ new ExpressionVirtualColumn("e0", "lower(\"n0\")", ColumnType.STRING,
TestExprMacroTable.INSTANCE)
+ );
+ // same expressions, different names
+ VirtualColumns shardVirtualColumnsDifferentNames = VirtualColumns.create(
+ new NestedFieldVirtualColumn("obj", "$.a", "n1", ColumnType.STRING),
+ new ExpressionVirtualColumn("e1", "lower(\"n1\")", ColumnType.STRING,
TestExprMacroTable.INSTANCE)
+ );
+
+ VirtualColumns shardVirtualColumnsDifferent = VirtualColumns.create(
+ new NestedFieldVirtualColumn("obj", "$.b", "n0", ColumnType.STRING),
+ new ExpressionVirtualColumn("e0", "lower(\"n0\")", ColumnType.STRING,
TestExprMacroTable.INSTANCE)
+ );
+
+ String interval1 = "2026-02-18T00:00:00Z/2026-02-19T00:00:00Z";
+ String interval2 = "2026-02-19T00:00:00Z/2026-02-20T00:00:00Z";
+ String interval3 = "2026-02-20T00:00:00Z/2026-02-21T00:00:00Z";
+ DataSegment seg1 = makeDataSegment(
+ interval1,
+ makeRange(List.of("e0"), shardVirtualColumns, 0, null,
StringTuple.create("f"))
+ );
+ DataSegment seg2 = makeDataSegment(
+ interval1,
+ makeRange(List.of("e0"), shardVirtualColumns, 1,
StringTuple.create("f"), null)
+ );
+ // same partitioning but different names in these segments
+ DataSegment seg3 = makeDataSegment(
+ interval2,
+ makeRange(List.of("e1"), shardVirtualColumnsDifferentNames, 0, null,
StringTuple.create("f"))
+ );
+ DataSegment seg4 = makeDataSegment(
+ interval2,
+ makeRange(List.of("e1"), shardVirtualColumnsDifferentNames, 1,
StringTuple.create("f"), null)
+ );
+ DataSegment seg5 = makeDataSegment(
+ interval3,
+ makeRange(List.of("e0"), shardVirtualColumnsDifferent, 0, null,
StringTuple.create("f"))
+ );
+ DataSegment seg6 = makeDataSegment(
+ interval3,
+ makeRange(List.of("e0"), shardVirtualColumnsDifferent, 1,
StringTuple.create("f"), null)
+ );
+
+ List<DataSegment> segs = List.of(seg1, seg2, seg3, seg4, seg5, seg6);
+
+ // query uses its own names
+ VirtualColumns queryVirtualColumns = VirtualColumns.create(
+ new NestedFieldVirtualColumn("obj", "$.a", "v0", ColumnType.STRING),
+ new ExpressionVirtualColumn("v1", "lower(\"v0\")", ColumnType.STRING,
TestExprMacroTable.INSTANCE)
+ );
+ // lower($.a) < "f" should prune the second half of the range (seg2 and
seg4)
+ DimFilter filter = new RangeFilter("v1", ColumnType.STRING, null, "f",
false, true, null);
+ FilterSegmentPruner pruner = new FilterSegmentPruner(filter, null,
queryVirtualColumns);
+
+ // prune twice to exercise cache
+ Assertions.assertEquals(Set.of(seg1, seg3, seg5, seg6), pruner.prune(segs,
Function.identity()));
+ Assertions.assertEquals(Set.of(seg1, seg3, seg5, seg6), pruner.prune(segs,
Function.identity()));
+
+ }
+
@Test
void testEqualsAndHashcode()
{
-
EqualsVerifier.forClass(FilterSegmentPruner.class).usingGetClass().withIgnoredFields("rangeCache").verify();
+ EqualsVerifier.forClass(FilterSegmentPruner.class)
+ .usingGetClass()
+ .withIgnoredFields("rangeCache", "shardEquivalenceCache")
+ .verify();
}
private ShardSpec makeRange(
diff --git
a/processing/src/test/java/org/apache/druid/segment/VirtualColumnsTest.java
b/processing/src/test/java/org/apache/druid/segment/VirtualColumnsTest.java
index c3ffe267638..b6323f7de2f 100644
--- a/processing/src/test/java/org/apache/druid/segment/VirtualColumnsTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/VirtualColumnsTest.java
@@ -494,11 +494,46 @@ public class VirtualColumnsTest extends
InitializedNullHandlingTest
ColumnType.DOUBLE,
TestExprMacroTable.INSTANCE
);
+ VirtualColumns otherVirtualColumns = VirtualColumns.create(v1, v2, v3);
- Assert.assertEquals(v0, virtualColumns.findEquivalent(v0));
- Assert.assertEquals(v0, virtualColumns.findEquivalent(v1));
- Assert.assertNull(virtualColumns.findEquivalent(v2));
- Assert.assertNull(virtualColumns.findEquivalent(v3));
+ Assert.assertEquals(v0,
virtualColumns.findEquivalent(VirtualColumns.EMPTY, v0));
+ Assert.assertEquals(v0, virtualColumns.findEquivalent(otherVirtualColumns,
v1));
+ Assert.assertNull(virtualColumns.findEquivalent(otherVirtualColumns, v2));
+ Assert.assertNull(virtualColumns.findEquivalent(otherVirtualColumns, v3));
+ }
+
+ @Test
+ public void testFindEquivalentWithDependentVirtualColumn()
+ {
+ final NestedFieldVirtualColumn n0 = new NestedFieldVirtualColumn("obj",
"$.a", "n0", ColumnType.STRING);
+ final ExpressionVirtualColumn e0 = new ExpressionVirtualColumn(
+ "e0", "lower(\"n0\")", ColumnType.STRING, TestExprMacroTable.INSTANCE
+ );
+ final VirtualColumns virtualColumns = VirtualColumns.create(n0, e0);
+
+ final NestedFieldVirtualColumn n1 = new NestedFieldVirtualColumn("obj",
"$.a", "n1", ColumnType.STRING);
+ final ExpressionVirtualColumn e1 = new ExpressionVirtualColumn(
+ "e1",
+ "lower(\"n1\")",
+ ColumnType.STRING,
+ TestExprMacroTable.INSTANCE
+ );
+ final VirtualColumns otherVirtualColumns = VirtualColumns.create(n1, e1);
+
+ Assert.assertEquals(n0, virtualColumns.findEquivalent(otherVirtualColumns,
n1));
+
+ Assert.assertEquals(e0, virtualColumns.findEquivalent(otherVirtualColumns,
e1));
+
+ // a different nested field path produces no equivalence, even if it has
the same name
+ final NestedFieldVirtualColumn n0different = new
NestedFieldVirtualColumn("obj", "$.b", "n0", ColumnType.STRING);
+ final ExpressionVirtualColumn e0different = new ExpressionVirtualColumn(
+ "e0", "lower(\"n0\")",
+ ColumnType.STRING,
+ TestExprMacroTable.INSTANCE
+ );
+ final VirtualColumns notEquivalent = VirtualColumns.create(n0different,
e0different);
+ Assert.assertNull(virtualColumns.findEquivalent(notEquivalent,
n0different));
+ Assert.assertNull(virtualColumns.findEquivalent(notEquivalent,
e0different));
}
@Test
diff --git
a/processing/src/test/java/org/apache/druid/segment/virtual/NestedMergeVirtualColumnTest.java
b/processing/src/test/java/org/apache/druid/segment/virtual/NestedMergeVirtualColumnTest.java
index 0c0bb8c2275..89e1ce1b520 100644
---
a/processing/src/test/java/org/apache/druid/segment/virtual/NestedMergeVirtualColumnTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/virtual/NestedMergeVirtualColumnTest.java
@@ -113,6 +113,7 @@ public class NestedMergeVirtualColumnTest
{
EqualsVerifier.forClass(NestedMergeVirtualColumn.class)
.withNonnullFields("columns")
+ .withIgnoredFields("macroTable")
.usingGetClass()
.verify();
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]