This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new eb44f1e6056 fix ExpressionVirtualColumn equivalence key to use
Expr.stringify instead of raw input string to stabilize comparison (#18334)
eb44f1e6056 is described below
commit eb44f1e605629450450d0c89d0bbf5a5ffe7c218
Author: Clint Wylie <[email protected]>
AuthorDate: Fri Aug 1 20:50:20 2025 -0700
fix ExpressionVirtualColumn equivalence key to use Expr.stringify instead
of raw input string to stabilize comparison (#18334)
---
.../segment/virtual/ExpressionVirtualColumn.java | 70 ++++++++++++----------
.../druid/segment/CursorFactoryProjectionTest.java | 2 +-
.../druid/segment/join/JoinFilterAnalyzerTest.java | 4 +-
3 files changed, 42 insertions(+), 34 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
index 91c9c9057f4..fc2e7ddcc45 100644
---
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
@@ -62,7 +62,6 @@ public class ExpressionVirtualColumn implements VirtualColumn
private final String name;
private final Expression expression;
- private final Supplier<Expr> parsedExpression;
private final Supplier<Expr.BindingAnalysis> expressionAnalysis;
private final Supplier<byte[]> cacheKey;
@@ -111,7 +110,7 @@ public class ExpressionVirtualColumn implements
VirtualColumn
@Nullable final ColumnType outputType
)
{
- this(name, parsedExpression.toString(), outputType, () ->
parsedExpression);
+ this(name, parsedExpression.stringify(), outputType, () ->
parsedExpression);
}
/**
@@ -125,9 +124,12 @@ public class ExpressionVirtualColumn implements
VirtualColumn
)
{
this.name = Preconditions.checkNotNull(name, "name");
- this.expression = new Expression(Preconditions.checkNotNull(expression,
"expression"), outputType);
- this.parsedExpression = parsedExpression;
- this.expressionAnalysis =
Suppliers.memoize(parsedExpression.get()::analyzeInputs);
+ this.expression = new Expression(
+ Preconditions.checkNotNull(expression, "expression"),
+ parsedExpression,
+ outputType
+ );
+ this.expressionAnalysis = Suppliers.memoize(() ->
parsedExpression.get().analyzeInputs());
this.cacheKey = makeCacheKeySupplier();
}
@@ -155,7 +157,7 @@ public class ExpressionVirtualColumn implements
VirtualColumn
@VisibleForTesting
public Supplier<Expr> getParsedExpression()
{
- return parsedExpression;
+ return expression.parsed;
}
@Override
@@ -166,14 +168,14 @@ public class ExpressionVirtualColumn implements
VirtualColumn
{
if (isDirectAccess(columnSelectorFactory)) {
return columnSelectorFactory.makeDimensionSelector(
-
dimensionSpec.withDimension(parsedExpression.get().getBindingIfIdentifier())
+
dimensionSpec.withDimension(expression.parsed.get().getBindingIfIdentifier())
);
}
return dimensionSpec.decorate(
ExpressionSelectors.makeDimensionSelector(
columnSelectorFactory,
- parsedExpression.get(),
+ expression.parsed.get(),
dimensionSpec.getExtractionFn()
)
);
@@ -183,7 +185,7 @@ public class ExpressionVirtualColumn implements
VirtualColumn
public ColumnValueSelector<?> makeColumnValueSelector(String columnName,
ColumnSelectorFactory factory)
{
if (isDirectAccess(factory)) {
- return
factory.makeColumnValueSelector(parsedExpression.get().getBindingIfIdentifier());
+ return
factory.makeColumnValueSelector(expression.parsed.get().getBindingIfIdentifier());
}
final ColumnCapabilities capabilities = capabilities(factory, name);
@@ -191,9 +193,9 @@ public class ExpressionVirtualColumn implements
VirtualColumn
// other single and multi-value STRING selectors, whose getObject is
expected to produce a single STRING value
// or List of STRING values.
if (capabilities.is(ValueType.STRING)) {
- return ExpressionSelectors.makeStringColumnValueSelector(factory,
parsedExpression.get());
+ return ExpressionSelectors.makeStringColumnValueSelector(factory,
expression.parsed.get());
}
- return ExpressionSelectors.makeColumnValueSelector(factory,
parsedExpression.get());
+ return ExpressionSelectors.makeColumnValueSelector(factory,
expression.parsed.get());
}
@Override
@@ -204,7 +206,7 @@ public class ExpressionVirtualColumn implements
VirtualColumn
return true;
}
- final ExpressionPlan plan = ExpressionPlanner.plan(inspector,
parsedExpression.get());
+ final ExpressionPlan plan = ExpressionPlanner.plan(inspector,
expression.parsed.get());
return plan.is(ExpressionPlan.Trait.VECTORIZABLE);
}
@@ -216,31 +218,31 @@ public class ExpressionVirtualColumn implements
VirtualColumn
{
if (isDirectAccess(factory)) {
return factory.makeSingleValueDimensionSelector(
-
dimensionSpec.withDimension(parsedExpression.get().getBindingIfIdentifier())
+
dimensionSpec.withDimension(expression.parsed.get().getBindingIfIdentifier())
);
}
- return
ExpressionVectorSelectors.makeSingleValueDimensionVectorSelector(factory,
parsedExpression.get());
+ return
ExpressionVectorSelectors.makeSingleValueDimensionVectorSelector(factory,
expression.parsed.get());
}
@Override
public VectorValueSelector makeVectorValueSelector(String columnName,
VectorColumnSelectorFactory factory)
{
if (isDirectAccess(factory)) {
- return
factory.makeValueSelector(parsedExpression.get().getBindingIfIdentifier());
+ return
factory.makeValueSelector(expression.parsed.get().getBindingIfIdentifier());
}
- return ExpressionVectorSelectors.makeVectorValueSelector(factory,
parsedExpression.get());
+ return ExpressionVectorSelectors.makeVectorValueSelector(factory,
expression.parsed.get());
}
@Override
public VectorObjectSelector makeVectorObjectSelector(String columnName,
VectorColumnSelectorFactory factory)
{
if (isDirectAccess(factory)) {
- return
factory.makeObjectSelector(parsedExpression.get().getBindingIfIdentifier());
+ return
factory.makeObjectSelector(expression.parsed.get().getBindingIfIdentifier());
}
- return ExpressionVectorSelectors.makeVectorObjectSelector(factory,
parsedExpression.get(), expression.outputType);
+ return ExpressionVectorSelectors.makeVectorObjectSelector(factory,
expression.parsed.get(), expression.outputType);
}
@Nullable
@@ -253,14 +255,14 @@ public class ExpressionVirtualColumn implements
VirtualColumn
{
if (isDirectAccess(factory)) {
return factory.makeGroupByVectorColumnSelector(
- parsedExpression.get().getBindingIfIdentifier(),
+ expression.parsed.get().getBindingIfIdentifier(),
deferExpressionDimensions
);
}
return ExpressionVectorSelectors.makeGroupByVectorColumnSelector(
factory,
- parsedExpression.get(),
+ expression.parsed.get(),
deferExpressionDimensions
);
}
@@ -295,12 +297,12 @@ public class ExpressionVirtualColumn implements
VirtualColumn
public ColumnCapabilities capabilities(ColumnInspector inspector, String
columnName)
{
if (isDirectAccess(inspector)) {
- return
inspector.getColumnCapabilities(parsedExpression.get().getBindingIfIdentifier());
+ return
inspector.getColumnCapabilities(expression.parsed.get().getBindingIfIdentifier());
}
final ColumnType outputType = expression.outputType;
- final ExpressionPlan plan = ExpressionPlanner.plan(inspector,
parsedExpression.get());
+ final ExpressionPlan plan = ExpressionPlanner.plan(inspector,
expression.parsed.get());
final ColumnCapabilities inferred =
plan.inferColumnCapabilities(outputType);
// if we can infer the column capabilities from the expression plan, then
use that
if (inferred != null) {
@@ -367,13 +369,14 @@ public class ExpressionVirtualColumn implements
VirtualColumn
}
final ExpressionVirtualColumn that = (ExpressionVirtualColumn) o;
return Objects.equals(name, that.name) &&
- Objects.equals(expression, that.expression);
+ Objects.equals(expression.expressionString,
that.expression.expressionString) &&
+ Objects.equals(expression.outputType, that.expression.outputType);
}
@Override
public int hashCode()
{
- return Objects.hash(name, expression);
+ return Objects.hash(name, expression.expressionString,
expression.outputType);
}
@Override
@@ -391,9 +394,9 @@ public class ExpressionVirtualColumn implements
VirtualColumn
*/
private boolean isDirectAccess(final ColumnInspector inspector)
{
- if (parsedExpression.get().isIdentifier()) {
+ if (expression.parsed.get().isIdentifier()) {
final ColumnCapabilities baseCapabilities =
-
inspector.getColumnCapabilities(parsedExpression.get().getBindingIfIdentifier());
+
inspector.getColumnCapabilities(expression.parsed.get().getBindingIfIdentifier());
if (expression.outputType == null) {
// No desired output type. Anything from the source is fine.
@@ -412,7 +415,7 @@ public class ExpressionVirtualColumn implements
VirtualColumn
return Suppliers.memoize(() -> {
CacheKeyBuilder builder = new
CacheKeyBuilder(VirtualColumnCacheHelper.CACHE_TYPE_ID_EXPRESSION)
.appendString(name)
- .appendCacheable(parsedExpression.get());
+ .appendCacheable(expression.parsed.get());
if (expression.outputType != null) {
builder.appendString(expression.outputType.toString());
@@ -426,16 +429,20 @@ public class ExpressionVirtualColumn implements
VirtualColumn
* expressions, for example it will not currently consider something like 'a
+ b' equivalent to 'b + a'. This is ok
* for current uses of this functionality, but in theory we could push down
equivalence to the parsed expression
* instead of checking for an identical string expression, it would just be
a lot more expensive.
+ *
+ * Equivalence is done using {@link Expr#stringify()} to stabilize the
comparisons
*/
private static final class Expression implements EquivalenceKey
{
private final String expressionString;
+ private final Supplier<Expr> parsed;
@Nullable
private final ColumnType outputType;
- private Expression(String expression, @Nullable ColumnType outputType)
+ private Expression(String expression, Supplier<Expr> parsedExpression,
@Nullable ColumnType outputType)
{
this.expressionString = expression;
+ this.parsed = parsedExpression;
this.outputType = outputType;
}
@@ -449,20 +456,21 @@ public class ExpressionVirtualColumn implements
VirtualColumn
return false;
}
Expression that = (Expression) o;
- return Objects.equals(expressionString, that.expressionString) &&
Objects.equals(outputType, that.outputType);
+ return Objects.equals(parsed.get().stringify(),
that.parsed.get().stringify())
+ && Objects.equals(outputType, that.outputType);
}
@Override
public int hashCode()
{
- return Objects.hash(expressionString, outputType);
+ return Objects.hash(parsed.get().stringify(), outputType);
}
@Override
public String toString()
{
return "Expression{" +
- "expression='" + expressionString + '\'' +
+ "expression='" + parsed.get().stringify() + '\'' +
", outputType=" + outputType +
'}';
}
diff --git
a/processing/src/test/java/org/apache/druid/segment/CursorFactoryProjectionTest.java
b/processing/src/test/java/org/apache/druid/segment/CursorFactoryProjectionTest.java
index 44ae7b10a7d..aaef9cc8930 100644
---
a/processing/src/test/java/org/apache/druid/segment/CursorFactoryProjectionTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/CursorFactoryProjectionTest.java
@@ -605,7 +605,7 @@ public class CursorFactoryProjectionTest extends
InitializedNullHandlingTest
.setVirtualColumns(
new ExpressionVirtualColumn(
"v0",
- "concat(b, 'foo')",
+ "concat(\"b\", 'foo')",
ColumnType.STRING,
TestExprMacroTable.INSTANCE
)
diff --git
a/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java
b/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java
index 153fa071cb4..29830bb86f7 100644
---
a/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java
@@ -2246,13 +2246,13 @@ public class JoinFilterAnalyzerTest extends
BaseHashJoinSegmentCursorFactoryTest
expectedVirtualColumns = ImmutableSet.of(
new ExpressionVirtualColumn(
rewrittenRegionIsoCodeColumnName,
- "(upper [(lower [regionIsoCode])])",
+ "upper(lower(\"regionIsoCode\"))",
ColumnType.STRING,
ExprMacroTable.nil()
),
new ExpressionVirtualColumn(
rewrittenCountryIsoCodeColumnName,
- "(upper [(lower [countryIsoCode])])",
+ "upper(lower(\"countryIsoCode\"))",
ColumnType.STRING,
ExprMacroTable.nil()
)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]