This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new d016df3091 Allow usage of star-tree index with null handling enabled
when no null values in segment columns (#14177)
d016df3091 is described below
commit d016df3091483d4430c668e0b30f6381bd0ba56a
Author: Yash Mayya <[email protected]>
AuthorDate: Thu Oct 10 03:12:07 2024 +0530
Allow usage of star-tree index with null handling enabled when no null
values in segment columns (#14177)
---
.../apache/pinot/core/startree/StarTreeUtils.java | 48 +++++++++++++++++++++-
.../tests/StarTreeClusterIntegrationTest.java | 17 +++++++-
2 files changed, 62 insertions(+), 3 deletions(-)
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java
index 039da20db0..d518028595 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java
@@ -46,6 +46,8 @@ import org.apache.pinot.segment.spi.index.startree.AggregationFunctionColumnPair
import org.apache.pinot.segment.spi.index.startree.AggregationSpec;
import org.apache.pinot.segment.spi.index.startree.StarTreeV2;
import org.apache.pinot.segment.spi.index.startree.StarTreeV2Metadata;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
@SuppressWarnings("rawtypes")
@@ -53,6 +55,8 @@ public class StarTreeUtils {
private StarTreeUtils() {
}
+ private static final Logger LOGGER = LoggerFactory.getLogger(StarTreeUtils.class);
+
/**
* Extracts the {@link AggregationFunctionColumnPair}s from the given {@link AggregationFunction}s. Returns
* {@code null} if any {@link AggregationFunction} cannot be represented as an {@link AggregationFunctionColumnPair}
@@ -354,7 +358,7 @@ public class StarTreeUtils {
QueryContext queryContext, AggregationFunction[] aggregationFunctions,
@Nullable FilterContext filter,
List<Pair<Predicate, PredicateEvaluator>> predicateEvaluators) {
List<StarTreeV2> starTrees = indexSegment.getStarTrees();
- if (starTrees == null || queryContext.isSkipStarTree() || queryContext.isNullHandlingEnabled()) {
+ if (starTrees == null || queryContext.isSkipStarTree()) {
return null;
}
@@ -363,15 +367,57 @@ public class StarTreeUtils {
if (aggregationFunctionColumnPairs == null) {
return null;
}
+
Map<String, List<CompositePredicateEvaluator>> predicateEvaluatorsMap =
extractPredicateEvaluatorsMap(indexSegment, filter,
predicateEvaluators);
if (predicateEvaluatorsMap == null) {
return null;
}
+
ExpressionContext[] groupByExpressions =
queryContext.getGroupByExpressions() != null ?
queryContext.getGroupByExpressions()
.toArray(new ExpressionContext[0]) : null;
+ if (queryContext.isNullHandlingEnabled()) {
+ // We can still use the star-tree index if there aren't actually any null values in this segment for all the
+ // metrics being aggregated, all the dimensions being filtered on / grouped by.
+ for (AggregationFunctionColumnPair aggregationFunctionColumnPair : aggregationFunctionColumnPairs) {
+ if (aggregationFunctionColumnPair == AggregationFunctionColumnPair.COUNT_STAR) {
+ // Null handling is irrelevant for COUNT(*)
+ continue;
+ }
+
+ String column = aggregationFunctionColumnPair.getColumn();
+ DataSource dataSource = indexSegment.getDataSource(column);
+ if (dataSource.getNullValueVector() != null && !dataSource.getNullValueVector().getNullBitmap().isEmpty()) {
+ LOGGER.debug("Cannot use star-tree index because aggregation column: '{}' has null values", column);
+ return null;
+ }
+ }
+
+ for (String column : predicateEvaluatorsMap.keySet()) {
+ DataSource dataSource = indexSegment.getDataSource(column);
+ if (dataSource.getNullValueVector() != null && !dataSource.getNullValueVector().getNullBitmap().isEmpty()) {
+ LOGGER.debug("Cannot use star-tree index because filter column: '{}' has null values", column);
+ return null;
+ }
+ }
+
+ Set<String> groupByColumns = new HashSet<>();
+ if (groupByExpressions != null) {
+ for (ExpressionContext groupByExpression : groupByExpressions) {
+ groupByExpression.getColumns(groupByColumns);
+ }
+ }
+ for (String column : groupByColumns) {
+ DataSource dataSource = indexSegment.getDataSource(column);
+ if (dataSource.getNullValueVector() != null && !dataSource.getNullValueVector().getNullBitmap().isEmpty()) {
+ LOGGER.debug("Cannot use star-tree index because group-by column: '{}' has null values", column);
+ return null;
+ }
+ }
+ }
+
List<Pair<AggregationFunction, AggregationFunctionColumnPair>>
aggregations =
new ArrayList<>(aggregationFunctions.length);
for (int i = 0; i < aggregationFunctions.length; i++) {
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/StarTreeClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/StarTreeClusterIntegrationTest.java
index 66c8fa4e65..276ffe53b3 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/StarTreeClusterIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/StarTreeClusterIntegrationTest.java
@@ -159,8 +159,8 @@ public class StarTreeClusterIntegrationTest extends BaseClusterIntegrationTest {
}
for (String metric : metrics) {
aggregationConfigs.add(
- new StarTreeAggregationConfig(metric, functionType.name(), null, CompressionCodec.LZ4,
- false, 4, null, null));
+ new StarTreeAggregationConfig(metric, functionType.name(), null, CompressionCodec.LZ4, false, 4, null,
+ null));
}
}
return new StarTreeIndexConfig(dimensions, null, null, aggregationConfigs,
maxLeafRecords);
@@ -213,22 +213,35 @@ public class StarTreeClusterIntegrationTest extends BaseClusterIntegrationTest {
throws Exception {
String explain = "EXPLAIN PLAN FOR ";
String disableStarTree = "SET useStarTree = false; ";
+ // The star-tree index doesn't currently support null values, but we should still be able to use the star-tree index
+ // here since there aren't actually any null values in the dataset.
+ String nullHandlingEnabled = "SET enableNullHandling = true; ";
if (verifyPlan) {
JsonNode starPlan = postQuery(explain + starQuery);
JsonNode referencePlan = postQuery(disableStarTree + explain +
starQuery);
+ JsonNode nullHandlingEnabledPlan = postQuery(nullHandlingEnabled + explain + starQuery);
assertTrue(starPlan.toString().contains(FILTER_STARTREE_INDEX) ||
starPlan.toString().contains("FILTER_EMPTY")
|| starPlan.toString().contains("ALL_SEGMENTS_PRUNED_ON_SERVER"),
"StarTree query did not indicate use of StarTree index in query plan. Plan: " + starPlan);
assertFalse(referencePlan.toString().contains(FILTER_STARTREE_INDEX),
"Reference query indicated use of StarTree index in query plan. Plan: " + referencePlan);
+ assertTrue(
+ nullHandlingEnabledPlan.toString().contains(FILTER_STARTREE_INDEX) || nullHandlingEnabledPlan.toString()
+ .contains("FILTER_EMPTY") || nullHandlingEnabledPlan.toString().contains("ALL_SEGMENTS_PRUNED_ON_SERVER"),
+ "StarTree query with null handling enabled did not indicate use of StarTree index in query plan. Plan: "
+ + nullHandlingEnabledPlan);
}
JsonNode starResponse = postQuery(starQuery);
String referenceQuery = disableStarTree + starQuery;
JsonNode referenceResponse = postQuery(referenceQuery);
+ // Don't compare the actual response values since they could differ (e.g. "null" vs "Infinity" for MIN
+ // aggregation function with no values aggregated)
+ JsonNode nullHandlingEnabledResponse = postQuery(nullHandlingEnabled + starQuery);
assertEquals(starResponse.get("exceptions").size(), 0);
assertEquals(referenceResponse.get("exceptions").size(), 0);
+ assertEquals(nullHandlingEnabledResponse.get("exceptions").size(), 0);
assertEquals(starResponse.get("resultTable"),
referenceResponse.get("resultTable"), String.format(
"Query comparison failed for: \n"
+ "Star Query: %s\nStar Response: %s\nReference Query: %s\nReference Response: %s\nRandom Seed: %d",
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]