This is an automated email from the ASF dual-hosted git repository.
apurtell pushed a commit to branch PHOENIX-7876-feature
in repository https://gitbox.apache.org/repos/asf/phoenix.git
The following commit(s) were added to refs/heads/PHOENIX-7876-feature by this
push:
new 114f3f9d6a PHOENIX-7924 EXPLAIN improvement bug fixes (#2531)
114f3f9d6a is described below
commit 114f3f9d6aa8c9413eafe484184ec79683b25cec
Author: Andrew Purtell <[email protected]>
AuthorDate: Sun Jun 14 19:53:38 2026 -0700
PHOENIX-7924 EXPLAIN improvement bug fixes (#2531)
Co-authored-by: Claude Opus 4.8[1m] <[email protected]>
---
.../apache/phoenix/compile/StatementContext.java | 32 +++++++
.../phoenix/expression/LiteralExpression.java | 10 +-
.../apache/phoenix/optimize/QueryOptimizer.java | 28 ++++++
.../parse/IndexExpressionParseNodeRewriter.java | 10 +-
.../org/apache/phoenix/schema/MetaDataClient.java | 9 +-
.../phoenix/query/explain/ExplainPlanTest.java | 103 ++++++++++++++++++++-
6 files changed, 182 insertions(+), 10 deletions(-)
diff --git
a/phoenix-core-client/src/main/java/org/apache/phoenix/compile/StatementContext.java
b/phoenix-core-client/src/main/java/org/apache/phoenix/compile/StatementContext.java
index abe3d446b3..4ce383aed9 100644
---
a/phoenix-core-client/src/main/java/org/apache/phoenix/compile/StatementContext.java
+++
b/phoenix-core-client/src/main/java/org/apache/phoenix/compile/StatementContext.java
@@ -27,6 +27,7 @@ import java.util.Deque;
import java.util.EnumMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
+import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
@@ -112,6 +113,7 @@ public class StatementContext {
private List<String> appliedRewrites;
private int derivedTableFlattenCount;
private Map<String, List<String>> appliedIndexExpressionMatches;
+ private Map<String, Map<String, String>> appliedIndexExpressionPairs;
private Set<String> functionalIndexNames;
private Set<Pair<String, String>> partialIndexCheckedSet;
private Map<String, List<Expression>> serverParsedProjections;
@@ -176,6 +178,7 @@ public class StatementContext {
this.appliedRewrites = context.appliedRewrites;
this.derivedTableFlattenCount = context.derivedTableFlattenCount;
this.appliedIndexExpressionMatches = context.appliedIndexExpressionMatches;
+ this.appliedIndexExpressionPairs = context.appliedIndexExpressionPairs;
this.functionalIndexNames = context.functionalIndexNames;
this.partialIndexCheckedSet = context.partialIndexCheckedSet;
this.serverParsedProjections = context.serverParsedProjections;
@@ -249,6 +252,7 @@ public class StatementContext {
this.appliedRewrites = new ArrayList<>();
this.derivedTableFlattenCount = 0;
this.appliedIndexExpressionMatches = Maps.newLinkedHashMap();
+ this.appliedIndexExpressionPairs = Maps.newLinkedHashMap();
this.functionalIndexNames = Sets.newHashSet();
this.partialIndexCheckedSet = Sets.newHashSet();
this.serverParsedProjections = null;
@@ -546,6 +550,7 @@ public class StatementContext {
this.appliedRewrites = source.appliedRewrites;
this.derivedTableFlattenCount = source.derivedTableFlattenCount;
this.appliedIndexExpressionMatches = source.appliedIndexExpressionMatches;
+ this.appliedIndexExpressionPairs = source.appliedIndexExpressionPairs;
this.functionalIndexNames = source.functionalIndexNames;
this.partialIndexCheckedSet = source.partialIndexCheckedSet;
this.serverParsedProjections = source.serverParsedProjections;
@@ -591,6 +596,33 @@ public class StatementContext {
return matches == null ? Collections.emptyList() : matches;
}
+ /**
+ * Records the {@code <index column name, indexed expression string>} pairs
for substitutions that
+ * actually fired against this query for the given functional index. Carries
enough information
+ * for the optimizer to emit one {@code REWRITE INDEX EXPRESSION <expr> AS
<col>} breadcrumb per
+ * applied substitution after the chosen plan is selected. Deduplicated per
index, preserving
+ * first-seen insertion order.
+ */
+ public void recordAppliedIndexExpressionPairs(String indexName, Map<String,
String> pairs) {
+ if (pairs == null || pairs.isEmpty()) {
+ return;
+ }
+ Map<String, String> existing =
+ appliedIndexExpressionPairs.computeIfAbsent(indexName, k -> new
LinkedHashMap<>());
+ for (Map.Entry<String, String> entry : pairs.entrySet()) {
+ existing.putIfAbsent(entry.getKey(), entry.getValue());
+ }
+ }
+
+ /**
+ * Returns the {@code <index column name, indexed expression string>} pairs
that actually
+ * substituted against this query for the given functional index, or an
empty map if none.
+ */
+ public Map<String, String> getAppliedIndexExpressionPairs(String indexName) {
+ Map<String, String> pairs = appliedIndexExpressionPairs.get(indexName);
+ return pairs == null ? Collections.emptyMap() : pairs;
+ }
+
/** Marks the given index as a functional index. */
public void recordFunctionalIndex(String indexName) {
functionalIndexNames.add(indexName);
diff --git
a/phoenix-core-client/src/main/java/org/apache/phoenix/expression/LiteralExpression.java
b/phoenix-core-client/src/main/java/org/apache/phoenix/expression/LiteralExpression.java
index 7075661567..7735d12124 100644
---
a/phoenix-core-client/src/main/java/org/apache/phoenix/expression/LiteralExpression.java
+++
b/phoenix-core-client/src/main/java/org/apache/phoenix/expression/LiteralExpression.java
@@ -265,10 +265,12 @@ public class LiteralExpression extends
BaseTerminalExpression {
@Override
public String toString() {
- if (value == null && byteValue != null) {
- return Bytes.toStringBinary(byteValue);
- } else if (value == null) {
- return "null";
+ // Typed null literals carry an empty byteValue
(ByteUtil.EMPTY_BYTE_ARRAY); render those as
+ // "null" rather than the empty string Bytes.toStringBinary would produce,
so callers like
+ // FunctionExpression.toString that emit comma-separated children don't
print a phantom
+ // trailing empty argument such as BSON_VALUE(payload, 'k', 'VARCHAR', ).
+ if (value == null) {
+ return (byteValue != null && byteValue.length > 0) ?
Bytes.toStringBinary(byteValue) : "null";
}
// TODO: move into PDataType?
if (type.isCoercibleTo(PTimestamp.INSTANCE)) {
diff --git
a/phoenix-core-client/src/main/java/org/apache/phoenix/optimize/QueryOptimizer.java
b/phoenix-core-client/src/main/java/org/apache/phoenix/optimize/QueryOptimizer.java
index 63fe8e5a8c..fd5bfaa498 100644
---
a/phoenix-core-client/src/main/java/org/apache/phoenix/optimize/QueryOptimizer.java
+++
b/phoenix-core-client/src/main/java/org/apache/phoenix/optimize/QueryOptimizer.java
@@ -564,6 +564,8 @@ public class QueryOptimizer {
}
dataPlan.getContext().recordAppliedIndexExpressionMatches(index.getTableName().getString(),
indexExpressionRewriter.getAppliedFunctionalSubstitutions().values());
+
dataPlan.getContext().recordAppliedIndexExpressionPairs(index.getTableName().getString(),
+ indexExpressionRewriter.getAppliedFunctionalSubstitutions());
QueryCompiler compiler = new QueryCompiler(statement,
rewrittenIndexSelect, resolver,
targetColumns, parallelIteratorFactory,
dataPlan.getContext().getSequenceManager(),
isProjected, true,
dataPlans).withRewriteContext(dataPlan.getContext());
@@ -970,9 +972,32 @@ public class QueryOptimizer {
winner.setOptimizerDecision(
new
OptimizerDecision(winner.getTableRef().getTable().getTableName().getString(),
rule,
state == null ? null : state.getRejections()));
+ recordFunctionalIndexExpressionBreadcrumbs(winner);
return winner;
}
+ /**
+ * Emit one {@code INDEX EXPRESSION <expr> AS <col>} rewrite breadcrumb on
the chosen plan's
+ * context for every functional-index substitution that actually matched a
query expression. The
+ * pairs were captured at index-candidate-build time (see {@code addPlan} and
+ * {@code rewriteQueryWithIndexReplacement}) and are looked up here only for
the winner, so
+ * unmatched expressions and unchosen indexes never produce breadcrumbs.
+ */
+ private static void recordFunctionalIndexExpressionBreadcrumbs(QueryPlan
winner) {
+ String tableName =
winner.getTableRef().getTable().getTableName().getString();
+ Map<String, String> pairs =
winner.getContext().getAppliedIndexExpressionPairs(tableName);
+ if (pairs.isEmpty()) {
+ return;
+ }
+ for (Map.Entry<String, String> entry : pairs.entrySet()) {
+ // Skip if any caller already recorded an equivalent breadcrumb for this
winner.
+ String breadcrumb = "INDEX EXPRESSION " + entry.getValue() + " AS " +
entry.getKey();
+ if (!winner.getContext().getAppliedRewrites().contains(breadcrumb)) {
+ winner.getContext().addAppliedRewrite(breadcrumb);
+ }
+ }
+ }
+
/**
* Returns {@link OptimizerReasons#matches(String)} for {@code winner} when
it is a functional
* index whose indexed expression actually substituted a path expression in
the user's query, or
@@ -1146,6 +1171,9 @@ public class QueryOptimizer {
breadcrumbContext.recordAppliedIndexExpressionMatches(
indexTableRef.getTable().getTableName().getString(),
indexExpressionRewriter.getAppliedFunctionalSubstitutions().values());
+ breadcrumbContext.recordAppliedIndexExpressionPairs(
+ indexTableRef.getTable().getTableName().getString(),
+ indexExpressionRewriter.getAppliedFunctionalSubstitutions());
}
return indexSelect;
diff --git
a/phoenix-core-client/src/main/java/org/apache/phoenix/parse/IndexExpressionParseNodeRewriter.java
b/phoenix-core-client/src/main/java/org/apache/phoenix/parse/IndexExpressionParseNodeRewriter.java
index f4863adb02..8dab318ffb 100644
---
a/phoenix-core-client/src/main/java/org/apache/phoenix/parse/IndexExpressionParseNodeRewriter.java
+++
b/phoenix-core-client/src/main/java/org/apache/phoenix/parse/IndexExpressionParseNodeRewriter.java
@@ -63,6 +63,13 @@ public class IndexExpressionParseNodeRewriter extends
ParseNodeRewriter {
this(index, alias, connection, udfParseNodes, null);
}
+ /**
+ * Retained for API compatibility. The {@code breadcrumbContext} parameter
is no longer used:
+ * {@code REWRITE INDEX EXPRESSION ... AS ...} breadcrumbs are now emitted
by the optimizer after
+ * a winner is selected, against {@link
#getAppliedFunctionalSubstitutions()}, so the breadcrumb
+ * fires once per substitution that actually matched a query expression and
only for the chosen
+ * index plan rather than for every PK column of every candidate index.
+ */
public IndexExpressionParseNodeRewriter(PTable index, String alias,
PhoenixConnection connection,
Map<String, UDFParseNode> udfParseNodes, StatementContext
breadcrumbContext)
throws SQLException {
@@ -103,9 +110,6 @@ public class IndexExpressionParseNodeRewriter extends
ParseNodeRewriter {
indexedParseNodeToFunctionalColumn.put(indexedParseNode,
new String[] { colName, expressionStr.trim() });
}
- if (breadcrumbContext != null) {
- breadcrumbContext.addAppliedRewrite("INDEX EXPRESSION " +
expressionStr + " AS " + colName);
- }
}
}
diff --git
a/phoenix-core-client/src/main/java/org/apache/phoenix/schema/MetaDataClient.java
b/phoenix-core-client/src/main/java/org/apache/phoenix/schema/MetaDataClient.java
index e70aba774f..1d7ff320f0 100644
---
a/phoenix-core-client/src/main/java/org/apache/phoenix/schema/MetaDataClient.java
+++
b/phoenix-core-client/src/main/java/org/apache/phoenix/schema/MetaDataClient.java
@@ -1755,8 +1755,15 @@ public class MetaDataClient {
// can lose information during compilation
StringBuilder buf = new StringBuilder();
parseNode.toSQL(resolver, buf);
+ // Several ParseNode.toSQL implementations (FunctionParseNode,
CastParseNode, etc.)
+ // unconditionally prepend a separator space so the serialized form
composes safely
+ // when nested inside a larger expression. At the top of an index
expression there is
+ // no surrounding context, so trim the resulting string before it
becomes part of the
+ // functional index column name. Otherwise the leading space would be
baked into the
+ // PColumn name and would later show up inside the double quotes of
the case-sensitive
+ // display name (e.g. PROJECT " JSON_VALUE(DOC,'$.type')").
// need to escape backslash as this expression will be re-parsed later
- String expressionStr = StringUtil.escapeBackslash(buf.toString());
+ String expressionStr =
StringUtil.escapeBackslash(buf.toString().trim());
ColumnName colName = null;
ColumnRef colRef = expressionIndexCompiler.getColumnRef();
diff --git
a/phoenix-core/src/test/java/org/apache/phoenix/query/explain/ExplainPlanTest.java
b/phoenix-core/src/test/java/org/apache/phoenix/query/explain/ExplainPlanTest.java
index 03aa689c7b..12fa1bc43b 100644
---
a/phoenix-core/src/test/java/org/apache/phoenix/query/explain/ExplainPlanTest.java
+++
b/phoenix-core/src/test/java/org/apache/phoenix/query/explain/ExplainPlanTest.java
@@ -321,13 +321,15 @@ public class ExplainPlanTest extends
BaseConnectionlessQueryTest {
@Test
public void testBsonValueProjection() throws Exception {
ObjectNode bsonBucket = mapper.createObjectNode();
+ // The fourth argument to BSON_VALUE is a typed-null default. It is now
rendered as "null"
+ // rather than the empty string LiteralExpression.toString previously
produced.
bsonBucket.set("BSON",
- mapper.createArrayNode().add("BSON_VALUE(PAYLOAD, 'user.id', 'VARCHAR',
)"));
+ mapper.createArrayNode().add("BSON_VALUE(PAYLOAD, 'user.id', 'VARCHAR',
null)"));
verifyQuery("bsonValueProjection",
"SELECT BSON_VALUE(payload, 'user.id', 'VARCHAR') FROM " + BSON_TBL,
text("CLIENT PARALLEL <N>-WAY FULL SCAN OVER " + BSON_TBL, " INDEX "
+ BSON_TBL,
" REGIONS PLANNED <N>", " SERVER BSON PROJECTION 1",
- " BSON_VALUE(PAYLOAD, 'user.id', 'VARCHAR', )"),
+ " BSON_VALUE(PAYLOAD, 'user.id', 'VARCHAR', null)"),
scanAttrs("FULL SCAN ", BSON_TBL, "").set("serverParsedProjections",
bsonBucket));
}
@@ -997,6 +999,103 @@ public class ExplainPlanTest extends
BaseConnectionlessQueryTest {
}
}
+ /**
+ * A query whose path expression matches a covered functional index's
indexed expression must
+ * choose that index, label the disclosed rule {@code "matches <expr>"}, and
emit exactly one
+ * {@code INDEX EXPRESSION <expr> AS <col>} rewrite breadcrumb on the chosen
plan's context. The
+ * breadcrumb is rendered as a {@code REWRITE INDEX EXPRESSION ...}
top-of-plan line in plain
+ * EXPLAIN text and as a single entry in the structured {@code rewrites}
attribute.
+ */
+ @Test
+ public void testIndexExpressionRewriteEmittedForChosenFunctionalIndex()
throws Exception {
+ try (Connection conn = DriverManager.getConnection(getUrl(),
defaultProps());
+ java.sql.Statement stmt = conn.createStatement()) {
+ String base = generateUniqueName();
+ String idx = generateUniqueName();
+ stmt.execute("CREATE TABLE " + base + " (pk VARCHAR PRIMARY KEY, payload
BSON)");
+ stmt.execute("CREATE INDEX " + idx + " ON " + base
+ + " (BSON_VALUE(payload, 'k', 'VARCHAR')) INCLUDE (payload)");
+ String query = "SELECT BSON_VALUE(payload, 'k', 'VARCHAR') FROM " + base
+ + " WHERE BSON_VALUE(payload, 'k', 'VARCHAR') = 'x'";
+
+ // The functional index is chosen and the rule comment names the matched
expression.
+ ExplainPlanTestUtil.assertPlan(conn, query).indexName(idx)
+ .indexRuleStartsWith("matches BSON_VALUE(PAYLOAD,'k','VARCHAR')")
+ // Exactly one breadcrumb (one applied substitution; no eager
per-PK-column emissions).
+ .rewriteCount(1).rewrite(0,
+ "INDEX EXPRESSION BSON_VALUE(PAYLOAD,'k','VARCHAR') AS
\":BSON_VALUE(PAYLOAD,'k','VARCHAR')\"");
+
+ // Plain EXPLAIN renders the breadcrumb as a REWRITE top-of-plan line.
+ List<String> rows = explainViaJdbc(conn, query);
+ assertEquals("REWRITE INDEX EXPRESSION BSON_VALUE(PAYLOAD,'k','VARCHAR')
AS"
+ + " \":BSON_VALUE(PAYLOAD,'k','VARCHAR')\"", rows.get(0));
+ assertTrue("operator should follow the REWRITE line: " + rows.get(1),
+ rows.get(1).startsWith("CLIENT"));
+ }
+ }
+
+ /**
+ * A query that references the indexed path expression more than once must
still emit the
+ * {@code INDEX EXPRESSION <expr> AS <col>} breadcrumb exactly once. Guards
against duplicates.
+ */
+ @Test
+ public void testIndexExpressionRewriteEmittedOnceForRepeatedQueryReference()
throws Exception {
+ try (Connection conn = DriverManager.getConnection(getUrl(),
defaultProps());
+ java.sql.Statement stmt = conn.createStatement()) {
+ String base = generateUniqueName();
+ String idx = generateUniqueName();
+ stmt.execute("CREATE TABLE " + base + " (pk VARCHAR PRIMARY KEY, payload
BSON)");
+ stmt.execute("CREATE INDEX " + idx + " ON " + base
+ + " (BSON_VALUE(payload, 'k', 'VARCHAR')) INCLUDE (payload)");
+ // The same path expression appears in the WHERE, the projection, and
the ORDER BY.
+ String query = "SELECT BSON_VALUE(payload, 'k', 'VARCHAR') FROM " + base
+ + " WHERE BSON_VALUE(payload, 'k', 'VARCHAR') >= 'a'"
+ + " ORDER BY BSON_VALUE(payload, 'k', 'VARCHAR')";
+ ExplainPlanTestUtil.assertPlan(conn,
query).indexName(idx).rewriteCount(1).rewrite(0,
+ "INDEX EXPRESSION BSON_VALUE(PAYLOAD,'k','VARCHAR') AS
\":BSON_VALUE(PAYLOAD,'k','VARCHAR')\"");
+ }
+ }
+
+ /**
+ * A plain global index chosen for a query must not emit an
+ * {@code INDEX EXPRESSION <expr> AS <col>} breadcrumb for ordinary indexed
columns.
+ */
+ @Test
+ public void testIndexExpressionRewriteOmittedForNonFunctionalIndex() throws
Exception {
+ try (Connection conn = DriverManager.getConnection(getUrl(),
defaultProps());
+ java.sql.Statement stmt = conn.createStatement()) {
+ String base = generateUniqueName();
+ String idx = generateUniqueName();
+ stmt.execute("CREATE TABLE " + base + " (k VARCHAR PRIMARY KEY, v1
VARCHAR, v2 VARCHAR)");
+ stmt.execute("CREATE INDEX " + idx + " ON " + base + " (v1) INCLUDE
(v2)");
+ String query = "SELECT v1, v2 FROM " + base + " WHERE v1 = 'x'";
+ ExplainPlanTestUtil.assertPlan(conn,
query).indexName(idx).rewritesNone();
+ assertNoPlanLineContains(conn, query, "INDEX EXPRESSION");
+ }
+ }
+
+ /**
+ * A functional index that is considered but rejected must not emit an
+ * {@code INDEX EXPRESSION <expr> AS <col>} breadcrumb.
+ */
+ @Test
+ public void testIndexExpressionRewriteOmittedWhenFunctionalIndexNotChosen()
throws Exception {
+ try (Connection conn = DriverManager.getConnection(getUrl(),
defaultProps());
+ java.sql.Statement stmt = conn.createStatement()) {
+ String base = generateUniqueName();
+ String idx = generateUniqueName();
+ stmt.execute("CREATE TABLE " + base + " (pk VARCHAR PRIMARY KEY, payload
BSON)");
+ stmt.execute("CREATE INDEX " + idx + " ON " + base
+ + " (BSON_VALUE(payload, 'k', 'VARCHAR')) INCLUDE (payload)");
+ // The functional index is over the 'k' path. The query reads the
'other' path and is on
+ // the data-table primary key, so the optimizer never substitutes the
indexed expression.
+ String query =
+ "SELECT BSON_VALUE(payload, 'other', 'VARCHAR') FROM " + base + "
WHERE pk = 'p1'";
+ ExplainPlanTestUtil.assertPlan(conn,
query).indexName(base).rewritesNone();
+ assertNoPlanLineContains(conn, query, "INDEX EXPRESSION");
+ }
+ }
+
@Test
public void testVerboseProjectLine() throws Exception {
try (Connection conn = DriverManager.getConnection(getUrl(),
defaultProps());