This is an automated email from the ASF dual-hosted git repository.
abhishekrb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 746cae6bf12 Preserve row signature column order when column analysis
has errors (#19162)
746cae6bf12 is described below
commit 746cae6bf1261f401904400d0645eac3050a341b
Author: Abhishek Radhakrishnan <[email protected]>
AuthorDate: Tue Mar 17 20:42:00 2026 -0700
Preserve row signature column order when column analysis has errors (#19162)
* Fix https://github.com/apache/druid/issues/18437
When column analysis encounters errors during fold, the current behavior
can cause row signatures to flap on the Brokers, which in turn leads to
sporadic query failures or incorrect query results, since query plans rely on
the Broker’s segment metadata cache. This issue is more pronounced during
segment analysis on realtime servers with JSON columns, where the fold may
sometimes produce column analysis errors, presumably due to type coercion.
This patch ensures that columns are not skipped when such errors occur,
preserving the row signature's column order.
Note: https://github.com/apache/druid/issues/19176 may still occur: the
current behavior is that column types fall back to string when such errors
are encountered.
---
.../metadata/AbstractSegmentMetadataCache.java | 5 ---
.../CoordinatorSegmentMetadataCacheTest.java | 44 ++++++++++++++++++++++
2 files changed, 44 insertions(+), 5 deletions(-)
diff --git
a/server/src/main/java/org/apache/druid/segment/metadata/AbstractSegmentMetadataCache.java
b/server/src/main/java/org/apache/druid/segment/metadata/AbstractSegmentMetadataCache.java
index 684b65884ac..3cb8dbd7600 100644
---
a/server/src/main/java/org/apache/druid/segment/metadata/AbstractSegmentMetadataCache.java
+++
b/server/src/main/java/org/apache/druid/segment/metadata/AbstractSegmentMetadataCache.java
@@ -991,11 +991,6 @@ public abstract class AbstractSegmentMetadataCache<T
extends DataSourceInformati
{
final RowSignature.Builder rowSignatureBuilder = RowSignature.builder();
for (Map.Entry<String, ColumnAnalysis> entry :
analysis.getColumns().entrySet()) {
- if (entry.getValue().isError()) {
- // Skip columns with analysis errors.
- continue;
- }
-
ColumnType valueType = entry.getValue().getTypeSignature();
// this shouldn't happen, but if it does, first try to fall back to
legacy type information field in case
diff --git
a/server/src/test/java/org/apache/druid/segment/metadata/CoordinatorSegmentMetadataCacheTest.java
b/server/src/test/java/org/apache/druid/segment/metadata/CoordinatorSegmentMetadataCacheTest.java
index a62114a62a5..ea4301f8b31 100644
---
a/server/src/test/java/org/apache/druid/segment/metadata/CoordinatorSegmentMetadataCacheTest.java
+++
b/server/src/test/java/org/apache/druid/segment/metadata/CoordinatorSegmentMetadataCacheTest.java
@@ -1190,6 +1190,50 @@ public class CoordinatorSegmentMetadataCacheTest extends
CoordinatorSegmentMetad
);
}
+ /**
+ * Verifies that columns with analysis errors are included in the row
signature rather than skipped.
+ * Skipping error columns can cause invalid query plans and results as seen
in
+ * <a href="https://github.com/apache/druid/issues/18437">issue 18437</a>
and <a href="https://github.com/apache/druid/pull/18966">pr 18966</a>.
+ */
+ @Test
+ public void
testAnalysisToRowSignatureDoesNotSkipColumnsWhenAnalysisHasErrors()
+ {
+ final LinkedHashMap<String, ColumnAnalysis> columns = new
LinkedHashMap<>();
+ columns.put("a", new ColumnAnalysis(ColumnType.STRING,
ColumnType.STRING.asTypeString(), false, true, 1234, 26, "a", "z", null));
+ columns.put("error_col", ColumnAnalysis.error("unknown_type"));
+ columns.put("b", new ColumnAnalysis(ColumnType.LONG,
ColumnType.LONG.asTypeString(), false, true, 1234, 26, null, null, null));
+ columns.put("c", new ColumnAnalysis(ColumnType.DOUBLE,
ColumnType.DOUBLE.asTypeString(), false, true, 1234, 26, null, null, null));
+ columns.put("d", new ColumnAnalysis(ColumnType.STRING,
ColumnType.STRING.asTypeString(), false, true, 1234, 10, "x", "y", null));
+ columns.put("error_col2", ColumnAnalysis.error("multi_value"));
+
+ final RowSignature signature =
AbstractSegmentMetadataCache.analysisToRowSignature(
+ new SegmentAnalysis(
+ "id",
+ ImmutableList.of(Intervals.utc(1L, 2L)),
+ columns,
+ 1234,
+ 100,
+ null,
+ null,
+ null,
+ null,
+ null
+ )
+ );
+
+ Assert.assertEquals(
+ RowSignature.builder()
+ .add("a", ColumnType.STRING)
+ .add("error_col", ColumnType.STRING)
+ .add("b", ColumnType.LONG)
+ .add("c", ColumnType.DOUBLE)
+ .add("d", ColumnType.STRING)
+ .add("error_col2", ColumnType.STRING)
+ .build(),
+ signature
+ );
+ }
+
@Test
public void testStaleDatasourceRefresh() throws IOException,
InterruptedException
{
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]