This is an automated email from the ASF dual-hosted git repository.
somandal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new ba2013728b Add validations for MV columns: reject MV primary-keys for
upsert/dedup, BIG_DECIMAL, and JSON (#16079)
ba2013728b is described below
commit ba2013728b29178df2033fe5af75d79a75f9e30e
Author: Sonam Mandal <[email protected]>
AuthorDate: Wed Jun 11 16:02:23 2025 -0700
Add validations for MV columns: reject MV primary-keys for upsert/dedup,
BIG_DECIMAL, and JSON (#16079)
* Add validations for MV columns: reject MV primary-keys for upsert/dedup,
BIG_DECIMAL, and JSON
* Address review comments
---
.../apache/pinot/core/util/SchemaUtilsTest.java | 29 +++++++++++++++
.../pinot/segment/local/utils/SchemaUtils.java | 13 +++++++
.../segment/local/utils/TableConfigUtils.java | 8 +++++
.../segment/local/utils/TableConfigUtilsTest.java | 41 ++++++++++++++++++++++
4 files changed, 91 insertions(+)
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java
index 69f58aa5eb..a37f9fe635 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java
@@ -290,6 +290,35 @@ public class SchemaUtilsTest {
checkValidationFails(pinotSchema);
}
+ @Test
+ public void testValidateMultiValueFieldSpec() {
+ Schema pinotSchema;
+
+ pinotSchema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+ .addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+ .addMultiValueDimension("myJsonCol", FieldSpec.DataType.JSON).build();
+
+ checkValidationFails(pinotSchema);
+
+ pinotSchema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+ .addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+ .addMultiValueDimension("myBigDecimalCol",
FieldSpec.DataType.BIG_DECIMAL).build();
+
+ checkValidationFails(pinotSchema);
+
+ pinotSchema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+ .addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+ .addSingleValueDimension("myJsonCol", FieldSpec.DataType.JSON).build();
+
+ SchemaUtils.validate(pinotSchema);
+
+ pinotSchema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+ .addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+ .addSingleValueDimension("myBigDecimalCol",
FieldSpec.DataType.BIG_DECIMAL).build();
+
+ SchemaUtils.validate(pinotSchema);
+ }
+
@Test
public void testValidateCaseInsensitive() {
Schema pinotSchema = new Schema.SchemaBuilder()
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java
index 09ebf24063..45e6754052 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java
@@ -148,6 +148,9 @@ public class SchemaUtils {
validateDefaultIsNotNaN(fieldSpec);
}
}
+ if (!fieldSpec.isSingleValueField()) {
+ validateMultiValueCompatibility(fieldSpec);
+ }
}
Preconditions.checkState(Collections.disjoint(transformedColumns,
argumentColumns),
"Columns: %s are a result of transformations, and cannot be used as
arguments to other transform functions",
@@ -166,6 +169,16 @@ public class SchemaUtils {
fieldSpec.getName());
}
+ /**
+ * Validations for MV type columns
+ */
+ private static void validateMultiValueCompatibility(FieldSpec fieldSpec) {
+
Preconditions.checkState(!fieldSpec.getDataType().equals(FieldSpec.DataType.JSON),
+ "JSON columns cannot be of multi-value type");
+
Preconditions.checkState(!fieldSpec.getDataType().equals(FieldSpec.DataType.BIG_DECIMAL),
+ "BIG_DECIMAL columns cannot be of multi-value type");
+ }
+
/**
* Validates that the schema is compatible with the given table config
*/
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
index c1edee398f..3901c4e5a8 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
@@ -707,6 +707,14 @@ public final class TableConfigUtils {
// primary key exists
Preconditions.checkState(CollectionUtils.isNotEmpty(schema.getPrimaryKeyColumns()),
"Upsert/Dedup table must have primary key columns in the schema");
+ // primary key columns are not of multi-value type
+ for (String primaryKeyColumn : schema.getPrimaryKeyColumns()) {
+ FieldSpec fieldSpec = schema.getFieldSpecFor(primaryKeyColumn);
+ if (fieldSpec != null) {
+ Preconditions.checkState(fieldSpec.isSingleValueField(),
+ String.format("Upsert/Dedup primary key column: %s cannot be of
multi-value type", primaryKeyColumn));
+ }
+ }
// replica group is configured for routing
Preconditions.checkState(
tableConfig.getRoutingConfig() != null &&
isRoutingStrategyAllowedForUpsert(tableConfig.getRoutingConfig()),
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
index 33c6737cc9..ce14be01ef 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
@@ -1709,6 +1709,20 @@ public class TableConfigUtilsTest {
Assert.assertEquals(e.getMessage(), "Upsert/Dedup table must have
primary key columns in the schema");
}
+ schema =
+ new
Schema.SchemaBuilder().setSchemaName(TABLE_NAME).addMultiValueDimension("myCol",
FieldSpec.DataType.STRING)
+ .setPrimaryKeyColumns(Lists.newArrayList("myCol")).build();
+ tableConfig = new
TableConfigBuilder(TableType.REALTIME).setTableName(TABLE_NAME)
+ .setDedupConfig(new DedupConfig())
+ .build();
+ try {
+ TableConfigUtils.validateUpsertAndDedupConfig(tableConfig, schema);
+ Assert.fail();
+ } catch (IllegalStateException e) {
+ Assert.assertEquals(e.getMessage(),
+ "Upsert/Dedup primary key column: myCol cannot be of multi-value
type");
+ }
+
schema =
new
Schema.SchemaBuilder().setSchemaName(TABLE_NAME).addSingleValueDimension("myCol",
FieldSpec.DataType.STRING)
.setPrimaryKeyColumns(Lists.newArrayList("myCol")).build();
@@ -1932,6 +1946,7 @@ public class TableConfigUtilsTest {
String invalidCol = "invalidCol";
schema = new
Schema.SchemaBuilder().setSchemaName(TABLE_NAME).setPrimaryKeyColumns(Lists.newArrayList("myPkCol"))
.addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+ .addSingleValueDimension("myPkCol", FieldSpec.DataType.STRING)
.addSingleValueDimension(stringTypeDelCol, FieldSpec.DataType.STRING)
.addSingleValueDimension(delCol, FieldSpec.DataType.BOOLEAN)
.addSingleValueDimension(timestampCol, FieldSpec.DataType.TIMESTAMP)
@@ -2001,10 +2016,33 @@ public class TableConfigUtilsTest {
Assert.assertEquals(e.getMessage(), "The deleteRecordColumn - mvCol must
be a single-valued column");
}
+ schema = new
Schema.SchemaBuilder().setSchemaName(TABLE_NAME).setPrimaryKeyColumns(Lists.newArrayList("myPkCol"))
+ .addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+ .addMultiValueDimension("myPkCol", FieldSpec.DataType.STRING)
+ .addSingleValueDimension(stringTypeDelCol, FieldSpec.DataType.STRING)
+ .addSingleValueDimension(delCol, FieldSpec.DataType.BOOLEAN)
+ .addSingleValueDimension(timestampCol, FieldSpec.DataType.TIMESTAMP)
+ .addMultiValueDimension(mvCol, FieldSpec.DataType.STRING).build();
+ streamConfigs = getStreamConfigs();
+
+ upsertConfig = new UpsertConfig(UpsertConfig.Mode.FULL);
+ upsertConfig.setDeleteRecordColumn(stringTypeDelCol);
+ tableConfig = new
TableConfigBuilder(TableType.REALTIME).setTableName(TABLE_NAME).setStreamConfigs(streamConfigs)
+ .setUpsertConfig(upsertConfig).setRoutingConfig(
+ new RoutingConfig(null, null,
RoutingConfig.STRICT_REPLICA_GROUP_INSTANCE_SELECTOR_TYPE, false))
+ .build();
+ try {
+ TableConfigUtils.validateUpsertAndDedupConfig(tableConfig, schema);
+ Assert.fail("Should fail table creation when PK column type is
multi-valued.");
+ } catch (IllegalStateException e) {
+ Assert.assertEquals(e.getMessage(), "Upsert/Dedup primary key column:
myPkCol cannot be of multi-value type");
+ }
+
// upsert deleted-keys-ttl configs with no deleted column
schema = new
Schema.SchemaBuilder().setSchemaName(TABLE_NAME).setPrimaryKeyColumns(Lists.newArrayList("myPkCol"))
.addDateTime(TIME_COLUMN, FieldSpec.DataType.LONG,
"1:MILLISECONDS:EPOCH", "1:MILLISECONDS")
.addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+ .addSingleValueDimension("myPkCol", FieldSpec.DataType.STRING)
.addSingleValueDimension(delCol, FieldSpec.DataType.BOOLEAN).build();
upsertConfig = new UpsertConfig(UpsertConfig.Mode.FULL);
upsertConfig.setDeletedKeysTTL(3600);
@@ -2021,6 +2059,7 @@ public class TableConfigUtilsTest {
// multiple comparison columns set for deleted-keys-ttl
schema = new
Schema.SchemaBuilder().setSchemaName(TABLE_NAME).setPrimaryKeyColumns(Lists.newArrayList("myPkCol"))
.addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+ .addSingleValueDimension("myPkCol", FieldSpec.DataType.STRING)
.addDateTime(TIME_COLUMN, FieldSpec.DataType.LONG,
"1:MILLISECONDS:EPOCH", "1:MILLISECONDS")
.addSingleValueDimension(delCol, FieldSpec.DataType.BOOLEAN).build();
upsertConfig.setComparisonColumns(Lists.newArrayList(TIME_COLUMN,
"myCol"));
@@ -2058,6 +2097,7 @@ public class TableConfigUtilsTest {
boolean dropOutOfOrderRecord = true;
schema = new
Schema.SchemaBuilder().setSchemaName(TABLE_NAME).setPrimaryKeyColumns(Lists.newArrayList("myPkCol"))
.addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+ .addSingleValueDimension("myPkCol", FieldSpec.DataType.STRING)
.addSingleValueDimension(outOfOrderRecordColumn,
FieldSpec.DataType.BOOLEAN).build();
streamConfigs = getStreamConfigs();
@@ -2078,6 +2118,7 @@ public class TableConfigUtilsTest {
// outOfOrderRecordColumn not of type BOOLEAN
schema = new
Schema.SchemaBuilder().setSchemaName(TABLE_NAME).setPrimaryKeyColumns(Lists.newArrayList("myPkCol"))
.addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+ .addSingleValueDimension("myPkCol", FieldSpec.DataType.STRING)
.addSingleValueDimension(outOfOrderRecordColumn,
FieldSpec.DataType.STRING).build();
streamConfigs = getStreamConfigs();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]