This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/paimon-rust.git
The following commit(s) were added to refs/heads/main by this push:
new 8880c7a feat(blob): harden create-time schema contract (#251)
8880c7a is described below
commit 8880c7a1740a861b09319a282559ef4f21c93e73
Author: Zach <[email protected]>
AuthorDate: Thu Apr 16 13:37:45 2026 +0800
feat(blob): harden create-time schema contract (#251)
---
crates/paimon/src/spec/schema.rs | 107 ++++++++++++++++++++++++++++++++-
crates/paimon/src/table/table_write.rs | 1 +
2 files changed, 107 insertions(+), 1 deletion(-)
diff --git a/crates/paimon/src/spec/schema.rs b/crates/paimon/src/spec/schema.rs
index f923434..63302ad 100644
--- a/crates/paimon/src/spec/schema.rs
+++ b/crates/paimon/src/spec/schema.rs
@@ -247,6 +247,7 @@ impl Schema {
let primary_keys = Self::normalize_primary_keys(&primary_keys, &mut
options)?;
let partition_keys = Self::normalize_partition_keys(&partition_keys,
&mut options)?;
let fields = Self::normalize_fields(&fields, &partition_keys,
&primary_keys)?;
+ Self::validate_blob_fields(&fields, &partition_keys, &options)?;
Ok(Self {
fields,
@@ -401,6 +402,54 @@ impl Schema {
Ok(())
}
+    /// Enforces the create-time contract for schemas that declare top-level BLOB columns.
+    ///
+    /// Schemas without any top-level BLOB column pass unchanged. Otherwise the rules below
+    /// are checked in order and the first violation is reported:
+    ///
+    /// 1. `CoreOptions::data_evolution_enabled()` must be true for `options`.
+    /// 2. At least one field must not be a BLOB column.
+    /// 3. No BLOB column may be listed in `partition_keys`.
+    ///
+    /// # Errors
+    ///
+    /// Returns `crate::Error::ConfigInvalid` with a message naming the violated rule.
+    fn validate_blob_fields(
+        fields: &[DataField],
+        partition_keys: &[String],
+        options: &HashMap<String, String>,
+    ) -> crate::Result<()> {
+        let blob_field_names = Self::top_level_blob_field_names(fields);
+        if blob_field_names.is_empty() {
+            // No BLOB columns: none of the rules below apply.
+            return Ok(());
+        }
+
+        let core_options = CoreOptions::new(options);
+        if !core_options.data_evolution_enabled() {
+            return Err(crate::Error::ConfigInvalid {
+                message: "Data evolution config must enabled for table with BLOB type column."
+                    .to_string(),
+            });
+        }
+
+        // Every field being a BLOB column means the table has no normal column at all.
+        if fields.len() == blob_field_names.len() {
+            return Err(crate::Error::ConfigInvalid {
+                message: "Table with BLOB type column must have other normal columns.".to_string(),
+            });
+        }
+
+        let partition_key_set: HashSet<&str> = partition_keys.iter().map(String::as_str).collect();
+        if blob_field_names
+            .iter()
+            .any(|name| partition_key_set.contains(name))
+        {
+            return Err(crate::Error::ConfigInvalid {
+                message: "The BLOB type column can not be part of partition keys.".to_string(),
+            });
+        }
+
+        Ok(())
+    }
+
+    /// Returns top-level Blob field names for create-time Blob contract checks.
+    ///
+    /// Only the entries of `fields` themselves are inspected. Names are returned in the
+    /// order of `fields` and are borrowed from it.
+    fn top_level_blob_field_names(fields: &[DataField]) -> Vec<&str> {
+        fields
+            .iter()
+            .filter(|field| matches!(field.data_type(), DataType::Blob(_)))
+            .map(|field| field.name())
+            .collect()
+    }
+
/// Returns the set of names that appear more than once.
pub fn duplicate_fields(names: &[String]) -> HashSet<String> {
let mut seen = HashMap::new();
@@ -582,7 +631,7 @@ impl Default for SchemaBuilder {
#[cfg(test)]
mod tests {
- use crate::spec::IntType;
+ use crate::spec::{BlobType, IntType};
use super::*;
@@ -755,6 +804,62 @@ mod tests {
assert_eq!(schema.primary_keys(), &["a", "b"]);
}
+    /// A schema with a BLOB column but no `data-evolution.enabled` option must be rejected.
+    #[test]
+    fn test_blob_schema_validation_requires_data_evolution() {
+        let result = Schema::builder()
+            .column("id", DataType::Int(IntType::new()))
+            .column("payload", DataType::Blob(BlobType::new()))
+            .build();
+
+        let rejected = match result.unwrap_err() {
+            crate::Error::ConfigInvalid { message } => {
+                message.contains("Data evolution config must enabled")
+            }
+            _ => false,
+        };
+        assert!(
+            rejected,
+            "blob columns should require data-evolution.enabled"
+        );
+    }
+
+    /// A schema whose only column is a BLOB must be rejected even with data evolution on.
+    #[test]
+    fn test_blob_schema_validation_rejects_all_blob_columns() {
+        let result = Schema::builder()
+            .column("payload", DataType::Blob(BlobType::new()))
+            .option("data-evolution.enabled", "true")
+            .build();
+
+        let rejected = match result.unwrap_err() {
+            crate::Error::ConfigInvalid { message } => {
+                message.contains("must have other normal columns")
+            }
+            _ => false,
+        };
+        assert!(rejected, "blob-only tables should be rejected");
+    }
+
+    /// Listing a BLOB column among the partition keys must fail schema construction.
+    #[test]
+    fn test_blob_schema_validation_rejects_blob_partition_keys() {
+        let result = Schema::builder()
+            .column("id", DataType::Int(IntType::new()))
+            .column("payload", DataType::Blob(BlobType::new()))
+            .partition_keys(["payload"])
+            .option("data-evolution.enabled", "true")
+            .build();
+
+        let rejected = match result.unwrap_err() {
+            crate::Error::ConfigInvalid { message } => {
+                message.contains("can not be part of partition keys")
+            }
+            _ => false,
+        };
+        assert!(
+            rejected,
+            "blob columns should be rejected as partition keys during schema validation"
+        );
+    }
+
+    /// A BLOB column next to a normal column, with data evolution enabled, builds fine.
+    #[test]
+    fn test_blob_schema_validation_accepts_valid_blob_table() {
+        let built = Schema::builder()
+            .column("id", DataType::Int(IntType::new()))
+            .column("payload", DataType::Blob(BlobType::new()))
+            .option("data-evolution.enabled", "true")
+            .build();
+
+        let schema = built.unwrap();
+        let field_count = schema.fields().len();
+        assert_eq!(field_count, 2);
+    }
+
+
#[test]
fn test_schema_builder_column_row_type() {
let row_type = RowType::new(vec![DataField::new(
diff --git a/crates/paimon/src/table/table_write.rs b/crates/paimon/src/table/table_write.rs
index 77b8e97..de7e0fa 100644
--- a/crates/paimon/src/table/table_write.rs
+++ b/crates/paimon/src/table/table_write.rs
@@ -556,6 +556,7 @@ mod tests {
let schema = Schema::builder()
.column("id", DataType::Int(IntType::new()))
.column("payload", DataType::Blob(BlobType::new()))
+ .option("data-evolution.enabled", "true")
.build()
.unwrap();
TableSchema::new(0, &schema)