bvaradar commented on code in PR #14265: URL: https://github.com/apache/hudi/pull/14265#discussion_r2539050608
########## hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchema.java: ########## @@ -0,0 +1,931 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.schema; + +import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.exception.HoodieAvroSchemaException; + +import org.apache.avro.JsonProperties; +import org.apache.avro.Schema; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +/** + * Wrapper class for Avro Schema that provides Hudi-specific schema functionality + * while maintaining binary compatibility with Avro. + * + * <p>This class serves as the primary interface for schema operations within Hudi. 
+ * It encapsulates an Avro Schema and provides a consistent, type-safe API while + * maintaining full compatibility with existing Avro-based code.</p> + * + * <p>Key features: + * <ul> + * <li>Binary compatibility with Avro Schema</li> + * <li>Type-safe field access through HoodieSchemaField</li> + * <li>Support for all Avro schema types and operations</li> + * <li>Consistent error handling using Hudi exceptions</li> + * <li>Integration with Hudi's Option type for null safety</li> + * </ul></p> + * + * <p>Usage examples: + * <pre>{@code + * // Create from JSON + * HoodieSchema schema = HoodieSchema.parse(jsonSchemaString); + * + * // Create primitive schemas + * HoodieSchema stringSchema = HoodieSchema.create(HoodieSchemaType.STRING); + * + * // Access schema properties + * HoodieSchemaType type = schema.getType(); + * List<HoodieSchemaField> fields = schema.getFields(); + * Option<HoodieSchemaField> field = schema.getField("fieldName"); + * + * // Convert back to Avro for compatibility + * Schema avroSchema = schema.getAvroSchema(); + * }</pre></p> + * + * @since 1.2.0 + */ +public class HoodieSchema implements Serializable { + + /** + * Constant representing a null JSON value, equivalent to JsonProperties.NULL_VALUE. + * This provides compatibility with Avro's JsonProperties while maintaining Hudi's API. + */ + public static final Object NULL_VALUE = JsonProperties.NULL_VALUE; + private static final long serialVersionUID = 1L; + private final Schema avroSchema; + private final HoodieSchemaType type; + + /** + * Creates a new HoodieSchema wrapping the given Avro schema. + * + * @param avroSchema the Avro schema to wrap, cannot be null + * @throws IllegalArgumentException if avroSchema is null + */ + public HoodieSchema(Schema avroSchema) { Review Comment: Sure. 
########## hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaFactory.java: ########## @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.schema; + +import org.apache.hudi.common.util.ValidationUtils; + +import org.apache.avro.Schema; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Factory class for creating common HoodieSchema instances. + * + * <p>This factory provides convenient methods for creating commonly used + * schema patterns without requiring complex builder logic.</p> + * + * @since 1.2.0 + */ +public final class HoodieSchemaFactory { + + private HoodieSchemaFactory() { + // Utility class, no instantiation + } + + /** + * Creates a record schema with the specified name and fields. 
+ * + * @param recordName the name of the record + * @param fields the fields for the record + * @return new HoodieSchema representing a record + * @throws IllegalArgumentException if recordName is null/empty or fields is null/empty + */ + public static HoodieSchema createRecord(String recordName, List<HoodieSchemaField> fields) { + ValidationUtils.checkArgument(recordName != null && !recordName.isEmpty(), + "Record name cannot be null or empty"); + ValidationUtils.checkArgument(fields != null && !fields.isEmpty(), + "Fields list cannot be null or empty"); + + List<Schema.Field> avroFields = new ArrayList<>(); + for (HoodieSchemaField field : fields) { + avroFields.add(field.getAvroField()); + } + + Schema recordSchema = Schema.createRecord(recordName, null, null, false, avroFields); + return new HoodieSchema(recordSchema); + } + + /** + * Creates a record schema with the specified name, namespace, and fields. + * + * @param recordName the name of the record + * @param namespace the namespace for the record + * @param doc the documentation string + * @param fields the fields for the record + * @return new HoodieSchema representing a record + */ + public static HoodieSchema createRecord(String recordName, String namespace, String doc, + List<HoodieSchemaField> fields) { + ValidationUtils.checkArgument(recordName != null && !recordName.isEmpty(), + "Record name cannot be null or empty"); + ValidationUtils.checkArgument(fields != null && !fields.isEmpty(), + "Fields list cannot be null or empty"); + + List<Schema.Field> avroFields = new ArrayList<>(); + for (HoodieSchemaField field : fields) { + avroFields.add(field.getAvroField()); + } + + Schema recordSchema = Schema.createRecord(recordName, doc, namespace, false, avroFields); + return new HoodieSchema(recordSchema); + } + + /** + * Creates an enum schema with the specified name and symbols. 
+ * + * @param enumName the name of the enum + * @param symbols the enum symbols + * @return new HoodieSchema representing an enum + */ + public static HoodieSchema createEnum(String enumName, List<String> symbols) { + ValidationUtils.checkArgument(enumName != null && !enumName.isEmpty(), + "Enum name cannot be null or empty"); + ValidationUtils.checkArgument(symbols != null && !symbols.isEmpty(), + "Symbols list cannot be null or empty"); + + Schema enumSchema = Schema.createEnum(enumName, null, null, symbols); + return new HoodieSchema(enumSchema); + } + + /** + * Creates a fixed schema with the specified name and size. + * + * @param fixedName the name of the fixed schema + * @param size the size in bytes + * @return new HoodieSchema representing a fixed type + */ + public static HoodieSchema createFixed(String fixedName, int size) { Review Comment: The goal was to make it easy to migrate the rest of the code. So you see the APIs structured like Avro code. If you find something redundant, we can clean up the ones whose signature is not going to be used during migration. ########## hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaField.java: ########## @@ -0,0 +1,392 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.schema; + +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ValidationUtils; + +import org.apache.avro.Schema; + +import java.io.Serializable; +import java.util.Objects; + +/** + * Wrapper class for Avro Schema.Field that provides Hudi-specific field functionality + * while maintaining binary compatibility with Avro. + * + * <p>This class encapsulates an Avro Schema.Field and provides a consistent interface + * for field operations across the Hudi codebase. It maintains full compatibility with + * Avro by delegating all operations to the underlying Avro field.</p> + * + * <p>Usage example: + * <pre>{@code + * // Create from Avro field + * Schema.Field avroField = new Schema.Field("name", Schema.create(Schema.Type.STRING)); + * HoodieSchemaField hoodieField = HoodieSchemaField.fromAvroField(avroField); + * + * // Access field properties + * String name = hoodieField.name(); + * HoodieSchema schema = hoodieField.schema(); + * Option<Object> defaultValue = hoodieField.defaultVal(); + * }</pre></p> + * + * @since 1.2.0 + */ +public class HoodieSchemaField implements Serializable { + + private static final long serialVersionUID = 1L; + + private final Schema.Field avroField; + private final HoodieSchema hoodieSchema; + + /** + * Creates a new HoodieSchemaField wrapping the given Avro field. + * + * @param avroField the Avro field to wrap, cannot be null + */ + public HoodieSchemaField(Schema.Field avroField) { + ValidationUtils.checkArgument(avroField != null, "Avro field cannot be null"); + this.avroField = avroField; + this.hoodieSchema = new HoodieSchema(avroField.schema()); + } + + /** + * Factory method to create HoodieSchemaField from an Avro field. 
+ * + * @param avroField the Avro field to wrap + * @return new HoodieSchemaField instance + * @throws IllegalArgumentException if avroField is null + */ + public static HoodieSchemaField fromAvroField(Schema.Field avroField) { + return new HoodieSchemaField(avroField); + } + + /** + * Creates a new HoodieSchemaField with the specified properties. + * + * @param name the name of the field + * @param schema the schema of the field + * @param doc the documentation string, can be null + * @param defaultVal the default value, can be null + * @return new HoodieSchemaField instance + */ + public static HoodieSchemaField of(String name, HoodieSchema schema, String doc, Object defaultVal) { + return of(name, schema, doc, defaultVal, HoodieFieldOrder.ASCENDING); + } + + /** + * Creates a new HoodieSchemaField with the specified properties, including field order. + * + * @param name the name of the field + * @param schema the schema of the field + * @param doc the documentation string, can be null + * @param defaultVal the default value, can be null + * @param order the field order for sorting + * @return new HoodieSchemaField instance + */ + public static HoodieSchemaField of(String name, HoodieSchema schema, String doc, Object defaultVal, HoodieFieldOrder order) { + ValidationUtils.checkArgument(name != null && !name.isEmpty(), "Field name cannot be null or empty"); + ValidationUtils.checkArgument(schema != null, "Field schema cannot be null"); + ValidationUtils.checkArgument(order != null, "Field order cannot be null"); + + Schema avroSchema = schema.getAvroSchema(); + ValidationUtils.checkState(avroSchema != null, "Schema's Avro schema cannot be null"); + + Schema.Field avroField = new Schema.Field(name, avroSchema, doc, defaultVal, order.toAvroOrder()); + return new HoodieSchemaField(avroField); + } + + /** + * Creates a new HoodieSchemaField with the specified name and schema. 
+ * + * @param name the name of the field + * @param schema the schema of the field + * @return new HoodieSchemaField instance + */ + public static HoodieSchemaField of(String name, HoodieSchema schema) { + return of(name, schema, null, null); + } + + /** + * Creates a new HoodieSchemaField with the specified name, schema, and doc. + * + * @param name the name of the field + * @param schema the schema of the field + * @param doc the documentation string + * @return new HoodieSchemaField instance + */ + public static HoodieSchemaField of(String name, HoodieSchema schema, String doc) { + return of(name, schema, doc, null); + } + + /** + * Creates a metadata field for Hudi internal use. + * This is a convenience method for creating fields that are part of Hudi's metadata. + * + * @param name the metadata field name + * @param schema the metadata field schema + * @return new HoodieSchemaField configured as a metadata field + * @throws IllegalArgumentException if name is null/empty or schema is null + */ + public static HoodieSchemaField createMetadataField(String name, HoodieSchema schema) { + ValidationUtils.checkArgument(name != null && !name.isEmpty(), "Metadata field name cannot be null or empty"); + ValidationUtils.checkArgument(schema != null, "Metadata field schema cannot be null"); + + return HoodieSchemaField.of(name, schema, "Hudi metadata field: " + name, HoodieJsonProperties.NULL_VALUE); + } + + /** + * Returns the name of this field. + * + * @return the field name + */ + public String name() { + return avroField.name(); + } + + /** + * Returns the schema of this field. + * + * @return the field schema as HoodieSchema + */ + public HoodieSchema schema() { + return hoodieSchema; + } + + /** + * Returns the documentation string for this field. 
+ * + * @return Option containing the documentation string, or Option.empty() if none + */ + public Option<String> doc() { + return Option.ofNullable(avroField.doc()); + } + + /** + * Returns the default value for this field. + * + * @return Option containing the default value, or Option.empty() if none + */ + public Option<Object> defaultVal() { + if (avroField != null && avroField.hasDefaultValue()) { + return Option.of(avroField.defaultVal()); + } + return Option.empty(); + } + + /** + * Returns the sort order for this field. + * + * @return the field order + */ + public HoodieFieldOrder order() { + return HoodieFieldOrder.fromAvroOrder(avroField.order()); + } + + /** + * Returns the position of this field within its enclosing record. + * + * @return the field position (0-based index) + */ + public int pos() { + return avroField.pos(); + } + + /** + * Checks if this field has a default value. + * + * @return true if the field has a default value + */ + public boolean hasDefaultValue() { + return avroField.hasDefaultValue(); + } + + /** + * Returns custom properties attached to this field. + * + * @return map of custom properties + */ + public java.util.Map<String, Object> getObjectProps() { Review Comment: sure -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
