alexeykudinkin commented on a change in pull request #5077:
URL: https://github.com/apache/hudi/pull/5077#discussion_r831547314
##########
File path: hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
##########
@@ -511,6 +512,132 @@ public static Object getNestedFieldVal(GenericRecord
record, String fieldName, b
}
}
+ /**
+ * Get schema for the given field and record. Field can be nested, denoted
by dot notation. e.g: a.b.c
+ *
+ * @param record - record containing the value of the given field
+ * @param fieldName - name of the field
+ * @return
+ */
+ public static Schema getNestedFieldSchemaFromRecord(GenericRecord record,
String fieldName) {
+ String[] parts = fieldName.split("\\.");
+ GenericRecord valueNode = record;
+ int i = 0;
+ for (; i < parts.length; i++) {
+ String part = parts[i];
+ Object val = valueNode.get(part);
+
+ if (i == parts.length - 1) {
+ return resolveUnion(valueNode.getSchema().getField(part).schema());
+ } else {
+ if (!(val instanceof GenericRecord)) {
+ throw new HoodieException("Cannot find a record at part value :" +
part);
+ }
+ valueNode = (GenericRecord) val;
+ }
+ }
+ throw new HoodieException("Failed to get schema. Not a valid field name: "
+ fieldName);
+ }
+
+
+ /**
+ * Get schema for the given field and write schema. Field can be nested,
denoted by dot notation. e.g: a.b.c
+ * Use this method when record is not available. Otherwise, prefer to use
{@link #getNestedFieldSchemaFromRecord(GenericRecord, String)}
+ *
+ * @param writeSchema - write schema of the record
+ * @param fieldName - name of the field
+ * @return
+ */
+ public static Schema getNestedFieldSchemaFromWriteSchema(Schema writeSchema,
String fieldName) {
+ String[] parts = fieldName.split("\\.");
+ int i = 0;
+ for (; i < parts.length; i++) {
+ String part = parts[i];
+ Schema schema = writeSchema.getField(part).schema();
+
+ if (i == parts.length - 1) {
+ return resolveUnion(schema);
+ }
+ }
+ throw new HoodieException("Failed to get schema. Not a valid field name: "
+ fieldName);
+ }
+
+ /**
+ * Given a field schema, convert its value to native Java type.
+ *
+ * @param schema - field schema
+ * @param val - field value
+ * @return
+ */
+ public static Comparable<?> convertToNativeJavaType(Schema schema, Object
val) {
Review comment:
Please check the other comments for context.
##########
File path: hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
##########
@@ -511,6 +512,132 @@ public static Object getNestedFieldVal(GenericRecord
record, String fieldName, b
}
}
+ /**
+ * Get schema for the given field and record. Field can be nested, denoted
by dot notation. e.g: a.b.c
+ *
+ * @param record - record containing the value of the given field
+ * @param fieldName - name of the field
+ * @return
+ */
+ public static Schema getNestedFieldSchemaFromRecord(GenericRecord record,
String fieldName) {
+ String[] parts = fieldName.split("\\.");
+ GenericRecord valueNode = record;
+ int i = 0;
+ for (; i < parts.length; i++) {
+ String part = parts[i];
+ Object val = valueNode.get(part);
+
+ if (i == parts.length - 1) {
+ return resolveUnion(valueNode.getSchema().getField(part).schema());
+ } else {
+ if (!(val instanceof GenericRecord)) {
+ throw new HoodieException("Cannot find a record at part value :" +
part);
+ }
+ valueNode = (GenericRecord) val;
+ }
+ }
+ throw new HoodieException("Failed to get schema. Not a valid field name: "
+ fieldName);
+ }
+
+
+ /**
+ * Get schema for the given field and write schema. Field can be nested,
denoted by dot notation. e.g: a.b.c
+ * Use this method when record is not available. Otherwise, prefer to use
{@link #getNestedFieldSchemaFromRecord(GenericRecord, String)}
+ *
+ * @param writeSchema - write schema of the record
+ * @param fieldName - name of the field
+ * @return
+ */
+ public static Schema getNestedFieldSchemaFromWriteSchema(Schema writeSchema,
String fieldName) {
+ String[] parts = fieldName.split("\\.");
+ int i = 0;
+ for (; i < parts.length; i++) {
+ String part = parts[i];
+ Schema schema = writeSchema.getField(part).schema();
+
+ if (i == parts.length - 1) {
+ return resolveUnion(schema);
+ }
+ }
+ throw new HoodieException("Failed to get schema. Not a valid field name: "
+ fieldName);
+ }
+
+ /**
+ * Given a field schema, convert its value to native Java type.
+ *
+ * @param schema - field schema
+ * @param val - field value
+ * @return
+ */
+ public static Comparable<?> convertToNativeJavaType(Schema schema, Object
val) {
Review comment:
I don't think we need this method. Instead, we should make sure, at the
place where we collect min/max stats, that they're proper Java objects, and
simply treat min/max as `Comparable`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]