codope commented on a change in pull request #4848:
URL: https://github.com/apache/hudi/pull/4848#discussion_r816955786
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java
##########
@@ -320,7 +325,48 @@ private void updateWriteStatus(HoodieDeltaWriteStat stat,
AppendResult result) {
statuses.add(this.writeStatus);
}
- private void processAppendResult(AppendResult result) {
+ /**
+ * Compute column statistics for the records that are part of this append handle.
+ *
+ * @param filePath - Log file that records are part of
+ * @param recordList - List of records appended to the log for which
column statistics are needed
+ * @param columnRangeMap - Output map to accumulate the column statistics
for the records
+ */
+ private void computeRecordsStats(final String filePath, List<IndexedRecord>
recordList,
+ Map<String,
HoodieColumnRangeMetadata<Comparable>> columnRangeMap) {
+ recordList.forEach(record -> accumulateColumnRanges(record,
writeSchemaWithMetaFields, filePath, columnRangeMap,
config.isConsistentLogicalTimestampEnabled()));
+ }
+
+ /**
+ * Accumulate column range statistics for the requested record.
+ *
+ * @param record - Record to get the column range statistics for
+ * @param schema - Schema for the record
+ * @param filePath - File that record belongs to
+ */
+ private static void accumulateColumnRanges(IndexedRecord record, Schema
schema, String filePath,
+ Map<String, HoodieColumnRangeMetadata<Comparable>> columnRangeMap,
boolean consistentLogicalTimestampEnabled) {
+ if (!(record instanceof GenericRecord)) {
+ throw new HoodieIOException("Record is not a generic type to get column
range metadata!");
+ }
+ schema.getFields().forEach(field -> {
+ final String fieldVal =
HoodieAvroUtils.getNestedFieldValAsString((GenericRecord) record, field.name(),
true, consistentLogicalTimestampEnabled);
+ final int fieldSize = fieldVal == null ? 0 : fieldVal.length();
+ final HoodieColumnRangeMetadata<Comparable> fieldRange = new
HoodieColumnRangeMetadata<>(
Review comment:
Good point! Have changed the code accordingly.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]