manojpec commented on a change in pull request #4352: URL: https://github.com/apache/hudi/pull/4352#discussion_r793425071
########## File path: hudi-common/src/main/avro/HoodieMetadata.avsc ########## @@ -30,27 +30,108 @@ "doc": "Type of the metadata record", "type": "int" }, - { "name": "filesystemMetadata", + { "doc": "Contains information about partitions and files within the dataset", - "type": ["null", { - "type": "map", - "values": { + "name": "filesystemMetadata", + "type": [ + "null", + { + "type": "map", + "values": { + "type": "record", + "name": "HoodieMetadataFileInfo", + "fields": [ + { + "name": "size", + "type": "long", + "doc": "Size of the file" + }, + { + "name": "isDeleted", + "type": "boolean", + "doc": "True if this file has been deleted" + } + ] + } + } + ] + }, + { + "doc": "Metadata Index of bloom filters for all data files in the user table", + "name": "BloomFilterMetadata", + "type": [ + "null", + { + "doc": "Data file bloom filter details", + "name": "HoodieMetadataBloomFilter", "type": "record", - "name": "HoodieMetadataFileInfo", "fields": [ { - "name": "size", - "type": "long", - "doc": "Size of the file" + "doc": "Bloom filter type code", + "name": "type", + "type": "string" + }, + { + "doc": "Instant timestamp when this metadata was created/updated", + "name": "timestamp", + "type": "string" + }, + { + "doc": "Bloom filter binary byte array", + "name": "bloomFilter", + "type": "bytes" + }, + { + "doc": "Bloom filter entry valid/deleted flag", + "name": "isDeleted", + "type": "boolean" + } + ] + } + ] + }, + { + "doc": "Metadata Index of column ranges for all data files in the user table", + "name": "ColumnStatsMetadata", + "type": [ + "null", + { + "doc": "Data file column ranges details", + "name": "HoodieColumnStats", + "type": "record", + "fields": [ + { + "doc": "Minimum value in the range. Based on user data table schema, we can convert this to appropriate type", + "name": "minValue", + "type": [ + "null", + "string" + ] + }, + { + "doc": "Maximum value in the range. 
Based on user data table schema, we can convert it to appropriate type", + "name": "maxValue", + "type": [ + "null", + "string" + ] + }, + { + "doc": "Maximum value in the range. Based on user data table schema, we can convert it to appropriate type", + "name": "nullCount", Review comment: This will be used for the Spark datasource query integration; it is not used for the index lookup. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org