cashmand commented on code in PR #3221:
URL: https://github.com/apache/parquet-java/pull/3221#discussion_r2130702144
##########
parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java:
##########
@@ -181,9 +184,58 @@ public void write(T record) {
}
private void writeRecord(GroupType schema, Schema avroSchema, Object record)
{
- recordConsumer.startGroup();
- writeRecordFields(schema, avroSchema, record);
- recordConsumer.endGroup();
+ if (schema.getLogicalTypeAnnotation() instanceof
LogicalTypeAnnotation.VariantLogicalTypeAnnotation) {
+ writeVariantFields(schema, avroSchema, record);
+ } else {
+ recordConsumer.startGroup();
+ writeRecordFields(schema, avroSchema, record);
+ recordConsumer.endGroup();
+ }
+ }
+
+ private void writeVariantFields(GroupType schema, Schema avroSchema, Object
record) {
+ List<Type> fields = schema.getFields();
+ List<Schema.Field> avroFields = avroSchema.getFields();
+ boolean binarySchema = true;
+ ByteBuffer metadata = null;
+ ByteBuffer value = null;
+ // Extract the value and metadata binary.
+ for (int index = 0; index < avroFields.size(); index++) {
+ Schema.Field avroField = avroFields.get(index);
+ Schema fieldSchema = AvroSchemaConverter.getNonNull(avroField.schema());
+ if (!fieldSchema.getType().equals(Schema.Type.BYTES)) {
+ binarySchema = false;
+ break;
+ }
+ Type fieldType = fields.get(index);
+ if (fieldType.getName() == "value") {
+ Object valueObj = model.getField(record, avroField.name(), index);
+ if (valueObj instanceof byte[]) {
+ value = ByteBuffer.wrap((byte[]) valueObj);
+ } else {
+ value = (ByteBuffer) valueObj;
+ }
+ } else if (fieldType.getName() == "metadata") {
+ Object metadataObj = model.getField(record, avroField.name(), index);
+ if (metadataObj instanceof byte[]) {
+ metadata = ByteBuffer.wrap((byte[]) metadataObj);
+ } else {
+ metadata = (ByteBuffer) metadataObj;
+ }
+ } else {
+ binarySchema = false;
+ break;
+ }
+ }
+
+ if (binarySchema) {
+ VariantValueWriter.write(recordConsumer, schema, new Variant(value,
metadata));
+ } else {
+ // If the schema was something other than value and metadata, treat the
value as a non-variant record.
Review Comment:
Sounds good. My thought was that someone might want to write data that has
already been shredded - i.e. annotate the Parquet type as Variant, but directly
write using an Avro record that already matches the shredded schema. But if
there's a real use case for that, and no other workaround, it can always be
added later.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]