This is an automated email from the ASF dual-hosted git repository. wgtmac pushed a commit to branch parquet-1.17.x in repository https://gitbox.apache.org/repos/asf/parquet-java.git
commit ef00c4634233c714b3fd8d82a09a6c7e58c4e8a3 Author: 0x26res <[email protected]> AuthorDate: Tue May 5 06:38:46 2026 +0100 GH-3112: Fix reading of proto Uint32Value (#3113) * Fix reading of proto Uint32Value * Add test for overflow * Apply spotless * Roll back change to original test * Better uint32 example --------- Co-authored-by: aandres3 <[email protected]> --- .../parquet/proto/ProtoMessageConverter.java | 6 +++ .../apache/parquet/proto/ProtoSchemaConverter.java | 2 +- .../apache/parquet/proto/ProtoWriteSupport.java | 2 +- .../parquet/proto/ProtoInputOutputFormatTest.java | 28 ++++++++++++ .../parquet/proto/ProtoSchemaConverterTest.java | 2 +- .../parquet/proto/ProtoWriteSupportTest.java | 51 +++++++++++++++------- 6 files changed, 72 insertions(+), 19 deletions(-) diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java index d446598f0..ff9e4ca33 100644 --- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java +++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java @@ -763,6 +763,12 @@ class ProtoMessageConverter extends GroupConverter { this.parent = parent; } + @Override + public void addInt(int value) { + parent.add(UInt32Value.of(value)); + } + + // This is left for backward compatibility with the old implementation which used int64 for uint32 @Override public void addLong(long value) { parent.add(UInt32Value.of(Math.toIntExact(value))); diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java index 74bb4235a..ff27b263e 100644 --- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java +++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java @@ -260,7 +260,7 @@ public class ProtoSchemaConverter { return builder.primitive(INT32, getRepetition(descriptor)); } if (messageType.equals(UInt32Value.getDescriptor())) { - return builder.primitive(INT64, getRepetition(descriptor)); + return builder.primitive(INT32, getRepetition(descriptor)); } if (messageType.equals(BytesValue.getDescriptor())) { return builder.primitive(BINARY, getRepetition(descriptor)); diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java index 389e84284..51e2d7e25 100644 --- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java +++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java @@ -766,7 +766,7 @@ public class ProtoWriteSupport<T extends MessageOrBuilder> extends WriteSupport< class UInt32ValueWriter extends FieldWriter { @Override void writeRawValue(Object value) { - recordConsumer.addLong(((UInt32Value) value).getValue()); + recordConsumer.addInteger(((UInt32Value) value).getValue()); } } diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java index 57ad4d4f0..ca59d3db5 100644 --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java @@ -678,6 +678,34 @@ public class ProtoInputOutputFormatTest { assertEquals(msgNonEmpty, result.get(1)); } + @Test + public void testProto3Uint32Behaviour() throws Exception { + + TestProto3.SchemaConverterAllDatatypes intMin = TestProto3.SchemaConverterAllDatatypes.newBuilder() + .setOptionalUInt32(Integer.MIN_VALUE) + .build(); + assertEquals(intMin.toString(), "optionalUInt32: 2147483648\n"); + TestProto3.SchemaConverterAllDatatypes uintMin = TestProto3.SchemaConverterAllDatatypes.newBuilder() + .setOptionalUInt32(-1) + .build(); + assertEquals(uintMin.toString(), "optionalUInt32: 4294967295\n"); + TestProto3.SchemaConverterAllDatatypes uintMax = TestProto3.SchemaConverterAllDatatypes.newBuilder() + .setOptionalUInt32(Integer.MAX_VALUE) + .build(); + assertEquals(uintMax.toString(), "optionalUInt32: 2147483647\n"); + + Configuration conf = new Configuration(); + Path outputPath = new WriteUsingMR(conf).write(intMin, uintMin, uintMax); + ReadUsingMR readUsingMR = new ReadUsingMR(conf); + String customClass = TestProto3.SchemaConverterAllDatatypes.class.getName(); + ProtoReadSupport.setProtobufClass(readUsingMR.getConfiguration(), customClass); + List<Message> result = readUsingMR.read(outputPath); + + assertEquals(result.get(0), intMin); + assertEquals(result.get(1), uintMin); + assertEquals(result.get(2), uintMax); + } + /** * Runs job that writes input to file and then job reading data back. */ diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java index 5240be5a3..e9f08f33f 100644 --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java @@ -400,7 +400,7 @@ public class ProtoSchemaConverterTest { + " optional int64 wrappedInt64 = 3;\n" + " optional int64 wrappedUInt64 = 4;\n" + " optional int32 wrappedInt32 = 5;\n" - + " optional int64 wrappedUInt32 = 6;\n" + + " optional int32 wrappedUInt32 = 6;\n" + " optional boolean wrappedBool = 7;\n" + " optional binary wrappedString (UTF8) = 8;\n" + " optional binary wrappedBytes = 9;\n" diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java index 360da8b74..e80524d14 100644 --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java @@ -22,22 +22,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import com.google.protobuf.BoolValue; -import com.google.protobuf.ByteString; -import com.google.protobuf.BytesValue; -import com.google.protobuf.Descriptors; -import com.google.protobuf.DoubleValue; -import com.google.protobuf.DynamicMessage; -import com.google.protobuf.FloatValue; -import com.google.protobuf.Int32Value; -import com.google.protobuf.Int64Value; -import com.google.protobuf.Message; -import com.google.protobuf.MessageOrBuilder; -import com.google.protobuf.StringValue; -import com.google.protobuf.Timestamp; -import com.google.protobuf.UInt32Value; -import com.google.protobuf.UInt64Value; -import com.google.protobuf.Value; +import com.google.protobuf.*; import com.google.protobuf.util.Timestamps; import java.io.IOException; import java.time.LocalDate; @@ -1372,6 +1357,40 @@ public class ProtoWriteSupportTest { gotBackFirst.getWrappedBytes().getValue()); } + @Test + public void testProto3WrappedMessageUnwrappedRoundTripUint32() throws Exception { + + TestProto3.WrappedMessage msgMin = TestProto3.WrappedMessage.newBuilder() + .setWrappedUInt32(UInt32Value.of(Integer.MIN_VALUE)) + .build(); + assertEquals(TextFormat.shortDebugString(msgMin), "wrappedUInt32 { value: 2147483648 }"); + + TestProto3.WrappedMessage msgMax = TestProto3.WrappedMessage.newBuilder() + .setWrappedUInt32(UInt32Value.of(Integer.MAX_VALUE)) + .build(); + assertEquals(TextFormat.shortDebugString(msgMax), "wrappedUInt32 { value: 2147483647 }"); + + TestProto3.WrappedMessage msgMinusOne = TestProto3.WrappedMessage.newBuilder() + .setWrappedUInt32(UInt32Value.of(-1)) + .build(); + assertEquals(TextFormat.shortDebugString(msgMinusOne), "wrappedUInt32 { value: 4294967295 }"); + + Path tmpFilePath = TestUtils.someTemporaryFilePath(); + ParquetWriter<MessageOrBuilder> writer = ProtoParquetWriter.<MessageOrBuilder>builder(tmpFilePath) + .withMessage(TestProto3.WrappedMessage.class) + .config(ProtoWriteSupport.PB_UNWRAP_PROTO_WRAPPERS, "true") + .build(); + writer.write(msgMin); + writer.write(msgMax); + writer.write(msgMinusOne); + writer.close(); + List<TestProto3.WrappedMessage> gotBack = TestUtils.readMessages(tmpFilePath, TestProto3.WrappedMessage.class); + + assertEquals(msgMin, gotBack.get(0)); + assertEquals(msgMax, gotBack.get(1)); + assertEquals(msgMinusOne, gotBack.get(2)); + } + @Test public void testProto3WrappedMessageWithNullsRoundTrip() throws Exception { TestProto3.WrappedMessage.Builder msg = TestProto3.WrappedMessage.newBuilder();
