alamb commented on code in PR #90:
URL: https://github.com/apache/parquet-testing/pull/90#discussion_r2257912290


##########
shredded_variant/cases.json:
##########
@@ -0,0 +1,819 @@
+[ {
+  "case_number" : 1,
+  "test" : "testSimpleArray",
+  "parquet_file" : "case-001.parquet",
+  "variant_file" : "case-001_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=VariantArray([Variant(type=STRING, value=comedy), Variant(type=STRING, 
value=drama)]))"
+}, {
+  "case_number" : 2,
+  "test" : "testEmptyArray",
+  "parquet_file" : "case-002.parquet",
+  "variant_file" : "case-002_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=VariantArray([]))"
+}, {
+  "case_number" : 3
+}, {
+  "case_number" : 4,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-004.parquet",
+  "variant_file" : "case-004_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=BOOLEAN_TRUE, value=true))"
+}, {
+  "case_number" : 5,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-005.parquet",
+  "variant_file" : "case-005_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=BOOLEAN_FALSE, value=false))"
+}, {
+  "case_number" : 6,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-006.parquet",
+  "variant_file" : "case-006_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=INT8, value=34))"
+}, {
+  "case_number" : 7,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-007.parquet",
+  "variant_file" : "case-007_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=INT8, value=-34))"
+}, {
+  "case_number" : 8,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-008.parquet",
+  "variant_file" : "case-008_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=INT16, value=1234))"
+}, {
+  "case_number" : 9,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-009.parquet",
+  "variant_file" : "case-009_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=INT16, value=-1234))"
+}, {
+  "case_number" : 10,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-010.parquet",
+  "variant_file" : "case-010_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=INT32, value=12345))"
+}, {
+  "case_number" : 11,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-011.parquet",
+  "variant_file" : "case-011_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=INT32, value=-12345))"
+}, {
+  "case_number" : 12,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-012.parquet",
+  "variant_file" : "case-012_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=INT64, value=9876543210))"
+}, {
+  "case_number" : 13,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-013.parquet",
+  "variant_file" : "case-013_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=INT64, value=-9876543210))"
+}, {
+  "case_number" : 14,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-014.parquet",
+  "variant_file" : "case-014_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=FLOAT, value=10.11))"
+}, {
+  "case_number" : 15,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-015.parquet",
+  "variant_file" : "case-015_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=FLOAT, value=-10.11))"
+}, {
+  "case_number" : 16,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-016.parquet",
+  "variant_file" : "case-016_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DOUBLE, value=14.3))"
+}, {
+  "case_number" : 17,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-017.parquet",
+  "variant_file" : "case-017_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DOUBLE, value=-14.3))"
+}, {
+  "case_number" : 18,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-018.parquet",
+  "variant_file" : "case-018_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DATE, value=2024-11-07))"
+}, {
+  "case_number" : 19,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-019.parquet",
+  "variant_file" : "case-019_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DATE, value=1957-11-07))"
+}, {
+  "case_number" : 20,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-020.parquet",
+  "variant_file" : "case-020_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=TIMESTAMPTZ, value=2024-11-07T12:33:54.123456+00:00))"
+}, {
+  "case_number" : 21,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-021.parquet",
+  "variant_file" : "case-021_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=TIMESTAMPTZ, value=1957-11-07T12:33:54.123456+00:00))"
+}, {
+  "case_number" : 22,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-022.parquet",
+  "variant_file" : "case-022_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=TIMESTAMPNTZ, value=2024-11-07T12:33:54.123456))"
+}, {
+  "case_number" : 23,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-023.parquet",
+  "variant_file" : "case-023_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=TIMESTAMPNTZ, value=1957-11-07T12:33:54.123456))"
+}, {
+  "case_number" : 24,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-024.parquet",
+  "variant_file" : "case-024_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DECIMAL4, value=123456.7890))"
+}, {
+  "case_number" : 25,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-025.parquet",
+  "variant_file" : "case-025_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DECIMAL4, value=-123456.7890))"
+}, {
+  "case_number" : 26,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-026.parquet",
+  "variant_file" : "case-026_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DECIMAL8, value=1234567890.987654321))"
+}, {
+  "case_number" : 27,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-027.parquet",
+  "variant_file" : "case-027_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DECIMAL8, value=-1234567890.987654321))"
+}, {
+  "case_number" : 28,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-028.parquet",
+  "variant_file" : "case-028_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DECIMAL16, value=9876543210.123456789))"
+}, {
+  "case_number" : 29,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-029.parquet",
+  "variant_file" : "case-029_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DECIMAL16, value=-9876543210.123456789))"
+}, {
+  "case_number" : 30,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-030.parquet",
+  "variant_file" : "case-030_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=BINARY, value=0A0B0C0D))"
+}, {
+  "case_number" : 31,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-031.parquet",
+  "variant_file" : "case-031_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=STRING, value=iceberg))"
+}, {
+  "case_number" : 32,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-032.parquet",
+  "variant_file" : "case-032_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=TIME, value=12:33:54.123456))"
+}, {
+  "case_number" : 33,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-033.parquet",
+  "variant_file" : "case-033_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=TIMESTAMPTZ_NANOS, 
value=2024-11-07T12:33:54.123456789+00:00))"
+}, {
+  "case_number" : 34,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-034.parquet",
+  "variant_file" : "case-034_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=TIMESTAMPTZ_NANOS, 
value=1957-11-07T12:33:54.123456789+00:00))"
+}, {
+  "case_number" : 35,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-035.parquet",
+  "variant_file" : "case-035_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=TIMESTAMPNTZ_NANOS, value=2024-11-07T12:33:54.123456789))"
+}, {
+  "case_number" : 36,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-036.parquet",
+  "variant_file" : "case-036_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=TIMESTAMPNTZ_NANOS, value=1957-11-07T12:33:54.123456789))"
+}, {
+  "case_number" : 37,
+  "test" : "testShreddedVariantPrimitives",
+  "parquet_file" : "case-037.parquet",
+  "variant_file" : "case-037_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=UUID, value=f24f9b64-81fa-49d1-b74e-8c09a6e31c56))"
+}, {
+  "case_number" : 38,
+  "test" : "testShreddedObjectMissingTypedValue",
+  "parquet_file" : "case-038.parquet",
+  "variant_file" : "case-038_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={0 => a, 1 => b, 2 => c, 
3 => d, 4 => e}), value=VariantObject(fields={b: Variant(type=STRING, 
value=iceberg)}))"
+}, {
+  "case_number" : 39,
+  "test" : "testNonObjectWithNullShreddedFields",
+  "parquet_file" : "case-039.parquet",
+  "variant_file" : "case-039_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={0 => a, 1 => b, 2 => c, 
3 => d, 4 => e}), value=Variant(type=INT32, value=34))"
+}, {
+  "case_number" : 40,
+  "test" : "testArrayWithElementValueTypedValueConflict",
+  "parquet_file" : "case-040.parquet",
+  "error_message" : "Invalid variant, conflicting value and typed_value"
+}, {
+  "case_number" : 41,
+  "test" : "testArrayMissingValueColumn",
+  "parquet_file" : "case-041.parquet",
+  "variant_file" : "case-041_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=VariantArray([Variant(type=STRING, value=comedy), Variant(type=STRING, 
value=drama)]))"
+}, {
+  "case_number" : 42,
+  "test" : "testValueAndTypedValueConflict",
+  "parquet_file" : "case-042.parquet",
+  "error_message" : "Invalid variant, conflicting value and typed_value"
+}, {
+  "case_number" : 43,
+  "test" : "testPartiallyShreddedObjectMissingFieldConflict",
+  "parquet_file" : "case-043.parquet",
+  "variant_file" : "case-043_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={0 => a, 1 => b, 2 => c, 
3 => d, 4 => e}), value=VariantObject(fields={a: Variant(type=NULL, 
value=null)}))"
+}, {
+  "case_number" : 44,
+  "test" : "testShreddedObjectWithinShreddedObject",
+  "parquet_file" : "case-044.parquet",
+  "variant_file" : "case-044_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={0 => a, 1 => b, 2 => c, 
3 => d, 4 => e}), value=VariantObject(fields={c: VariantObject(fields={a: 
Variant(type=INT32, value=34), b: Variant(type=STRING, value=iceberg)}), d: 
Variant(type=DOUBLE, value=-0.0)}))"
+}, {
+  "case_number" : 45,
+  "test" : "testArrayWithNestedObject",
+  "parquet_file" : "case-045.parquet",
+  "variant_files" : [ "case-045_row-0.variant.bin", 
"case-045_row-1.variant.bin", "case-045_row-2.variant.bin", 
"case-045_row-3.variant.bin" ],
+  "variants" : "[Variant(metadata=VariantMetadata(dict={}), 
value=VariantArray([Variant(type=STRING, value=comedy), Variant(type=STRING, 
value=drama)])), Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=INT32, value=34)), Variant(metadata=VariantMetadata(dict={0 
=> a, 1 => b, 2 => c, 3 => d, 4 => e}), value=VariantObject(fields={a: 
Variant(type=NULL, value=null), d: Variant(type=STRING, value=iceberg)})), 
Variant(metadata=VariantMetadata(dict={0 => a, 1 => b, 2 => c, 3 => d, 4 => 
e}), value=VariantArray([Variant(type=STRING, value=action), 
Variant(type=STRING, value=horror)]))]"
+}, {
+  "case_number" : 46,
+  "test" : "testShreddedObject",
+  "parquet_file" : "case-046.parquet",
+  "variant_file" : "case-046_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={0 => a, 1 => b, 2 => c, 
3 => d, 4 => e}), value=VariantObject(fields={a: Variant(type=NULL, 
value=null), b: Variant(type=STRING, value=)}))"
+}, {
+  "case_number" : 47,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-047.parquet",
+  "variant_file" : "case-047_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=NULL, value=null))"
+}, {
+  "case_number" : 48,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-048.parquet",
+  "variant_file" : "case-048_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=BOOLEAN_TRUE, value=true))"
+}, {
+  "case_number" : 49,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-049.parquet",
+  "variant_file" : "case-049_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=BOOLEAN_FALSE, value=false))"
+}, {
+  "case_number" : 50,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-050.parquet",
+  "variant_file" : "case-050_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=INT8, value=34))"
+}, {
+  "case_number" : 51,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-051.parquet",
+  "variant_file" : "case-051_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=INT8, value=-34))"
+}, {
+  "case_number" : 52,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-052.parquet",
+  "variant_file" : "case-052_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=INT16, value=1234))"
+}, {
+  "case_number" : 53,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-053.parquet",
+  "variant_file" : "case-053_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=INT16, value=-1234))"
+}, {
+  "case_number" : 54,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-054.parquet",
+  "variant_file" : "case-054_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=INT32, value=12345))"
+}, {
+  "case_number" : 55,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-055.parquet",
+  "variant_file" : "case-055_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=INT32, value=-12345))"
+}, {
+  "case_number" : 56,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-056.parquet",
+  "variant_file" : "case-056_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=INT64, value=9876543210))"
+}, {
+  "case_number" : 57,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-057.parquet",
+  "variant_file" : "case-057_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=INT64, value=-9876543210))"
+}, {
+  "case_number" : 58,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-058.parquet",
+  "variant_file" : "case-058_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=FLOAT, value=10.11))"
+}, {
+  "case_number" : 59,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-059.parquet",
+  "variant_file" : "case-059_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=FLOAT, value=-10.11))"
+}, {
+  "case_number" : 60,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-060.parquet",
+  "variant_file" : "case-060_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DOUBLE, value=14.3))"
+}, {
+  "case_number" : 61,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-061.parquet",
+  "variant_file" : "case-061_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DOUBLE, value=-14.3))"
+}, {
+  "case_number" : 62,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-062.parquet",
+  "variant_file" : "case-062_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DATE, value=2024-11-07))"
+}, {
+  "case_number" : 63,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-063.parquet",
+  "variant_file" : "case-063_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DATE, value=1957-11-07))"
+}, {
+  "case_number" : 64,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-064.parquet",
+  "variant_file" : "case-064_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=TIMESTAMPTZ, value=2024-11-07T12:33:54.123456+00:00))"
+}, {
+  "case_number" : 65,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-065.parquet",
+  "variant_file" : "case-065_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=TIMESTAMPTZ, value=1957-11-07T12:33:54.123456+00:00))"
+}, {
+  "case_number" : 66,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-066.parquet",
+  "variant_file" : "case-066_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=TIMESTAMPNTZ, value=2024-11-07T12:33:54.123456))"
+}, {
+  "case_number" : 67,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-067.parquet",
+  "variant_file" : "case-067_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=TIMESTAMPNTZ, value=1957-11-07T12:33:54.123456))"
+}, {
+  "case_number" : 68,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-068.parquet",
+  "variant_file" : "case-068_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DECIMAL4, value=123456.7890))"
+}, {
+  "case_number" : 69,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-069.parquet",
+  "variant_file" : "case-069_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DECIMAL4, value=-123456.7890))"
+}, {
+  "case_number" : 70,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-070.parquet",
+  "variant_file" : "case-070_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DECIMAL8, value=1234567890.987654321))"
+}, {
+  "case_number" : 71,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-071.parquet",
+  "variant_file" : "case-071_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DECIMAL8, value=-1234567890.987654321))"
+}, {
+  "case_number" : 72,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-072.parquet",
+  "variant_file" : "case-072_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DECIMAL16, value=9876543210.123456789))"
+}, {
+  "case_number" : 73,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-073.parquet",
+  "variant_file" : "case-073_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=DECIMAL16, value=-9876543210.123456789))"
+}, {
+  "case_number" : 74,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-074.parquet",
+  "variant_file" : "case-074_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=BINARY, value=0A0B0C0D))"
+}, {
+  "case_number" : 75,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-075.parquet",
+  "variant_file" : "case-075_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=STRING, value=iceberg))"
+}, {
+  "case_number" : 76,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-076.parquet",
+  "variant_file" : "case-076_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=TIME, value=12:33:54.123456))"
+}, {
+  "case_number" : 77,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-077.parquet",
+  "variant_file" : "case-077_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=TIMESTAMPTZ_NANOS, 
value=2024-11-07T12:33:54.123456789+00:00))"
+}, {
+  "case_number" : 78,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-078.parquet",
+  "variant_file" : "case-078_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=TIMESTAMPTZ_NANOS, 
value=1957-11-07T12:33:54.123456789+00:00))"
+}, {
+  "case_number" : 79,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-079.parquet",
+  "variant_file" : "case-079_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=TIMESTAMPNTZ_NANOS, value=2024-11-07T12:33:54.123456789))"
+}, {
+  "case_number" : 80,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-080.parquet",
+  "variant_file" : "case-080_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=TIMESTAMPNTZ_NANOS, value=1957-11-07T12:33:54.123456789))"
+}, {
+  "case_number" : 81,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-081.parquet",
+  "variant_file" : "case-081_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={}), 
value=Variant(type=UUID, value=f24f9b64-81fa-49d1-b74e-8c09a6e31c56))"
+}, {
+  "case_number" : 82,
+  "test" : "testUnshreddedVariants",
+  "parquet_file" : "case-082.parquet",
+  "variant_file" : "case-082_row-0.variant.bin",
+  "variant" : "Variant(metadata=VariantMetadata(dict={0 => a, 1 => b, 2 => c, 
3 => d, 4 => e}), value=VariantObject(fields={a: Variant(type=NULL, 
value=null), d: Variant(type=STRING, value=iceberg)}))"
+}, {
+  "case_number" : 83,
+  "test" : "testMixedRecords",
+  "parquet_file" : "case-083.parquet",
+  "variant_files" : [ null, "case-083_row-1.variant.bin", 
"case-083_row-2.variant.bin", "case-083_row-3.variant.bin" ],
+  "variants" : "[null, Variant(metadata=VariantMetadata(dict={0 => a, 1 => b, 
2 => c, 3 => d, 4 => e}), value=VariantObject(fields={c: 
VariantObject(fields={b: Variant(type=STRING, value=iceberg)})})), 
Variant(metadata=VariantMetadata(dict={0 => a, 1 => b, 2 => c, 3 => d, 4 => 
e}), value=VariantObject(fields={c: Variant(type=INT8, value=8), d: 
Variant(type=DOUBLE, value=-0.0)})), Variant(metadata=VariantMetadata(dict={0 
=> a, 1 => b, 2 => c, 3 => d, 4 => e}), value=VariantObject(fields={c: 
VariantObject(fields={a: Variant(type=INT32, value=34), b: Variant(type=STRING, 
value=)}), d: Variant(type=DOUBLE, value=0.0)}))]"
+}, {
+  "case_number" : 84,

Review Comment:
   This came up on the PR with the Go integration test: 
   - https://github.com/apache/arrow-go/pull/455
   
   
   > Test case 84, testShreddedObjectWithOptionalFieldStructs tests the 
scenario where the shredded fields of an object are listed as optional in the 
schema, but the spec states that they must be required. Thus, the Go 
implementation errors on this test as the spec says this is an error. 
Clarification is needed on if this is a valid test case.
   
   
   I think this case doesn't have an error_message because it was created by 
Iceberg, which chose (as is allowed per the spec) to still read the invalid 
data.
   
   @rdblue says in 
https://github.com/apache/arrow-go/pull/455#issuecomment-3152748447: 
   
   > They are not allowed by the spec. The implementation I generated these 
cases from is defensive and tries to read if it can rather than producing 
errors. I'd recommend doing the same thing to handle outside-of-spec cases.
   
   Thus, I suggest we resolve the confusion by updating these tests in this PR 
to make it clearer that they are not valid. For example, @julienledem suggested 
naming such invalid files as `case-084-INVALID.parquet` 
   
   Another option might be to add a `notes` field, something like:
   ```json
     "notes": "This parquet file is not valid according to the spec and 
implementations can choose to error, or read the non shredded value",
   ```
   cc @aihuaxu @RussellSpitzer 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to