This is an automated email from the ASF dual-hosted git repository.

wgtmac pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-testing.git


The following commit(s) were added to refs/heads/master by this push:
     new fa255df  Correct `fixed_length_byte_array.parquet` (#111)
fa255df is described below

commit fa255dfacf58c8bab428b5d0117d188acc8ad03f
Author: ccleva <[email protected]>
AuthorDate: Wed May 20 11:54:31 2026 +0200

    Correct `fixed_length_byte_array.parquet` (#111)
    
    * Change fixed_length_byte_array.parquet flba_field to optional
    
    * Add previous fixed_length_byte_array.parquet file to bad_data
---
 .../ARROW-GH-47662.parquet                         | Bin
 bad_data/README.md                                 |   2 ++
 data/fixed_length_byte_array.md                    |  28 ++++++++++-----------
 data/fixed_length_byte_array.parquet               | Bin 4335 -> 4437 bytes
 4 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/data/fixed_length_byte_array.parquet 
b/bad_data/ARROW-GH-47662.parquet
similarity index 100%
copy from data/fixed_length_byte_array.parquet
copy to bad_data/ARROW-GH-47662.parquet
diff --git a/bad_data/README.md b/bad_data/README.md
index bb12b6f..4fbc0c4 100644
--- a/bad_data/README.md
+++ b/bad_data/README.md
@@ -33,3 +33,5 @@ These are files used for reproducing various bugs that have 
been reported.
 * ARROW-GH-43605.parquet: dictionary index page uses rle encoding but 0 as rle 
bit-width.
 * ARROW-GH-45185.parquet: test case of 
https://github.com/apache/arrow/issues/45185
   where repetition levels start with a 1 instead of 0.
+* ARROW-GH-47662.parquet: test case identified in 
https://github.com/apache/arrow/issues/47662
+  where a required column contains null values (the previous, incorrect version of 
data/fixed_length_byte_array.parquet).
diff --git a/data/fixed_length_byte_array.md b/data/fixed_length_byte_array.md
index a0d98ac..c255753 100644
--- a/data/fixed_length_byte_array.md
+++ b/data/fixed_length_byte_array.md
@@ -31,14 +31,14 @@ Properties:
   writer.model.name: example
 Schema:
 message schema {
-  required fixed_len_byte_array(4) flba_field;
+  optional fixed_len_byte_array(4) flba_field;
 }
 
 
-Row group 0:  count: 1000  3.84 B records  start: 4  total(compressed): 3.749 
kB total(uncompressed):3.749 kB
+Row group 0:  count: 1000  3.94 B records  start: 4  total(compressed): 3.848 
kB total(uncompressed):3.848 kB
 
--------------------------------------------------------------------------------
             type      encodings count     avg size   nulls   min / max
-flba_field  FIXED[4] _   _     1000      3.84 B   105     "0x00000001" / 
"0x000003E8"
+flba_field  FIXED[4] _   _     1000      3.94 B   105     "0x00000001" / 
"0x000003E8"
 ```
 
 # Column Index (from parquet-cli column-index command)
@@ -59,15 +59,15 @@ page-8                         9  0x00000065                
                0x00
 page-9                         6  0x00000001                                
0x00000064
 
 offset index for column flba_field:
-                          offset   compressed size       first row index
-page-0                         4               390                     0
-page-1                       394               390                   100
-page-2                       784               350                   200
-page-3                      1134               386                   300
-page-4                      1520               373                   400
-page-5                      1893               382                   500
-page-6                      2275               382                   600
-page-7                      2657               394                   700
-page-8                      3051               390                   800
-page-9                      3441               402                   900
+                          offset       compressed size       first row index   
    unencoded bytes
+page-0                         4                   400                     0   
                  -
+page-1                       404                   400                   100   
                  -
+page-2                       804                   361                   200   
                  -
+page-3                      1165                   396                   300   
                  -
+page-4                      1561                   384                   400   
                  -
+page-5                      1945                   392                   500   
                  -
+page-6                      2337                   392                   600   
                  -
+page-7                      2729                   404                   700   
                  -
+page-8                      3133                   400                   800   
                  -
+page-9                      3533                   411                   900   
                  -
 ```
diff --git a/data/fixed_length_byte_array.parquet 
b/data/fixed_length_byte_array.parquet
index e86a886..240d002 100644
Binary files a/data/fixed_length_byte_array.parquet and 
b/data/fixed_length_byte_array.parquet differ

Reply via email to