This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git


The following commit(s) were added to refs/heads/main by this push:
     new bede4c22 Test and fix for bug in selectMapimpl (#726)
bede4c22 is described below

commit bede4c2242796026a8054d4057fe9d398b20be34
Author: wwarner-inf <[email protected]>
AuthorDate: Tue Mar 24 23:27:42 2026 -0400

    Test and fix for bug in selectMapimpl (#726)
    
    ### Rationale for this change
    
    Maps of Nullables produce corrupted parquet.
    
    ### What changes are included in this PR?
    
    The existing tests for `TakeKernelMap` are made runnable by correcting
    the test json to be suitable input for `array.FromJSON()`. Added a
    condition in `assertTakeArrays()` that checks that the data type has
    been preserved. Using the C++ implementation as a reference.
    
    ### Are these changes tested?
    Yes, but not deeply.
    
    ### Are there any user-facing changes?
    The api is unchanged, however parquet output will change slightly for
    those files containing maps with optional values.
---
 arrow/compute/selection.go             | 37 +++++++++-------------------------
 arrow/compute/vector_selection_test.go | 27 +++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/arrow/compute/selection.go b/arrow/compute/selection.go
index a225febd..7e2bd0e5 100644
--- a/arrow/compute/selection.go
+++ b/arrow/compute/selection.go
@@ -26,7 +26,6 @@ import (
        "github.com/apache/arrow-go/v18/arrow/array"
        "github.com/apache/arrow-go/v18/arrow/compute/exec"
        "github.com/apache/arrow-go/v18/arrow/compute/internal/kernels"
-       "github.com/apache/arrow-go/v18/arrow/memory"
        "golang.org/x/sync/errgroup"
 )
 
@@ -375,37 +374,19 @@ func selectMapImpl(fn exec.ArrayKernelExec) 
exec.ArrayKernelExec {
                childIndices := out.Children[0].MakeArray()
                defer childIndices.Release()
 
-               // Maps have a single struct child containing the keys and items
-               // We need to take from both the keys and items arrays
-               takenKeys, err := TakeArrayOpts(ctx.Ctx, values.Keys(), 
childIndices, kernels.TakeOptions{BoundsCheck: false})
-               if err != nil {
-                       return err
-               }
-               defer takenKeys.Release()
+               // Take the entire struct child as a unit, preserving all field 
metadata
+               // (including Nullable flags and validity bitmaps).  This 
matches the C++
+               // reference implementation which calls Take on 
typed_values.values() whole.
+               structChild := 
array.MakeFromData(values.Data().(*array.Data).Children()[0])
+               defer structChild.Release()
 
-               takenItems, err := TakeArrayOpts(ctx.Ctx, values.Items(), 
childIndices, kernels.TakeOptions{BoundsCheck: false})
+               takenStruct, err := TakeArrayOpts(ctx.Ctx, structChild, 
childIndices, kernels.TakeOptions{BoundsCheck: false})
                if err != nil {
                        return err
                }
-               defer takenItems.Release()
-
-               // Build the struct child array with the taken keys and items
-               // Maps have a single struct child with "key" and "value" fields
-               structType := arrow.StructOf(
-                       arrow.Field{Name: "key", Type: 
values.Keys().DataType()},
-                       arrow.Field{Name: "value", Type: 
values.Items().DataType()},
-               )
-
-               // Create struct data with taken keys and items as children
-               structData := array.NewData(
-                       structType,
-                       int(childIndices.Len()),
-                       []*memory.Buffer{nil},
-                       []arrow.ArrayData{takenKeys.Data(), takenItems.Data()},
-                       0, 0)
-               defer structData.Release()
-
-               out.Children[0].TakeOwnership(structData)
+               defer takenStruct.Release()
+
+               out.Children[0].TakeOwnership(takenStruct.Data())
                return nil
        }
 }
diff --git a/arrow/compute/vector_selection_test.go 
b/arrow/compute/vector_selection_test.go
index a8353fcc..62fcce08 100644
--- a/arrow/compute/vector_selection_test.go
+++ b/arrow/compute/vector_selection_test.go
@@ -1769,6 +1769,33 @@ func (tk *TakeKernelTestTable) TestTakeTable() {
        tk.assertChunkedTake(schm, tblJSON, []string{`[0, 1]`, `[2, 3]`}, 
tblJSON)
 }
 
+// TestMapPreservesValueNullable verifies that Take preserves the Map's struct
+// child type exactly.  arrow.MapOf marks the value field Nullable: true; a
+// buggy selectMapImpl reconstructs the struct child via arrow.StructOf field
+// literals whose Nullable defaults to false, silently dropping the flag.
+// The outer MapType is always preserved by the kernel framework, so the
+// mismatch must be checked on the struct child directly.
+func TestMapPreservesValueNullable(t *testing.T) {
+       mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+       defer mem.AssertSize(t, 0)
+       ctx := compute.WithAllocator(context.TODO(), mem)
+
+       dt := arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32)
+       values, _, _ := array.FromJSON(mem, dt, strings.NewReader(`[[{"key": 
"a", "value": 1}]]`), array.WithUseNumber())
+       defer values.Release()
+       indices, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, 
strings.NewReader(`[0]`))
+       defer indices.Release()
+
+       actual, err := compute.TakeArray(ctx, values, indices)
+       require.NoError(t, err)
+       defer actual.Release()
+
+       wantChildType := values.Data().Children()[0].DataType()
+       gotChildType := actual.Data().Children()[0].DataType()
+       assert.Truef(t, arrow.TypeEqual(wantChildType, gotChildType),
+               "Map struct child type not preserved:\nwant: %s\n got: %s", 
wantChildType, gotChildType)
+}
+
 func TestTakeKernels(t *testing.T) {
        suite.Run(t, new(TakeKernelTest))
        for _, dt := range numericTypes {

Reply via email to