This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new bede4c22 Test and fix for bug in selectMapimpl (#726)
bede4c22 is described below
commit bede4c2242796026a8054d4057fe9d398b20be34
Author: wwarner-inf <[email protected]>
AuthorDate: Tue Mar 24 23:27:42 2026 -0400
Test and fix for bug in selectMapimpl (#726)
### Rationale for this change
Maps of Nullables produce corrupted parquet.
### What changes are included in this PR?
The existing tests for `TakeKernelMap` are made runnable by correcting
the test json to be suitable input for `array.FromJSON()`. Added a
condition in `assertTakeArrays()` that checks that the data type has
been preserved. Using the C++ implementation as a reference.
### Are these changes tested?
Yes, but not deeply.
### Are there any user-facing changes?
The api is unchanged, however parquet output will change slightly for
those files containing maps with optional values.
---
arrow/compute/selection.go | 37 +++++++++-------------------------
arrow/compute/vector_selection_test.go | 27 +++++++++++++++++++++++++
2 files changed, 36 insertions(+), 28 deletions(-)
diff --git a/arrow/compute/selection.go b/arrow/compute/selection.go
index a225febd..7e2bd0e5 100644
--- a/arrow/compute/selection.go
+++ b/arrow/compute/selection.go
@@ -26,7 +26,6 @@ import (
"github.com/apache/arrow-go/v18/arrow/array"
"github.com/apache/arrow-go/v18/arrow/compute/exec"
"github.com/apache/arrow-go/v18/arrow/compute/internal/kernels"
- "github.com/apache/arrow-go/v18/arrow/memory"
"golang.org/x/sync/errgroup"
)
@@ -375,37 +374,19 @@ func selectMapImpl(fn exec.ArrayKernelExec)
exec.ArrayKernelExec {
childIndices := out.Children[0].MakeArray()
defer childIndices.Release()
- // Maps have a single struct child containing the keys and items
- // We need to take from both the keys and items arrays
- takenKeys, err := TakeArrayOpts(ctx.Ctx, values.Keys(),
childIndices, kernels.TakeOptions{BoundsCheck: false})
- if err != nil {
- return err
- }
- defer takenKeys.Release()
+ // Take the entire struct child as a unit, preserving all field
metadata
+ // (including Nullable flags and validity bitmaps). This
matches the C++
+ // reference implementation which calls Take on
typed_values.values() whole.
+ structChild :=
array.MakeFromData(values.Data().(*array.Data).Children()[0])
+ defer structChild.Release()
- takenItems, err := TakeArrayOpts(ctx.Ctx, values.Items(),
childIndices, kernels.TakeOptions{BoundsCheck: false})
+ takenStruct, err := TakeArrayOpts(ctx.Ctx, structChild,
childIndices, kernels.TakeOptions{BoundsCheck: false})
if err != nil {
return err
}
- defer takenItems.Release()
-
- // Build the struct child array with the taken keys and items
- // Maps have a single struct child with "key" and "value" fields
- structType := arrow.StructOf(
- arrow.Field{Name: "key", Type:
values.Keys().DataType()},
- arrow.Field{Name: "value", Type:
values.Items().DataType()},
- )
-
- // Create struct data with taken keys and items as children
- structData := array.NewData(
- structType,
- int(childIndices.Len()),
- []*memory.Buffer{nil},
- []arrow.ArrayData{takenKeys.Data(), takenItems.Data()},
- 0, 0)
- defer structData.Release()
-
- out.Children[0].TakeOwnership(structData)
+ defer takenStruct.Release()
+
+ out.Children[0].TakeOwnership(takenStruct.Data())
return nil
}
}
diff --git a/arrow/compute/vector_selection_test.go
b/arrow/compute/vector_selection_test.go
index a8353fcc..62fcce08 100644
--- a/arrow/compute/vector_selection_test.go
+++ b/arrow/compute/vector_selection_test.go
@@ -1769,6 +1769,33 @@ func (tk *TakeKernelTestTable) TestTakeTable() {
tk.assertChunkedTake(schm, tblJSON, []string{`[0, 1]`, `[2, 3]`},
tblJSON)
}
+// TestMapPreservesValueNullable verifies that Take preserves the Map's struct
+// child type exactly. arrow.MapOf marks the value field Nullable: true; a
+// buggy selectMapImpl reconstructs the struct child via arrow.StructOf field
+// literals whose Nullable defaults to false, silently dropping the flag.
+// The outer MapType is always preserved by the kernel framework, so the
+// mismatch must be checked on the struct child directly.
+func TestMapPreservesValueNullable(t *testing.T) {
+ mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+ defer mem.AssertSize(t, 0)
+ ctx := compute.WithAllocator(context.TODO(), mem)
+
+ dt := arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32)
+ values, _, _ := array.FromJSON(mem, dt, strings.NewReader(`[[{"key":
"a", "value": 1}]]`), array.WithUseNumber())
+ defer values.Release()
+ indices, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8,
strings.NewReader(`[0]`))
+ defer indices.Release()
+
+ actual, err := compute.TakeArray(ctx, values, indices)
+ require.NoError(t, err)
+ defer actual.Release()
+
+ wantChildType := values.Data().Children()[0].DataType()
+ gotChildType := actual.Data().Children()[0].DataType()
+ assert.Truef(t, arrow.TypeEqual(wantChildType, gotChildType),
+ "Map struct child type not preserved:\nwant: %s\n got: %s",
wantChildType, gotChildType)
+}
+
func TestTakeKernels(t *testing.T) {
suite.Run(t, new(TakeKernelTest))
for _, dt := range numericTypes {