This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new f3677ae3 ci: fix tinygo build with maphash (#734)
f3677ae3 is described below
commit f3677ae347146da54a11eda3666b680b70d20c8f
Author: Matt Topol <[email protected]>
AuthorDate: Fri Mar 27 16:17:13 2026 -0400
ci: fix tinygo build with maphash (#734)
### Rationale for this change
TinyGo doesn't properly satisfy the ABI that `hash/maphash` expects, so
the project doesn't build correctly under TinyGo with Go 1.25.
### What changes are included in this PR?
Add a hash implementation that works on TinyGo, and convert the rest of
the code to import a conditionally compiled `maphash` package
(`internal/utils/maphash`). Depending on what is being built, that package
uses either the stdlib `hash/maphash` or the TinyGo-compatible implementation.
---
.github/workflows/test.yml | 2 +-
arrow/array/data.go | 5 +-
arrow/compute/exec/kernel.go | 7 +-
arrow/compute/expression.go | 5 +-
arrow/compute/exprs/types.go | 3 +-
arrow/compute/fieldref.go | 13 ++--
arrow/compute/fieldref_hash.go | 5 +-
arrow/datatype.go | 5 +-
arrow/internal/dictutils/dict.go | 5 +-
arrow/scalar/scalar.go | 5 +-
arrow/scalar/scalar_test.go | 3 +-
.../utils/maphash/maphash.go | 25 ++-----
internal/utils/maphash/maphash_tinygo.go | 79 ++++++++++++++++++++++
13 files changed, 120 insertions(+), 42 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index d4020a50..d8dbb174 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -330,7 +330,7 @@ jobs:
name: TinyGo
runs-on: ubuntu-latest
env:
- TINYGO_VERSION: 0.38.0
+ TINYGO_VERSION: 0.40.1
timeout-minutes: 20
steps:
- name: Checkout
diff --git a/arrow/array/data.go b/arrow/array/data.go
index 6dafd8a9..0f017032 100644
--- a/arrow/array/data.go
+++ b/arrow/array/data.go
@@ -17,11 +17,12 @@
package array
import (
- "hash/maphash"
"math/bits"
"sync/atomic"
"unsafe"
+ "github.com/apache/arrow-go/v18/internal/utils/maphash"
+
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/internal/debug"
"github.com/apache/arrow-go/v18/arrow/memory"
@@ -266,7 +267,7 @@ func NewSliceData(data arrow.ArrayData, i, j int64) arrow.ArrayData {
return o
}
-func Hash(h *maphash.Hash, data arrow.ArrayData) {
+func Hash(h *maphash.MapHash, data arrow.ArrayData) {
a := data.(*Data)
h.Write((*[bits.UintSize / 8]byte)(unsafe.Pointer(&a.length))[:])
diff --git a/arrow/compute/exec/kernel.go b/arrow/compute/exec/kernel.go
index 108c7629..377cf6de 100644
--- a/arrow/compute/exec/kernel.go
+++ b/arrow/compute/exec/kernel.go
@@ -21,10 +21,11 @@ package exec
import (
"context"
"fmt"
- "hash/maphash"
"slices"
"strings"
+ "github.com/apache/arrow-go/v18/internal/utils/maphash"
+
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/bitutil"
"github.com/apache/arrow-go/v18/arrow/internal/debug"
@@ -381,7 +382,7 @@ func (it *InputType) Equals(other *InputType) bool {
}
func (it InputType) Hash() uint64 {
- var h maphash.Hash
+ var h maphash.MapHash
h.SetSeed(hashSeed)
result := HashCombine(h.Sum64(), uint64(it.Kind))
@@ -569,7 +570,7 @@ func (k *KernelSignature) Hash() uint64 {
return k.hashCode
}
- var h maphash.Hash
+ var h maphash.MapHash
h.SetSeed(hashSeed)
result := h.Sum64()
for _, typ := range k.InputTypes {
diff --git a/arrow/compute/expression.go b/arrow/compute/expression.go
index 6a494cef..18dcaacb 100644
--- a/arrow/compute/expression.go
+++ b/arrow/compute/expression.go
@@ -23,11 +23,12 @@ import (
"encoding/hex"
"errors"
"fmt"
- "hash/maphash"
"reflect"
"strconv"
"strings"
+ "github.com/apache/arrow-go/v18/internal/utils/maphash"
+
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/array"
"github.com/apache/arrow-go/v18/arrow/compute/exec"
@@ -331,7 +332,7 @@ func (c *Call) Hash() uint64 {
return c.cachedHash
}
- var h maphash.Hash
+ var h maphash.MapHash
h.SetSeed(hashSeed)
h.WriteString(c.funcName)
diff --git a/arrow/compute/exprs/types.go b/arrow/compute/exprs/types.go
index 58fa1e94..5f672c00 100644
--- a/arrow/compute/exprs/types.go
+++ b/arrow/compute/exprs/types.go
@@ -20,10 +20,11 @@ package exprs
import (
"fmt"
- "hash/maphash"
"strconv"
"strings"
+ "github.com/apache/arrow-go/v18/internal/utils/maphash"
+
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/compute"
"github.com/apache/arrow-go/v18/arrow/scalar"
diff --git a/arrow/compute/fieldref.go b/arrow/compute/fieldref.go
index 92786bf8..9400cc0f 100644
--- a/arrow/compute/fieldref.go
+++ b/arrow/compute/fieldref.go
@@ -19,12 +19,13 @@ package compute
import (
"errors"
"fmt"
- "hash/maphash"
"reflect"
"strconv"
"strings"
"unicode"
+ "github.com/apache/arrow-go/v18/internal/utils/maphash"
+
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/array"
)
@@ -191,7 +192,7 @@ func (ref nameRef) findAll(fields []arrow.Field) []FieldPath {
return out
}
-func (ref nameRef) hash(h *maphash.Hash) { h.WriteString(string(ref)) }
+func (ref nameRef) hash(h *maphash.MapHash) { h.WriteString(string(ref)) }
type matches struct {
prefixes []FieldPath
@@ -223,7 +224,7 @@ func (r refList) String() string {
return ret[:len(ret)-1] + ")"
}
-func (ref refList) hash(h *maphash.Hash) {
+func (ref refList) hash(h *maphash.MapHash) {
for _, r := range ref {
r.hash(h)
}
@@ -254,7 +255,7 @@ func (ref refList) findAll(fields []arrow.Field) []FieldPath {
type refImpl interface {
fmt.Stringer
findAll(fields []arrow.Field) []FieldPath
- hash(h *maphash.Hash)
+ hash(h *maphash.MapHash)
}
// FieldRef is a descriptor of a (potentially nested) field within a schema.
@@ -407,12 +408,12 @@ func NewFieldRefFromDotPath(dotpath string) (out FieldRef, err error) {
return
}
-func (f FieldRef) hash(h *maphash.Hash) { f.impl.hash(h) }
+func (f FieldRef) hash(h *maphash.MapHash) { f.impl.hash(h) }
// Hash produces a hash of this field reference and takes in a seed so that
// it can maintain consistency across multiple places / processes /etc.
func (f FieldRef) Hash(seed maphash.Seed) uint64 {
- h := maphash.Hash{}
+ h := maphash.MapHash{}
h.SetSeed(seed)
f.hash(&h)
return h.Sum64()
diff --git a/arrow/compute/fieldref_hash.go b/arrow/compute/fieldref_hash.go
index 02efc46d..cb134e3a 100644
--- a/arrow/compute/fieldref_hash.go
+++ b/arrow/compute/fieldref_hash.go
@@ -19,14 +19,15 @@
package compute
import (
- "hash/maphash"
"math/bits"
"unsafe"
+ "github.com/apache/arrow-go/v18/internal/utils/maphash"
+
"github.com/apache/arrow-go/v18/arrow"
)
-func (f FieldPath) hash(h *maphash.Hash) {
+func (f FieldPath) hash(h *maphash.MapHash) {
raw := unsafe.Pointer(unsafe.SliceData(f))
var byteLen int
if bits.UintSize == 32 {
diff --git a/arrow/datatype.go b/arrow/datatype.go
index 95565859..43ec789c 100644
--- a/arrow/datatype.go
+++ b/arrow/datatype.go
@@ -18,9 +18,10 @@ package arrow
import (
"fmt"
- "hash/maphash"
"strings"
+ "github.com/apache/arrow-go/v18/internal/utils/maphash"
+
"github.com/apache/arrow-go/v18/arrow/internal/debug"
)
@@ -227,7 +228,7 @@ type OffsetsDataType interface {
}
func HashType(seed maphash.Seed, dt DataType) uint64 {
- var h maphash.Hash
+ var h maphash.MapHash
h.SetSeed(seed)
h.WriteString(dt.Fingerprint())
return h.Sum64()
diff --git a/arrow/internal/dictutils/dict.go b/arrow/internal/dictutils/dict.go
index fa075ceb..a90058f1 100644
--- a/arrow/internal/dictutils/dict.go
+++ b/arrow/internal/dictutils/dict.go
@@ -19,7 +19,8 @@ package dictutils
import (
"errors"
"fmt"
- "hash/maphash"
+
+ "github.com/apache/arrow-go/v18/internal/utils/maphash"
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/array"
@@ -57,7 +58,7 @@ func (f *FieldPos) Path() []int32 {
type Mapper struct {
pathToID map[uint64]int64
- hasher maphash.Hash
+ hasher maphash.MapHash
}
func (d *Mapper) NumDicts() int {
diff --git a/arrow/scalar/scalar.go b/arrow/scalar/scalar.go
index 0f8ec616..29885336 100644
--- a/arrow/scalar/scalar.go
+++ b/arrow/scalar/scalar.go
@@ -19,13 +19,14 @@ package scalar
import (
"encoding/binary"
"fmt"
- "hash/maphash"
"math"
"math/big"
"reflect"
"strconv"
"unsafe"
+ "github.com/apache/arrow-go/v18/internal/utils/maphash"
+
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/array"
"github.com/apache/arrow-go/v18/arrow/bitutil"
@@ -969,7 +970,7 @@ func MakeArrayFromScalar(sc Scalar, length int, mem memory.Allocator) (arrow.Arr
}
func Hash(seed maphash.Seed, s Scalar) uint64 {
- var h maphash.Hash
+ var h maphash.MapHash
h.SetSeed(seed)
binary.Write(&h, endian.Native, arrow.HashType(seed, s.DataType()))
diff --git a/arrow/scalar/scalar_test.go b/arrow/scalar/scalar_test.go
index a3a97664..bfd00af1 100644
--- a/arrow/scalar/scalar_test.go
+++ b/arrow/scalar/scalar_test.go
@@ -19,12 +19,13 @@ package scalar_test
import (
"bytes"
"fmt"
- "hash/maphash"
"math/bits"
"strings"
"testing"
"time"
+ "github.com/apache/arrow-go/v18/internal/utils/maphash"
+
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/array"
"github.com/apache/arrow-go/v18/arrow/decimal128"
diff --git a/arrow/compute/fieldref_hash.go b/internal/utils/maphash/maphash.go
similarity index 65%
copy from arrow/compute/fieldref_hash.go
copy to internal/utils/maphash/maphash.go
index 02efc46d..79940330 100644
--- a/arrow/compute/fieldref_hash.go
+++ b/internal/utils/maphash/maphash.go
@@ -14,26 +14,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-//go:build go1.20 || tinygo
+//go:build !tinygo
-package compute
+package maphash
-import (
- "hash/maphash"
- "math/bits"
- "unsafe"
+import "hash/maphash"
- "github.com/apache/arrow-go/v18/arrow"
-)
+type MapHash = maphash.Hash
+type Seed = maphash.Seed
-func (f FieldPath) hash(h *maphash.Hash) {
- raw := unsafe.Pointer(unsafe.SliceData(f))
- var byteLen int
- if bits.UintSize == 32 {
- byteLen = arrow.Int32Traits.BytesRequired(len(f))
- } else {
- byteLen = arrow.Int64Traits.BytesRequired(len(f))
- }
-
- h.Write(unsafe.Slice((*byte)(raw), byteLen))
+func MakeSeed() Seed {
+ return maphash.MakeSeed()
}
diff --git a/internal/utils/maphash/maphash_tinygo.go b/internal/utils/maphash/maphash_tinygo.go
new file mode 100644
index 00000000..855bb561
--- /dev/null
+++ b/internal/utils/maphash/maphash_tinygo.go
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build tinygo
+
+package maphash
+
+import (
+ "hash"
+ "hash/fnv"
+ "math/rand"
+ "time"
+)
+
+type MapHash struct {
+ h hash.Hash64
+}
+
+func (h *MapHash) Write(p []byte) (n int, err error) {
+ if h.h == nil {
+ h.h = fnv.New64a()
+ }
+ return h.h.Write(p)
+}
+
+func (h *MapHash) WriteByte(c byte) error {
+ if h.h == nil {
+ h.h = fnv.New64a()
+ }
+ _, err := h.h.Write([]byte{c})
+ return err
+}
+
+func (h *MapHash) WriteString(s string) (n int, err error) {
+ if h.h == nil {
+ h.h = fnv.New64a()
+ }
+ return h.h.Write([]byte(s))
+}
+
+func (h *MapHash) Reset() {
+ if h.h != nil {
+ h.h.Reset()
+ }
+}
+
+func (h *MapHash) Sum64() uint64 {
+ if h.h == nil {
+ h.h = fnv.New64a()
+ }
+ return h.h.Sum64()
+}
+
+func (h *MapHash) SetSeed(seed Seed) {
+ // fnv doesn't have a seed. So we ignore this.
+ // But we need to define the method to match the interface.
+}
+
+type Seed struct {
+ s uint64
+}
+
+func MakeSeed() Seed {
+ rand.Seed(time.Now().UnixNano())
+ return Seed{s: rand.Uint64()}
+}