(datasketches-go) 02/02: Add a bloomfilter implementation to Go

placave Thu, 11 Dec 2025 15:25:22 -0800

This is an automated email from the ASF dual-hosted git repository.

placave pushed a commit to branch bloomfilter
in repository https://gitbox.apache.org/repos/asf/datasketches-go.git


commit 1c684a925366cceb61fb248a22c0234a25a5317d
Author: Pierre Lacave <[email protected]>
AuthorDate: Fri Dec 12 00:24:56 2025 +0100

    Add a bloomfilter implementation to Go
---
 README.md                                          |   2 +-
 filters/bit_array.go                               | 103 +++
 filters/bit_array_test.go                          | 186 +++++
 filters/bloom_filter.go                            | 600 +++++++++++++++
 filters/bloom_filter_builder.go                    | 272 +++++++
 filters/bloom_filter_serialization_test.go         | 534 ++++++++++++++
 filters/bloom_filter_test.go                       | 804 +++++++++++++++++++++
 filters/preamble_utils.go                          | 148 ++++
 go.mod                                             |   2 +
 go.sum                                             |   2 +
 internal/family.go                                 |   7 +-
 .../bf_byte_array_n10000_h3_go.sk                  | Bin 0 -> 1288 bytes
 .../bf_double_array_n10000_h3_go.sk                | Bin 0 -> 1288 bytes
 .../go_generated_files/bf_double_n10000_h3_go.sk   | Bin 0 -> 1288 bytes
 .../bf_long_array_n10000_h3_go.sk                  | Bin 0 -> 1288 bytes
 .../go_generated_files/bf_n0_h3_go.sk              | Bin 0 -> 24 bytes
 .../go_generated_files/bf_n0_h5_go.sk              | Bin 0 -> 24 bytes
 .../go_generated_files/bf_n10000_h3_go.sk          | Bin 0 -> 1288 bytes
 .../go_generated_files/bf_n10000_h5_go.sk          | Bin 0 -> 1288 bytes
 .../go_generated_files/bf_n2000000_h3_go.sk        | Bin 0 -> 250032 bytes
 .../go_generated_files/bf_n2000000_h5_go.sk        | Bin 0 -> 250032 bytes
 .../go_generated_files/bf_string_n10000_h3_go.sk   | Bin 0 -> 1288 bytes
 22 files changed, 2658 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 4f0f94d..4ca1a27 100644
--- a/README.md
+++ b/README.md
@@ -59,7 +59,7 @@ If you are interested in making contributions to this site 
please see our [Commu
 |  | ReserviorItemsSketch<T> | ❌ |
 |        | VarOptItemsSketch<T>    | ❌ |
 | Membership | | |
-| | BloomFilterSketch | ❌ |
+| | BloomFilter | 🚧 |
 
 
 ## Specialty Sketches
diff --git a/filters/bit_array.go b/filters/bit_array.go
new file mode 100644
index 0000000..9bfcc52
--- /dev/null
+++ b/filters/bit_array.go
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package filters
+
+import "math/bits"
+
+// getBit returns the value of the bit at the specified index.
+func getBit(array []uint64, index uint64) bool {
+       longIdx := index >> 6  // divide by 64
+       bitIdx := index & 0x3F // mod 64
+       return (array[longIdx] & (1 << bitIdx)) != 0
+}
+
+// setBit sets the bit at the specified index to 1.
+func setBit(array []uint64, index uint64) {
+       longIdx := index >> 6
+       bitIdx := index & 0x3F
+       array[longIdx] |= (1 << bitIdx)
+}
+
+// clearBit sets the bit at the specified index to 0.
+func clearBit(array []uint64, index uint64) {
+       longIdx := index >> 6
+       bitIdx := index & 0x3F
+       array[longIdx] &^= (1 << bitIdx)
+}
+
+// assignBit sets the bit at the specified index to the given value.
+func assignBit(array []uint64, index uint64, value bool) {
+       if value {
+               setBit(array, index)
+       } else {
+               clearBit(array, index)
+       }
+}
+
+// getAndSetBit atomically gets the current bit value and sets it to 1.
+// Returns true if the bit was already set, false if it was newly set.
+func getAndSetBit(array []uint64, index uint64) bool {
+       longIdx := index >> 6
+       bitIdx := index & 0x3F
+       mask := uint64(1) << bitIdx
+       wasSet := (array[longIdx] & mask) != 0
+       array[longIdx] |= mask
+       return wasSet
+}
+
+// countBitsSet counts the number of bits set to 1 in the array.
+func countBitsSet(array []uint64) uint64 {
+       count := uint64(0)
+       for _, val := range array {
+               count += uint64(bits.OnesCount64(val))
+       }
+       return count
+}
+
+// unionWith performs a bitwise OR operation between target and source arrays.
+// The result is stored in target. Returns the number of bits set in the 
result.
+func unionWith(target, source []uint64) uint64 {
+       count := uint64(0)
+       for i := range target {
+               target[i] |= source[i]
+               count += uint64(bits.OnesCount64(target[i]))
+       }
+       return count
+}
+
+// intersect performs a bitwise AND operation between target and source arrays.
+// The result is stored in target. Returns the number of bits set in the 
result.
+func intersect(target, source []uint64) uint64 {
+       count := uint64(0)
+       for i := range target {
+               target[i] &= source[i]
+               count += uint64(bits.OnesCount64(target[i]))
+       }
+       return count
+}
+
+// invert performs a bitwise NOT operation on the array.
+// Returns the number of bits set in the result.
+func invert(array []uint64) uint64 {
+       count := uint64(0)
+       for i := range array {
+               array[i] = ^array[i]
+               count += uint64(bits.OnesCount64(array[i]))
+       }
+       return count
+}
diff --git a/filters/bit_array_test.go b/filters/bit_array_test.go
new file mode 100644
index 0000000..7f1172d
--- /dev/null
+++ b/filters/bit_array_test.go
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package filters
+
+import (
+       "testing"
+
+       "github.com/stretchr/testify/assert"
+)
+
+func TestBitArrayBasicOperations(t *testing.T) {
+       // Create array with 128 bits (2 longs)
+       array := make([]uint64, 2)
+
+       // Test initial state - all bits should be 0
+       assert.Equal(t, uint64(0), countBitsSet(array))
+       assert.False(t, getBit(array, 0))
+       assert.False(t, getBit(array, 63))
+       assert.False(t, getBit(array, 64))
+       assert.False(t, getBit(array, 127))
+
+       // Test setBit
+       setBit(array, 5)
+       assert.True(t, getBit(array, 5))
+       assert.Equal(t, uint64(1), countBitsSet(array))
+
+       setBit(array, 65)
+       assert.True(t, getBit(array, 65))
+       assert.Equal(t, uint64(2), countBitsSet(array))
+
+       // Test clearBit
+       clearBit(array, 5)
+       assert.False(t, getBit(array, 5))
+       assert.Equal(t, uint64(1), countBitsSet(array))
+
+       // Test assignBit
+       assignBit(array, 10, true)
+       assert.True(t, getBit(array, 10))
+       assert.Equal(t, uint64(2), countBitsSet(array))
+
+       assignBit(array, 10, false)
+       assert.False(t, getBit(array, 10))
+       assert.Equal(t, uint64(1), countBitsSet(array))
+
+       // Test getAndSetBit
+       wasSet := getAndSetBit(array, 20)
+       assert.False(t, wasSet) // Was not set
+       assert.True(t, getBit(array, 20))
+       assert.Equal(t, uint64(2), countBitsSet(array))
+
+       wasSet = getAndSetBit(array, 20)
+       assert.True(t, wasSet) // Was already set
+       assert.True(t, getBit(array, 20))
+       assert.Equal(t, uint64(2), countBitsSet(array))
+}
+
+func TestBitArrayInversion(t *testing.T) {
+       // Create array with 128 bits
+       array := make([]uint64, 2)
+
+       // Set some bits
+       setBit(array, 0)
+       setBit(array, 10)
+       setBit(array, 63)
+       setBit(array, 100)
+       assert.Equal(t, uint64(4), countBitsSet(array))
+
+       // Invert
+       count := invert(array)
+       assert.Equal(t, uint64(128-4), count)
+       assert.Equal(t, uint64(128-4), countBitsSet(array))
+
+       // Previously set bits should now be clear
+       assert.False(t, getBit(array, 0))
+       assert.False(t, getBit(array, 10))
+       assert.False(t, getBit(array, 63))
+       assert.False(t, getBit(array, 100))
+
+       // Previously clear bits should now be set
+       assert.True(t, getBit(array, 1))
+       assert.True(t, getBit(array, 50))
+       assert.True(t, getBit(array, 64))
+       assert.True(t, getBit(array, 127))
+}
+
+func TestBitArrayUnion(t *testing.T) {
+       // Create two arrays with 192 bits (3 longs)
+       array1 := make([]uint64, 3)
+       array2 := make([]uint64, 3)
+       array3 := make([]uint64, 3)
+
+       // Array1: bits 0-9
+       for i := uint64(0); i < 10; i++ {
+               setBit(array1, i)
+       }
+
+       // Array2: bits 5-14
+       for i := uint64(5); i < 15; i++ {
+               setBit(array2, i)
+       }
+
+       // Array3: even bits 0-18
+       for i := uint64(0); i < 19; i += 2 {
+               setBit(array3, i)
+       }
+
+       // Union of array2 and array3
+       count := unionWith(array2, array3)
+       // Array2 had bits 5-14 (10 bits)
+       // Array3 had even bits 0-18 (10 bits: 0,2,4,6,8,10,12,14,16,18)
+       // Union should have: 5,6,7,8,9,10,11,12,13,14 + 0,2,4,16,18 = 15 bits
+       assert.Equal(t, uint64(15), count)
+       assert.Equal(t, uint64(15), countBitsSet(array2))
+}
+
+func TestBitArrayIntersection(t *testing.T) {
+       // Create two arrays
+       array1 := make([]uint64, 3)
+       array2 := make([]uint64, 3)
+
+       // Array1: bits 0-9
+       for i := uint64(0); i < 10; i++ {
+               setBit(array1, i)
+       }
+
+       // Array2: bits 5-14
+       for i := uint64(5); i < 15; i++ {
+               setBit(array2, i)
+       }
+
+       // Intersection
+       count := intersect(array1, array2)
+       // Overlap is bits 5-9 = 5 bits
+       assert.Equal(t, uint64(5), count)
+       assert.Equal(t, uint64(5), countBitsSet(array1))
+
+       // Verify the overlap bits
+       for i := uint64(5); i < 10; i++ {
+               assert.True(t, getBit(array1, i))
+       }
+       // Verify non-overlap bits are cleared
+       for i := uint64(0); i < 5; i++ {
+               assert.False(t, getBit(array1, i))
+       }
+}
+
+func TestBitArrayBoundaries(t *testing.T) {
+       // Test bit operations at long boundaries
+       array := make([]uint64, 3) // 192 bits
+
+       // Test at boundary of first and second long (bit 63-64)
+       setBit(array, 63)
+       setBit(array, 64)
+       assert.True(t, getBit(array, 63))
+       assert.True(t, getBit(array, 64))
+
+       clearBit(array, 63)
+       assert.False(t, getBit(array, 63))
+       assert.True(t, getBit(array, 64))
+
+       // Test at boundary of second and third long (bit 127-128)
+       setBit(array, 127)
+       setBit(array, 128)
+       assert.True(t, getBit(array, 127))
+       assert.True(t, getBit(array, 128))
+
+       // Test last bit
+       setBit(array, 191)
+       assert.True(t, getBit(array, 191))
+       assert.Equal(t, uint64(4), countBitsSet(array))
+}
diff --git a/filters/bloom_filter.go b/filters/bloom_filter.go
new file mode 100644
index 0000000..f562938
--- /dev/null
+++ b/filters/bloom_filter.go
@@ -0,0 +1,600 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Package filters provides probabilistic membership data structures for 
efficient
+// set membership testing with controlled false positive rates.
+//
+// The Bloom filter is a space-efficient probabilistic data structure that is 
used
+// to test whether an element is a member of a set. False positive matches are
+// possible, but false negatives are not. This implementation uses XXHash64 
with
+// Kirsch-Mitzenmacher double hashing optimization.
+package filters
+
+import (
+       "encoding/binary"
+       "fmt"
+       "math"
+       "math/bits"
+
+       "github.com/cespare/xxhash/v2"
+)
+
+// BloomFilter is a probabilistic data structure for set membership testing.
+// It provides constant-time updates and queries with a configurable false
+// positive rate. No false negatives are possible.
+type BloomFilter interface {
+       // Update methods add items to the filter
+       UpdateUInt64(datum uint64) error
+       UpdateInt64(datum int64) error
+       UpdateString(datum string) error
+       UpdateSlice(datum []byte) error
+       UpdateFloat64(datum float64) error
+       UpdateInt64Array(data []int64) error
+       UpdateFloat64Array(data []float64) error
+
+       // Query methods test membership
+       QueryUInt64(datum uint64) bool
+       QueryInt64(datum int64) bool
+       QueryString(datum string) bool
+       QuerySlice(datum []byte) bool
+       QueryFloat64(datum float64) bool
+       QueryInt64Array(data []int64) bool
+       QueryFloat64Array(data []float64) bool
+
+       // QueryAndUpdate atomically queries and updates (test-and-set)
+       QueryAndUpdateUInt64(datum uint64) (bool, error)
+       QueryAndUpdateInt64(datum int64) (bool, error)
+       QueryAndUpdateString(datum string) (bool, error)
+       QueryAndUpdateSlice(datum []byte) (bool, error)
+       QueryAndUpdateFloat64(datum float64) (bool, error)
+       QueryAndUpdateInt64Array(data []int64) (bool, error)
+       QueryAndUpdateFloat64Array(data []float64) (bool, error)
+
+       // Set operations
+       Union(other BloomFilter) error
+       Intersect(other BloomFilter) error
+       Invert() error
+       IsCompatible(other BloomFilter) bool
+
+       // State queries
+       IsEmpty() bool
+       GetBitsUsed() uint64
+       GetCapacity() uint64
+       GetNumHashes() uint16
+       GetSeed() uint64
+
+       // Serialization
+       ToCompactSlice() ([]byte, error)
+       Reset() error
+}
+
+// bloomFilterImpl is the concrete implementation of BloomFilter.
+type bloomFilterImpl struct {
+       seed         uint64
+       numHashes    uint16
+       isDirty      bool
+       capacityBits uint64
+       numBitsSet   uint64
+       bitArray     []uint64
+}
+
+// IsEmpty returns true if no bits are set in the filter.
+func (bf *bloomFilterImpl) IsEmpty() bool {
+       return bf.GetBitsUsed() == 0
+}
+
+// GetBitsUsed returns the number of bits currently set to 1.
+// If the count is dirty, it will be recomputed.
+func (bf *bloomFilterImpl) GetBitsUsed() uint64 {
+       if bf.isDirty {
+               bf.numBitsSet = countBitsSet(bf.bitArray)
+               bf.isDirty = false
+       }
+       return bf.numBitsSet
+}
+
+// GetCapacity returns the total number of bits in the filter.
+func (bf *bloomFilterImpl) GetCapacity() uint64 {
+       return bf.capacityBits
+}
+
+// GetNumHashes returns the number of hash functions used.
+func (bf *bloomFilterImpl) GetNumHashes() uint16 {
+       return bf.numHashes
+}
+
+// GetSeed returns the hash seed used by the filter.
+func (bf *bloomFilterImpl) GetSeed() uint64 {
+       return bf.seed
+}
+
+// Reset clears all bits in the filter.
+func (bf *bloomFilterImpl) Reset() error {
+       for i := range bf.bitArray {
+               bf.bitArray[i] = 0
+       }
+       bf.numBitsSet = 0
+       bf.isDirty = false
+       return nil
+}
+
+// IsCompatible checks if two filters can be combined (union/intersection).
+// Filters are compatible if they have the same seed, hash count, and capacity.
+func (bf *bloomFilterImpl) IsCompatible(other BloomFilter) bool {
+       return bf.seed == other.GetSeed() &&
+               bf.numHashes == other.GetNumHashes() &&
+               bf.capacityBits == other.GetCapacity()
+}
+
+// computeHashes computes two hash values using XXHash64 and 
Kirsch-Mitzenmacher approach.
+func (bf *bloomFilterImpl) computeHashes(data []byte) (h0, h1 uint64) {
+       // Compute h0 with the filter's seed
+       h := xxhash.NewWithSeed(bf.seed)
+       h.Write(data)
+       h0 = h.Sum64()
+
+       // Compute h1 using h0 as seed
+       h.Reset()
+       h = xxhash.NewWithSeed(h0)
+       h.Write(data)
+       h1 = h.Sum64()
+       return
+}
+
+// hashLongOptimized implements the Java-compatible optimized hash for single 
long values.
+// This matches the implementation in 
org.apache.datasketches.hash.XxHash64.hash(long, long).
+func hashLongOptimized(value uint64, seed uint64) uint64 {
+       const (
+               P1 = 0x9E3779B185EBCA87
+               P2 = 0xC2B2AE3D27D4EB4F
+               P3 = 0x165667B19E3779F9
+               P4 = 0x85EBCA77C2B2AE63
+               P5 = 0x27D4EB2F165667C5
+       )
+
+       hash := seed + P5
+       hash += 8 // length in bytes
+
+       k1 := value
+       k1 *= P2
+       k1 = bits.RotateLeft64(k1, 31)
+       k1 *= P1
+       hash ^= k1
+       hash = (bits.RotateLeft64(hash, 27) * P1) + P4
+
+       // Finalize
+       hash ^= hash >> 33
+       hash *= P2
+       hash ^= hash >> 29
+       hash *= P3
+       hash ^= hash >> 32
+
+       return hash
+}
+
+// computeHashesForLong computes hashes using the optimized single-long 
algorithm
+// to match Java/C++ behavior for integer values.
+func (bf *bloomFilterImpl) computeHashesForLong(value uint64) (h0, h1 uint64) {
+       h0 = hashLongOptimized(value, bf.seed)
+       h1 = hashLongOptimized(value, h0)
+       return
+}
+
+// getHashIndex computes the i-th hash index using double hashing.
+// Formula: g_i(x) = ((h0 + i * h1) >> 1) mod capacity
+func (bf *bloomFilterImpl) getHashIndex(h0, h1 uint64, i uint16) uint64 {
+       return ((h0 + uint64(i)*h1) >> 1) % bf.capacityBits
+}
+
+// internalUpdate updates the filter with pre-computed hash values.
+func (bf *bloomFilterImpl) internalUpdate(h0, h1 uint64) {
+       for i := uint16(1); i <= bf.numHashes; i++ {
+               idx := bf.getHashIndex(h0, h1, i)
+               if !getBit(bf.bitArray, idx) {
+                       setBit(bf.bitArray, idx)
+                       bf.numBitsSet++
+               }
+       }
+       bf.isDirty = true
+}
+
+// internalQuery queries the filter with pre-computed hash values.
+func (bf *bloomFilterImpl) internalQuery(h0, h1 uint64) bool {
+       if bf.IsEmpty() {
+               return false
+       }
+       for i := uint16(1); i <= bf.numHashes; i++ {
+               idx := bf.getHashIndex(h0, h1, i)
+               if !getBit(bf.bitArray, idx) {
+                       return false
+               }
+       }
+       return true
+}
+
+// internalQueryAndUpdate atomically queries and updates with pre-computed 
hash values.
+// Returns true if the item was already present (all k bits were set).
+func (bf *bloomFilterImpl) internalQueryAndUpdate(h0, h1 uint64) bool {
+       valueExists := true
+       newBitsSet := uint64(0)
+       for i := uint16(1); i <= bf.numHashes; i++ {
+               idx := bf.getHashIndex(h0, h1, i)
+               wasSet := getAndSetBit(bf.bitArray, idx)
+               valueExists = valueExists && wasSet
+               if !wasSet {
+                       newBitsSet++
+               }
+       }
+       bf.numBitsSet += newBitsSet
+       bf.isDirty = true
+       return valueExists
+}
+
+// UpdateUInt64 adds a uint64 value to the filter.
+func (bf *bloomFilterImpl) UpdateUInt64(datum uint64) error {
+       h0, h1 := bf.computeHashesForLong(datum)
+       bf.internalUpdate(h0, h1)
+       return nil
+}
+
+// UpdateInt64 adds an int64 value to the filter.
+func (bf *bloomFilterImpl) UpdateInt64(datum int64) error {
+       h0, h1 := bf.computeHashesForLong(uint64(datum))
+       bf.internalUpdate(h0, h1)
+       return nil
+}
+
+// UpdateString adds a string to the filter.
+// Empty strings are ignored (no update performed).
+func (bf *bloomFilterImpl) UpdateString(datum string) error {
+       if datum == "" {
+               return nil // Empty string - do nothing, matching Java behavior
+       }
+       return bf.UpdateSlice([]byte(datum))
+}
+
+// UpdateSlice adds a byte slice to the filter.
+func (bf *bloomFilterImpl) UpdateSlice(datum []byte) error {
+       h0, h1 := bf.computeHashes(datum)
+       bf.internalUpdate(h0, h1)
+       return nil
+}
+
+// UpdateFloat64 adds a float64 value to the filter.
+// NaN values are canonicalized to match Java's Double.doubleToLongBits().
+func (bf *bloomFilterImpl) UpdateFloat64(datum float64) error {
+       var bits uint64
+       if datum == 0.0 {
+               // Canonicalize -0.0 to 0.0
+               bits = 0
+       } else if math.IsNaN(datum) {
+               // Use Java's canonical NaN: 0x7ff8000000000000
+               bits = 0x7ff8000000000000
+       } else if math.IsInf(datum, 1) {
+               // Positive infinity
+               bits = 0x7ff0000000000000
+       } else if math.IsInf(datum, -1) {
+               // Negative infinity
+               bits = 0xfff0000000000000
+       } else {
+               bits = math.Float64bits(datum)
+       }
+
+       // Java hashes as a single-element long array (8 bytes), not using 
optimized single-long hash
+       buf := make([]byte, 8)
+       binary.LittleEndian.PutUint64(buf, bits)
+       h0, h1 := bf.computeHashes(buf)
+       bf.internalUpdate(h0, h1)
+       return nil
+}
+
+// QueryUInt64 tests if a uint64 value might be in the filter.
+func (bf *bloomFilterImpl) QueryUInt64(datum uint64) bool {
+       h0, h1 := bf.computeHashesForLong(datum)
+       return bf.internalQuery(h0, h1)
+}
+
+// QueryInt64 tests if an int64 value might be in the filter.
+func (bf *bloomFilterImpl) QueryInt64(datum int64) bool {
+       h0, h1 := bf.computeHashesForLong(uint64(datum))
+       return bf.internalQuery(h0, h1)
+}
+
+// QueryString tests if a string might be in the filter.
+// Empty strings always return false.
+func (bf *bloomFilterImpl) QueryString(datum string) bool {
+       if datum == "" {
+               return false // Empty string - do nothing, matching Java 
behavior
+       }
+       return bf.QuerySlice([]byte(datum))
+}
+
+// QuerySlice tests if a byte slice might be in the filter.
+func (bf *bloomFilterImpl) QuerySlice(datum []byte) bool {
+       h0, h1 := bf.computeHashes(datum)
+       return bf.internalQuery(h0, h1)
+}
+
+// QueryFloat64 tests if a float64 value might be in the filter.
+func (bf *bloomFilterImpl) QueryFloat64(datum float64) bool {
+       var bits uint64
+       if datum == 0.0 {
+               // Canonicalize -0.0 to 0.0
+               bits = 0
+       } else if math.IsNaN(datum) {
+               // Use Java's canonical NaN: 0x7ff8000000000000
+               bits = 0x7ff8000000000000
+       } else if math.IsInf(datum, 1) {
+               // Positive infinity
+               bits = 0x7ff0000000000000
+       } else if math.IsInf(datum, -1) {
+               // Negative infinity
+               bits = 0xfff0000000000000
+       } else {
+               bits = math.Float64bits(datum)
+       }
+
+       // Java hashes as a single-element long array (8 bytes), not using 
optimized single-long hash
+       buf := make([]byte, 8)
+       binary.LittleEndian.PutUint64(buf, bits)
+       h0, h1 := bf.computeHashes(buf)
+       return bf.internalQuery(h0, h1)
+}
+
+// QueryAndUpdateUInt64 atomically queries and updates for a uint64 value.
+// Returns true if the value was already present before the update.
+func (bf *bloomFilterImpl) QueryAndUpdateUInt64(datum uint64) (bool, error) {
+       h0, h1 := bf.computeHashesForLong(datum)
+       return bf.internalQueryAndUpdate(h0, h1), nil
+}
+
+// QueryAndUpdateInt64 atomically queries and updates for an int64 value.
+func (bf *bloomFilterImpl) QueryAndUpdateInt64(datum int64) (bool, error) {
+       h0, h1 := bf.computeHashesForLong(uint64(datum))
+       return bf.internalQueryAndUpdate(h0, h1), nil
+}
+
+// QueryAndUpdateString atomically queries and updates for a string.
+// Empty strings are ignored and return false.
+func (bf *bloomFilterImpl) QueryAndUpdateString(datum string) (bool, error) {
+       if datum == "" {
+               return false, nil // Empty string - do nothing, matching Java 
behavior
+       }
+       return bf.QueryAndUpdateSlice([]byte(datum))
+}
+
+// QueryAndUpdateSlice atomically queries and updates for a byte slice.
+func (bf *bloomFilterImpl) QueryAndUpdateSlice(datum []byte) (bool, error) {
+       h0, h1 := bf.computeHashes(datum)
+       return bf.internalQueryAndUpdate(h0, h1), nil
+}
+
+// QueryAndUpdateFloat64 atomically queries and updates for a float64 value.
+func (bf *bloomFilterImpl) QueryAndUpdateFloat64(datum float64) (bool, error) {
+       var bits uint64
+       if datum == 0.0 {
+               // Canonicalize -0.0 to 0.0
+               bits = 0
+       } else if math.IsNaN(datum) {
+               // Use Java's canonical NaN: 0x7ff8000000000000
+               bits = 0x7ff8000000000000
+       } else if math.IsInf(datum, 1) {
+               // Positive infinity
+               bits = 0x7ff0000000000000
+       } else if math.IsInf(datum, -1) {
+               // Negative infinity
+               bits = 0xfff0000000000000
+       } else {
+               bits = math.Float64bits(datum)
+       }
+
+       // Java hashes as a single-element long array (8 bytes), not using 
optimized single-long hash
+       buf := make([]byte, 8)
+       binary.LittleEndian.PutUint64(buf, bits)
+       h0, h1 := bf.computeHashes(buf)
+       return bf.internalQueryAndUpdate(h0, h1), nil
+}
+
+// UpdateInt64Array adds an array of int64 values to the filter.
+// The entire array is hashed as a single unit (not element-by-element).
+// Nil or empty arrays are ignored.
+func (bf *bloomFilterImpl) UpdateInt64Array(data []int64) error {
+       if len(data) == 0 {
+               return nil
+       }
+
+       // Convert array to bytes (little-endian)
+       bytes := make([]byte, len(data)*8)
+       for i, val := range data {
+               binary.LittleEndian.PutUint64(bytes[i*8:], uint64(val))
+       }
+
+       h0, h1 := bf.computeHashes(bytes)
+       bf.internalUpdate(h0, h1)
+       return nil
+}
+
+// UpdateFloat64Array adds an array of float64 values to the filter.
+// The entire array is hashed as a single unit (not element-by-element).
+// Nil or empty arrays are ignored.
+func (bf *bloomFilterImpl) UpdateFloat64Array(data []float64) error {
+       if len(data) == 0 {
+               return nil
+       }
+
+       // Convert array to bytes (little-endian)
+       bytes := make([]byte, len(data)*8)
+       for i, val := range data {
+               bits := math.Float64bits(val)
+               binary.LittleEndian.PutUint64(bytes[i*8:], bits)
+       }
+
+       h0, h1 := bf.computeHashes(bytes)
+       bf.internalUpdate(h0, h1)
+       return nil
+}
+
+// QueryInt64Array tests if an array of int64 values might be in the filter.
+// The entire array is hashed as a single unit.
+func (bf *bloomFilterImpl) QueryInt64Array(data []int64) bool {
+       if len(data) == 0 {
+               return false
+       }
+
+       // Convert array to bytes (little-endian)
+       bytes := make([]byte, len(data)*8)
+       for i, val := range data {
+               binary.LittleEndian.PutUint64(bytes[i*8:], uint64(val))
+       }
+
+       h0, h1 := bf.computeHashes(bytes)
+       return bf.internalQuery(h0, h1)
+}
+
+// QueryFloat64Array tests if an array of float64 values might be in the 
filter.
+// The entire array is hashed as a single unit.
+func (bf *bloomFilterImpl) QueryFloat64Array(data []float64) bool {
+       if len(data) == 0 {
+               return false
+       }
+
+       // Convert array to bytes (little-endian)
+       bytes := make([]byte, len(data)*8)
+       for i, val := range data {
+               bits := math.Float64bits(val)
+               binary.LittleEndian.PutUint64(bytes[i*8:], bits)
+       }
+
+       h0, h1 := bf.computeHashes(bytes)
+       return bf.internalQuery(h0, h1)
+}
+
+// QueryAndUpdateInt64Array atomically queries and updates for an int64 array.
+// Returns true if the array was already present before the update.
+func (bf *bloomFilterImpl) QueryAndUpdateInt64Array(data []int64) (bool, 
error) {
+       if len(data) == 0 {
+               return false, nil
+       }
+
+       // Convert array to bytes (little-endian)
+       bytes := make([]byte, len(data)*8)
+       for i, val := range data {
+               binary.LittleEndian.PutUint64(bytes[i*8:], uint64(val))
+       }
+
+       h0, h1 := bf.computeHashes(bytes)
+       return bf.internalQueryAndUpdate(h0, h1), nil
+}
+
+// QueryAndUpdateFloat64Array atomically queries and updates for a float64 
array.
+// Returns true if the array was already present before the update.
+func (bf *bloomFilterImpl) QueryAndUpdateFloat64Array(data []float64) (bool, 
error) {
+       if len(data) == 0 {
+               return false, nil
+       }
+
+       // Convert array to bytes (little-endian)
+       bytes := make([]byte, len(data)*8)
+       for i, val := range data {
+               bits := math.Float64bits(val)
+               binary.LittleEndian.PutUint64(bytes[i*8:], bits)
+       }
+
+       h0, h1 := bf.computeHashes(bytes)
+       return bf.internalQueryAndUpdate(h0, h1), nil
+}
+
+// Union performs a bitwise OR operation with another filter.
+// After union, this filter will contain items from both filters.
+func (bf *bloomFilterImpl) Union(other BloomFilter) error {
+       if !bf.IsCompatible(other) {
+               return fmt.Errorf("cannot union incompatible bloom filters")
+       }
+       otherImpl, ok := other.(*bloomFilterImpl)
+       if !ok {
+               return fmt.Errorf("cannot union with non-standard bloom filter 
implementation")
+       }
+       bf.numBitsSet = unionWith(bf.bitArray, otherImpl.bitArray)
+       bf.isDirty = false
+       return nil
+}
+
+// Intersect performs a bitwise AND operation with another filter.
+// After intersection, this filter will only contain items present in both 
filters.
+func (bf *bloomFilterImpl) Intersect(other BloomFilter) error {
+       if !bf.IsCompatible(other) {
+               return fmt.Errorf("cannot intersect incompatible bloom filters")
+       }
+       otherImpl, ok := other.(*bloomFilterImpl)
+       if !ok {
+               return fmt.Errorf("cannot intersect with non-standard bloom 
filter implementation")
+       }
+       bf.numBitsSet = intersect(bf.bitArray, otherImpl.bitArray)
+       bf.isDirty = false
+       return nil
+}
+
+// Invert flips all bits in the filter.
+// This inverts the notion of membership.
+func (bf *bloomFilterImpl) Invert() error {
+       bf.numBitsSet = invert(bf.bitArray)
+       bf.isDirty = false
+       return nil
+}
+
+// ToCompactSlice serializes the filter to a byte slice.
+func (bf *bloomFilterImpl) ToCompactSlice() ([]byte, error) {
+       isEmpty := bf.IsEmpty()
+       var size int
+       if isEmpty {
+               size = preambleEmptyBytes
+       } else {
+               size = preambleBytes + len(bf.bitArray)*8
+       }
+
+       bytes := make([]byte, size)
+
+       // Write preamble
+       if isEmpty {
+               insertPreambleLongs(bytes, preambleLongsEmpty)
+       } else {
+               insertPreambleLongs(bytes, preambleLongsStandard)
+       }
+       insertSerVer(bytes)
+       insertFamilyID(bytes)
+
+       flags := uint8(0)
+       if isEmpty {
+               flags = setEmptyFlag(flags)
+       }
+       insertFlags(bytes, flags)
+       insertNumHashes(bytes, bf.numHashes)
+       insertSeed(bytes, bf.seed)
+       insertBitArrayLength(bytes, uint32(len(bf.bitArray)))
+
+       if !isEmpty {
+               bitsUsed := bf.GetBitsUsed()
+               insertNumBitsSet(bytes, bitsUsed)
+
+               // Write bit array
+               for i, val := range bf.bitArray {
+                       
binary.LittleEndian.PutUint64(bytes[bitArrayOffset+i*8:], val)
+               }
+       }
+
+       return bytes, nil
+}
diff --git a/filters/bloom_filter_builder.go b/filters/bloom_filter_builder.go
new file mode 100644
index 0000000..73fe651
--- /dev/null
+++ b/filters/bloom_filter_builder.go
@@ -0,0 +1,272 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package filters
+
+import (
+       "crypto/rand"
+       "encoding/binary"
+       "fmt"
+       "math"
+)
+
+// Default seed value for hash functions
+const DefaultSeed = uint64(9001)
+
+// bloomFilterOptions holds optional parameters for filter construction.
+type bloomFilterOptions struct {
+       seed uint64
+}
+
+// BloomFilterOption is a functional option for configuring a BloomFilter.
+type BloomFilterOption func(*bloomFilterOptions)
+
+// WithSeed sets a custom seed for the hash functions.
+func WithSeed(seed uint64) BloomFilterOption {
+       return func(opts *bloomFilterOptions) {
+               opts.seed = seed
+       }
+}
+
+// NewBloomFilterBySize creates a new Bloom filter with explicit size 
parameters.
+//
+// Parameters:
+//   - numBits: The number of bits in the filter (will be rounded up to 
multiple of 64)
+//   - numHashes: The number of hash functions to use
+//   - opts: Optional configuration (seed)
+//
+// Returns an error if parameters are invalid.
+func NewBloomFilterBySize(numBits uint64, numHashes uint16, opts 
...BloomFilterOption) (BloomFilter, error) {
+       if numBits == 0 {
+               return nil, fmt.Errorf("numBits must be positive")
+       }
+       if numHashes == 0 {
+               return nil, fmt.Errorf("numHashes must be positive")
+       }
+
+       // Check for overflow
+       maxBits := uint64(math.MaxInt32-32) * 64
+       if numBits > maxBits {
+               return nil, fmt.Errorf("numBits exceeds maximum allowed size")
+       }
+
+       // Apply options
+       options := &bloomFilterOptions{
+               seed: DefaultSeed,
+       }
+       for _, opt := range opts {
+               opt(options)
+       }
+
+       // Round capacity to multiple of 64
+       capacityBits := roundCapacity(numBits)
+       numLongs := capacityBits / 64
+
+       return &bloomFilterImpl{
+               seed:         options.seed,
+               numHashes:    numHashes,
+               isDirty:      false,
+               capacityBits: capacityBits,
+               numBitsSet:   0,
+               bitArray:     make([]uint64, numLongs),
+       }, nil
+}
+
+// NewBloomFilterByAccuracy creates a new Bloom filter optimized for target 
accuracy.
+//
+// The filter is sized to achieve the specified false positive probability for 
the
+// given number of expected items. The optimal number of bits and hash 
functions
+// are calculated automatically.
+//
+// Parameters:
+//   - maxDistinctItems: Expected number of distinct items to be inserted
+//   - targetFpp: Target false positive probability (between 0 and 1)
+//   - opts: Optional configuration (seed)
+//
+// Returns an error if parameters are invalid.
+func NewBloomFilterByAccuracy(maxDistinctItems uint64, targetFpp float64, opts 
...BloomFilterOption) (BloomFilter, error) {
+       if maxDistinctItems == 0 {
+               return nil, fmt.Errorf("maxDistinctItems must be positive")
+       }
+       if targetFpp <= 0.0 || targetFpp >= 1.0 {
+               return nil, fmt.Errorf("targetFpp must be between 0 and 1")
+       }
+
+       // Calculate optimal parameters
+       numBits := SuggestNumFilterBits(maxDistinctItems, targetFpp)
+       numHashes := SuggestNumHashesFromSize(maxDistinctItems, numBits)
+
+       return NewBloomFilterBySize(numBits, numHashes, opts...)
+}
+
+// NewBloomFilterWithDefault creates a new Bloom filter with default 
parameters.
+// Suitable for approximately 10,000 items with 1% false positive rate.
+func NewBloomFilterWithDefault() (BloomFilter, error) {
+       return NewBloomFilterByAccuracy(10000, 0.01)
+}
+
+// SuggestNumFilterBits calculates the optimal number of bits for a Bloom 
filter.
+//
+// Formula: m = ceil(-n * ln(p) / (ln(2))^2)
+// where n = number of items, p = target false positive probability
+func SuggestNumFilterBits(maxDistinctItems uint64, targetFpp float64) uint64 {
+       n := float64(maxDistinctItems)
+       p := targetFpp
+       ln2 := math.Ln2
+
+       bits := -n * math.Log(p) / (ln2 * ln2)
+       return uint64(math.Ceil(bits))
+}
+
+// SuggestNumHashes calculates the optimal number of hash functions from 
target FPP.
+//
+// Formula: k = ceil(-ln(p) / ln(2))
+// where p = target false positive probability
+func SuggestNumHashes(targetFpp float64) uint16 {
+       k := -math.Log(targetFpp) / math.Ln2
+       return uint16(math.Ceil(k))
+}
+
+// SuggestNumHashesFromSize calculates optimal number of hash functions from 
filter size.
+//
+// Formula: k = ceil((m/n) * ln(2))
+// where m = number of bits, n = number of items
+func SuggestNumHashesFromSize(maxDistinctItems, numFilterBits uint64) uint16 {
+       if maxDistinctItems == 0 {
+               return 1
+       }
+       ratio := float64(numFilterBits) / float64(maxDistinctItems)
+       k := ratio * math.Ln2
+       result := uint16(math.Ceil(k))
+       if result == 0 {
+               return 1
+       }
+       return result
+}
+
+// GenerateRandomSeed generates a cryptographically random seed value.
+func GenerateRandomSeed() (uint64, error) {
+       buf := make([]byte, 8)
+       _, err := rand.Read(buf)
+       if err != nil {
+               return 0, fmt.Errorf("failed to generate random seed: %w", err)
+       }
+       return binary.LittleEndian.Uint64(buf), nil
+}
+
+// NewBloomFilterFromSlice deserializes a Bloom filter from a byte slice.
+//
+// The byte slice must contain a valid serialized Bloom filter in the
+// DataSketches binary format. This format is compatible with C++ and Java
+// implementations.
+//
+// Returns an error if the data is invalid or corrupted.
+func NewBloomFilterFromSlice(bytes []byte) (BloomFilter, error) {
+       // Validate minimum size
+       if len(bytes) < preambleEmptyBytes {
+               return nil, fmt.Errorf("insufficient data: need at least %d 
bytes, got %d", preambleEmptyBytes, len(bytes))
+       }
+
+       // Extract and validate preamble fields
+       pLongs := extractPreambleLongs(bytes)
+       sVer := extractSerVer(bytes)
+       fID := extractFamilyID(bytes)
+       flags := extractFlags(bytes)
+       numHashes := extractNumHashes(bytes)
+       seed := extractSeed(bytes)
+       bitArrayLength := extractBitArrayLength(bytes)
+
+       // Validate serialization version
+       if sVer != serVer {
+               return nil, fmt.Errorf("unsupported serialization version: %d 
(expected %d)", sVer, serVer)
+       }
+
+       // Validate family ID
+       if fID != familyID {
+               return nil, fmt.Errorf("invalid family ID: %d (expected %d)", 
fID, familyID)
+       }
+
+       // Validate preamble longs
+       isEmpty := isEmptyFlag(flags)
+       expectedPreambleLongs := uint8(preambleLongsEmpty)
+       if !isEmpty {
+               expectedPreambleLongs = uint8(preambleLongsStandard)
+       }
+       if pLongs != expectedPreambleLongs {
+               return nil, fmt.Errorf("invalid preamble longs: %d (expected %d 
for empty=%v)", pLongs, expectedPreambleLongs, isEmpty)
+       }
+
+       // Validate numHashes
+       if numHashes == 0 {
+               return nil, fmt.Errorf("numHashes must be positive")
+       }
+
+       // Calculate capacity
+       capacityBits := uint64(bitArrayLength) * 64
+
+       // Create filter structure
+       bf := &bloomFilterImpl{
+               seed:         seed,
+               numHashes:    numHashes,
+               isDirty:      false,
+               capacityBits: capacityBits,
+               numBitsSet:   0,
+               bitArray:     make([]uint64, bitArrayLength),
+       }
+
+       // Handle empty filter
+       if isEmpty {
+               if len(bytes) != preambleEmptyBytes {
+                       return nil, fmt.Errorf("empty filter size mismatch: got 
%d bytes, expected %d", len(bytes), preambleEmptyBytes)
+               }
+               return bf, nil
+       }
+
+       // Handle non-empty filter
+       expectedSize := preambleBytes + int(bitArrayLength)*8
+       if len(bytes) != expectedSize {
+               return nil, fmt.Errorf("non-empty filter size mismatch: got %d 
bytes, expected %d", len(bytes), expectedSize)
+       }
+
+       // Extract num bits set
+       numBitsSet := extractNumBitsSet(bytes)
+       if numBitsSet == dirtyBitsValue {
+               // Need to recount
+               bf.isDirty = true
+       } else {
+               bf.numBitsSet = numBitsSet
+       }
+
+       // Read bit array
+       for i := 0; i < int(bitArrayLength); i++ {
+               offset := bitArrayOffset + i*8
+               bf.bitArray[i] = binary.LittleEndian.Uint64(bytes[offset:])
+       }
+
+       // Recount if dirty
+       if bf.isDirty {
+               bf.numBitsSet = countBitsSet(bf.bitArray)
+               bf.isDirty = false
+       }
+
+       return bf, nil
+}
+
+// roundCapacity rounds the number of bits up to the nearest multiple of 64.
+func roundCapacity(numBits uint64) uint64 {
+       return (numBits + 63) & ^uint64(63)
+}
diff --git a/filters/bloom_filter_serialization_test.go 
b/filters/bloom_filter_serialization_test.go
new file mode 100644
index 0000000..3e82fe6
--- /dev/null
+++ b/filters/bloom_filter_serialization_test.go
@@ -0,0 +1,534 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package filters
+
+import (
+       "crypto/md5"
+       "fmt"
+       "math"
+       "os"
+       "testing"
+
+       "github.com/apache/datasketches-go/internal"
+       "github.com/stretchr/testify/assert"
+)
+
+func TestGenerateGoBinariesForCompatibilityTesting(t *testing.T) {
+
+}
+
+func TestGoCompat(t *testing.T) {
+       if len(os.Getenv(internal.DSketchTestGenerateGo)) == 0 {
+               t.Skipf("%s not set", internal.DSketchTestGenerateGo)
+       }
+
+       err := os.MkdirAll(internal.GoPath, os.ModePerm)
+       assert.NoError(t, err)
+
+       t.Run("bloom filter", func(t *testing.T) {
+               testCases := []struct {
+                       n         int
+                       numHashes uint16
+               }{
+                       {0, 3},
+                       {0, 5},
+                       {10000, 3},
+                       {10000, 5},
+                       {2000000, 3},
+                       {2000000, 5},
+               }
+
+               for _, tc := range testCases {
+                       // Match Java's configuration: configBits = max(n, 1000)
+                       configBits := uint64(tc.n)
+                       if configBits < 1000 {
+                               configBits = 1000
+                       }
+
+                       // Generate random seed to match Java's approach
+                       seed, err := GenerateRandomSeed()
+                       assert.NoError(t, err)
+
+                       // Create filter
+                       bf, err := NewBloomFilterBySize(configBits, 
tc.numHashes, WithSeed(seed))
+                       assert.NoError(t, err)
+
+                       // Insert items: n/10 items (0 to n/10-1), matching Java
+                       numInserts := tc.n / 10
+                       for i := 0; i < numInserts; i++ {
+                               err = bf.UpdateInt64(int64(i))
+                               assert.NoError(t, err)
+                       }
+
+                       // If non-empty, also insert NaN (matching Java)
+                       if tc.n > 0 {
+                               err = bf.UpdateFloat64(math.NaN())
+                               assert.NoError(t, err)
+                       }
+
+                       // Verify state
+                       assert.Equal(t, tc.n == 0, bf.IsEmpty())
+                       if !bf.IsEmpty() {
+                               assert.Greater(t, bf.GetBitsUsed(), 
uint64(numInserts))
+                       }
+
+                       // Serialize
+                       data, err := bf.ToCompactSlice()
+                       assert.NoError(t, err)
+
+                       // Write to file
+                       filename := fmt.Sprintf("%s/bf_n%d_h%d_go.sk", 
internal.GoPath, tc.n, tc.numHashes)
+                       err = os.WriteFile(filename, data, 0644)
+                       assert.NoError(t, err)
+               }
+       })
+
+       t.Run("Specific Go", func(t *testing.T) {
+               n := 10000
+               numInserts := n / 10
+               numHashes := uint16(3)
+               configBits := uint64(n)
+
+               // Generate for string type
+               t.Run("string_type", func(t *testing.T) {
+                       seed, _ := GenerateRandomSeed()
+                       bf, _ := NewBloomFilterBySize(configBits, numHashes, 
WithSeed(seed))
+
+                       for i := 0; i < numInserts; i++ {
+                               bf.UpdateString(fmt.Sprintf("%d", i))
+                       }
+
+                       data, _ := bf.ToCompactSlice()
+                       filename := fmt.Sprintf("%s/bf_string_n%d_h%d_go.sk", 
internal.GoPath, n, numHashes)
+                       os.WriteFile(filename, data, 0644)
+                       t.Logf("Generated: %s", filename)
+               })
+
+               // Generate for double type
+               t.Run("double_type", func(t *testing.T) {
+                       seed, _ := GenerateRandomSeed()
+                       bf, _ := NewBloomFilterBySize(configBits, numHashes, 
WithSeed(seed))
+
+                       for i := 0; i < numInserts; i++ {
+                               bf.UpdateFloat64(float64(i))
+                       }
+
+                       data, _ := bf.ToCompactSlice()
+                       filename := fmt.Sprintf("%s/bf_double_n%d_h%d_go.sk", 
internal.GoPath, n, numHashes)
+                       os.WriteFile(filename, data, 0644)
+                       t.Logf("Generated: %s", filename)
+               })
+
+               // Generate for long array type
+               t.Run("long_array_type", func(t *testing.T) {
+                       seed, _ := GenerateRandomSeed()
+                       bf, _ := NewBloomFilterBySize(configBits, numHashes, 
WithSeed(seed))
+
+                       for i := 0; i < numInserts; i++ {
+                               arr := []int64{int64(i), int64(i)}
+                               bf.UpdateInt64Array(arr)
+                       }
+
+                       data, _ := bf.ToCompactSlice()
+                       filename := 
fmt.Sprintf("%s/bf_long_array_n%d_h%d_go.sk", internal.GoPath, n, numHashes)
+                       os.WriteFile(filename, data, 0644)
+                       t.Logf("Generated: %s", filename)
+               })
+
+               // Generate for double array type
+               t.Run("double_array_type", func(t *testing.T) {
+                       seed, _ := GenerateRandomSeed()
+                       bf, _ := NewBloomFilterBySize(configBits, numHashes, 
WithSeed(seed))
+
+                       for i := 0; i < numInserts; i++ {
+                               arr := []float64{float64(i), float64(i)}
+                               bf.UpdateFloat64Array(arr)
+                       }
+
+                       data, _ := bf.ToCompactSlice()
+                       filename := 
fmt.Sprintf("%s/bf_double_array_n%d_h%d_go.sk", internal.GoPath, n, numHashes)
+                       os.WriteFile(filename, data, 0644)
+                       t.Logf("Generated: %s", filename)
+               })
+
+               // Generate for byte array type
+               t.Run("byte_array_type", func(t *testing.T) {
+                       seed, _ := GenerateRandomSeed()
+                       bf, _ := NewBloomFilterBySize(configBits, numHashes, 
WithSeed(seed))
+
+                       for i := 0; i < numInserts; i++ {
+                               b := byte(i % 256)
+                               arr := []byte{b, b, b, b}
+                               bf.UpdateSlice(arr)
+                       }
+
+                       data, _ := bf.ToCompactSlice()
+                       filename := 
fmt.Sprintf("%s/bf_byte_array_n%d_h%d_go.sk", internal.GoPath, n, numHashes)
+                       os.WriteFile(filename, data, 0644)
+                       t.Logf("Generated: %s", filename)
+               })
+       })
+}
+
+func TestJavaCompat(t *testing.T) {
+       t.Run("bloom filter", func(t *testing.T) {
+               testCases := []struct {
+                       n         int
+                       numHashes uint16
+               }{
+                       {0, 3},
+                       {0, 5},
+                       {10000, 3},
+                       {10000, 5},
+                       {2000000, 3},
+                       {2000000, 5},
+               }
+
+               for _, tc := range testCases {
+                       b, err := 
os.ReadFile(fmt.Sprintf("%s/bf_n%d_h%d_java.sk", internal.JavaPath, tc.n, 
tc.numHashes))
+                       assert.NoError(t, err)
+
+                       bf, err := NewBloomFilterFromSlice(b)
+                       assert.NoError(t, err)
+
+                       // Verify basic properties
+                       assert.Equal(t, tc.n == 0, bf.IsEmpty())
+                       assert.Equal(t, tc.numHashes, bf.GetNumHashes())
+
+                       if tc.n > 0 {
+                               // Verify bits used is reasonable
+                               assert.Greater(t, bf.GetBitsUsed(), uint64(0))
+                               assert.Less(t, bf.GetBitsUsed(), 
bf.GetCapacity())
+
+                               // Java inserts n/10 items (0 to n/10-1)
+                               numInserted := tc.n / 10
+
+                               // Verify ALL inserted items are found (no 
false negatives!)
+                               for i := 0; i < numInserted; i++ {
+                                       assert.True(t, bf.QueryInt64(int64(i)),
+                                               "Item %d should be found in 
bf_n%d_h%d_java.sk", i, tc.n, tc.numHashes)
+                               }
+
+                               // Verify NaN is found
+                               assert.True(t, bf.QueryFloat64(math.NaN()),
+                                       "NaN should be found in 
bf_n%d_h%d_java.sk", tc.n, tc.numHashes)
+
+                               // Negative test: verify false positive 
behavior is reasonable
+                               // Test items that were definitely NOT inserted
+                               negativeTestItems := []int64{-1, -100, 
int64(numInserted), int64(numInserted + 1), int64(numInserted * 2)}
+                               foundNegativeCount := 0
+                               for _, item := range negativeTestItems {
+                                       if bf.QueryInt64(item) {
+                                               foundNegativeCount++
+                                       }
+                               }
+
+                               // Should not find ALL negative items (that 
would indicate a bug)
+                               assert.Less(t, foundNegativeCount, 
len(negativeTestItems),
+                                       "Should not find ALL non-inserted items 
(found %d/%d)", foundNegativeCount, len(negativeTestItems))
+
+                               // Test false positive rate on a larger sample
+                               falsePositiveCount := 0
+                               testRange := 1000
+                               startNegative := int64(numInserted + 1000)
+                               for i := int64(0); i < int64(testRange); i++ {
+                                       if bf.QueryInt64(startNegative + i) {
+                                               falsePositiveCount++
+                                       }
+                               }
+
+                               fppRate := float64(falsePositiveCount) / 
float64(testRange)
+                               assert.Less(t, fppRate, 0.5,
+                                       "False positive rate should be 
reasonable, got %.2f%%", fppRate*100)
+                       }
+               }
+       })
+       t.Run("Specific Java", func(t *testing.T) {
+               n := 10000
+               numInserts := n / 10 // 1000 items
+               numHashes := uint16(3)
+
+               testCases := []struct {
+                       name      string
+                       filename  string
+                       testItems func(bf BloomFilter) // Function to test items
+               }{
+                       {
+                               name:     "long_type",
+                               filename: "bf_n10000_h3_java.sk", // Already 
exists - standard test
+                               testItems: func(bf BloomFilter) {
+                                       // Java inserted integers 0-999
+                                       for i := 0; i < numInserts; i++ {
+                                               assert.True(t, 
bf.QueryInt64(int64(i)),
+                                                       "Should find int64 
value %d", i)
+                                       }
+                               },
+                       },
+                       {
+                               name:     "string_type",
+                               filename: "bf_string_n10000_h3_java.sk",
+                               testItems: func(bf BloomFilter) {
+                                       // Java inserted strings "0", "1", ..., 
"999"
+                                       for i := 0; i < numInserts; i++ {
+                                               assert.True(t, 
bf.QueryString(fmt.Sprintf("%d", i)),
+                                                       "Should find string 
'%d'", i)
+                                       }
+                               },
+                       },
+                       {
+                               name:     "double_type",
+                               filename: "bf_double_n10000_h3_java.sk",
+                               testItems: func(bf BloomFilter) {
+                                       // Java inserted doubles 0.0, 1.0, ..., 
999.0
+                                       for i := 0; i < numInserts; i++ {
+                                               assert.True(t, 
bf.QueryFloat64(float64(i)),
+                                                       "Should find double 
%f", float64(i))
+                                       }
+                               },
+                       },
+                       {
+                               name:     "long_array_type",
+                               filename: "bf_long_array_n10000_h3_java.sk",
+                               testItems: func(bf BloomFilter) {
+                                       // Java inserted long arrays [0,0], 
[1,1], ..., [999,999]
+                                       for i := 0; i < numInserts; i++ {
+                                               arr := []int64{int64(i), 
int64(i)}
+                                               assert.True(t, 
bf.QueryInt64Array(arr),
+                                                       "Should find long array 
[%d,%d]", i, i)
+                                       }
+                               },
+                       },
+                       {
+                               name:     "double_array_type",
+                               filename: "bf_double_array_n10000_h3_java.sk",
+                               testItems: func(bf BloomFilter) {
+                                       // Java inserted double arrays 
[0.0,0.0], [1.0,1.0], ..., [999.0,999.0]
+                                       for i := 0; i < numInserts; i++ {
+                                               arr := []float64{float64(i), 
float64(i)}
+                                               assert.True(t, 
bf.QueryFloat64Array(arr),
+                                                       "Should find double 
array [%f,%f]", float64(i), float64(i))
+                                       }
+                               },
+                       },
+                       {
+                               name:     "byte_array_type",
+                               filename: "bf_byte_array_n10000_h3_java.sk",
+                               testItems: func(bf BloomFilter) {
+                                       // Java inserted byte arrays [i,i,i,i] 
for i=0-999 (mod 256)
+                                       for i := 0; i < numInserts; i++ {
+                                               b := byte(i % 256)
+                                               arr := []byte{b, b, b, b}
+                                               assert.True(t, 
bf.QuerySlice(arr),
+                                                       "Should find byte array 
[%d,%d,%d,%d]", b, b, b, b)
+                                       }
+                               },
+                       },
+               }
+
+               for _, tc := range testCases {
+                       t.Run(tc.name, func(t *testing.T) {
+                               filepath := fmt.Sprintf("%s/%s", 
internal.JavaPath, tc.filename)
+
+                               // Skip if file doesn't exist
+                               if _, err := os.Stat(filepath); 
os.IsNotExist(err) {
+                                       t.Skipf("Java file not found: %s (needs 
to be generated)", tc.filename)
+                                       return
+                               }
+
+                               // Read Java file
+                               data, err := os.ReadFile(filepath)
+                               assert.NoError(t, err)
+
+                               // Deserialize
+                               bf, err := NewBloomFilterFromSlice(data)
+                               assert.NoError(t, err)
+                               assert.False(t, bf.IsEmpty())
+                               assert.Equal(t, numHashes, bf.GetNumHashes())
+
+                               // Test all items of this type
+                               tc.testItems(bf)
+
+                               // Compute MD5 for reference
+                               hash := md5.Sum(data)
+                               t.Logf("✅ Java %s: %d items verified, MD5=%x", 
tc.name, numInserts, hash)
+                       })
+               }
+       })
+}
+
+func TestCPPCompat(t *testing.T) {
+       t.Run("bloom filter", func(t *testing.T) {
+               testCases := []struct {
+                       n         int
+                       numHashes uint16
+               }{
+                       {0, 3},
+                       {0, 5},
+                       {10000, 3},
+                       {10000, 5},
+                       {2000000, 3},
+                       {2000000, 5},
+               }
+
+               for _, tc := range testCases {
+                       filename := fmt.Sprintf("%s/bf_n%d_h%d_cpp.sk", 
internal.CppPath, tc.n, tc.numHashes)
+
+                       // Skip if file doesn't exist
+                       if _, err := os.Stat(filename); os.IsNotExist(err) {
+                               t.Skipf("C++ file not found: %s", filename)
+                               return
+                       }
+
+                       b, err := os.ReadFile(filename)
+                       assert.NoError(t, err)
+
+                       bf, err := NewBloomFilterFromSlice(b)
+                       assert.NoError(t, err)
+
+                       // Verify basic properties
+                       assert.Equal(t, tc.n == 0, bf.IsEmpty())
+                       assert.Equal(t, tc.numHashes, bf.GetNumHashes())
+
+                       if tc.n > 0 {
+                               // Verify bits used is reasonable
+                               assert.Greater(t, bf.GetBitsUsed(), uint64(0))
+                               assert.Less(t, bf.GetBitsUsed(), 
bf.GetCapacity())
+
+                               // C++ inserts n/10 items (0 to n/10-1)
+                               numInserted := tc.n / 10
+
+                               // Verify ALL inserted items are found (no 
false negatives!)
+                               for i := 0; i < numInserted; i++ {
+                                       assert.True(t, bf.QueryInt64(int64(i)),
+                                               "Item %d should be found in 
bf_n%d_h%d_cpp.sk", i, tc.n, tc.numHashes)
+                               }
+
+                               // Verify NaN is found
+                               assert.True(t, bf.QueryFloat64(math.NaN()),
+                                       "NaN should be found in 
bf_n%d_h%d_cpp.sk", tc.n, tc.numHashes)
+
+                               // Negative test: verify false positive 
behavior is reasonable
+                               // Test items that were definitely NOT inserted
+                               negativeTestItems := []int64{-1, -100, 
int64(numInserted), int64(numInserted + 1), int64(numInserted * 2)}
+                               foundNegativeCount := 0
+                               for _, item := range negativeTestItems {
+                                       if bf.QueryInt64(item) {
+                                               foundNegativeCount++
+                                       }
+                               }
+
+                               // Should not find ALL negative items (that 
would indicate a bug)
+                               assert.Less(t, foundNegativeCount, 
len(negativeTestItems),
+                                       "Should not find ALL non-inserted items 
(found %d/%d)", foundNegativeCount, len(negativeTestItems))
+
+                               // Test false positive rate on a larger sample
+                               falsePositiveCount := 0
+                               testRange := 1000
+                               startNegative := int64(numInserted + 1000)
+                               for i := int64(0); i < int64(testRange); i++ {
+                                       if bf.QueryInt64(startNegative + i) {
+                                               falsePositiveCount++
+                                       }
+                               }
+
+                               fppRate := float64(falsePositiveCount) / 
float64(testRange)
+                               assert.Less(t, fppRate, 0.5,
+                                       "False positive rate should be 
reasonable, got %.2f%%", fppRate*100)
+                       }
+               }
+       })
+
+       t.Run("Specific Cpp", func(t *testing.T) {
+               n := 10000
+               numInserts := n / 10 // 1000 items
+               numHashes := uint16(3)
+
+               testCases := []struct {
+                       name      string
+                       filename  string
+                       testItems func(bf BloomFilter)
+               }{
+                       {
+                               name:     "long_type",
+                               filename: "bf_n10000_h3_cpp.sk", // Standard 
test
+                               testItems: func(bf BloomFilter) {
+                                       for i := 0; i < numInserts; i++ {
+                                               assert.True(t, 
bf.QueryInt64(int64(i)))
+                                       }
+                               },
+                       },
+                       {
+                               name:     "string_type",
+                               filename: "bf_string_n10000_h3_cpp.sk",
+                               testItems: func(bf BloomFilter) {
+                                       for i := 0; i < numInserts; i++ {
+                                               assert.True(t, 
bf.QueryString(fmt.Sprintf("%d", i)))
+                                       }
+                               },
+                       },
+                       {
+                               name:     "double_type",
+                               filename: "bf_double_n10000_h3_cpp.sk",
+                               testItems: func(bf BloomFilter) {
+                                       for i := 0; i < numInserts; i++ {
+                                               assert.True(t, 
bf.QueryFloat64(float64(i)))
+                                       }
+                               },
+                       },
+                       {
+                               name:     "byte_array_type",
+                               filename: "bf_byte_array_n10000_h3_cpp.sk",
+                               testItems: func(bf BloomFilter) {
+                                       for i := 0; i < numInserts; i++ {
+                                               b := byte(i % 256)
+                                               arr := []byte{b, b, b, b}
+                                               assert.True(t, 
bf.QuerySlice(arr))
+                                       }
+                               },
+                       },
+               }
+
+               for _, tc := range testCases {
+                       t.Run(tc.name, func(t *testing.T) {
+                               filepath := fmt.Sprintf("%s/%s", 
internal.CppPath, tc.filename)
+
+                               // Skip if file doesn't exist
+                               if _, err := os.Stat(filepath); 
os.IsNotExist(err) {
+                                       t.Skipf("C++ file not found: %s (needs 
to be generated)", tc.filename)
+                                       return
+                               }
+
+                               // Read C++ file
+                               data, err := os.ReadFile(filepath)
+                               assert.NoError(t, err)
+
+                               // Deserialize
+                               bf, err := NewBloomFilterFromSlice(data)
+                               assert.NoError(t, err)
+                               assert.False(t, bf.IsEmpty())
+                               assert.Equal(t, numHashes, bf.GetNumHashes())
+
+                               // Test all items
+                               tc.testItems(bf)
+
+                               hash := md5.Sum(data)
+                               t.Logf("✅ C++ %s: %d items verified, MD5=%x", 
tc.name, numInserts, hash)
+                       })
+               }
+       })
+}
diff --git a/filters/bloom_filter_test.go b/filters/bloom_filter_test.go
new file mode 100644
index 0000000..97ac50c
--- /dev/null
+++ b/filters/bloom_filter_test.go
@@ -0,0 +1,804 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package filters
+
+import (
+       "encoding/binary"
+       "math"
+       "testing"
+
+       "github.com/stretchr/testify/assert"
+)
+
+func TestInvalidConstructorArguments(t *testing.T) {
+       // numBits = 0
+       _, err := NewBloomFilterBySize(0, 3)
+       assert.Error(t, err)
+
+       // numHashes = 0
+       _, err = NewBloomFilterBySize(64, 0)
+       assert.Error(t, err)
+
+       // numBits too large (overflow)
+       _, err = NewBloomFilterBySize(1<<60, 3)
+       assert.Error(t, err)
+
+       // Invalid FPP
+       _, err = NewBloomFilterByAccuracy(1000, 0.0)
+       assert.Error(t, err)
+
+       _, err = NewBloomFilterByAccuracy(1000, 1.0)
+       assert.Error(t, err)
+
+       // Zero max items
+       _, err = NewBloomFilterByAccuracy(0, 0.01)
+       assert.Error(t, err)
+}
+
+func TestStandardConstructors(t *testing.T) {
+       numItems := uint64(5000)
+       targetFpp := 0.01
+
+       // Create by accuracy
+       bf1, err := NewBloomFilterByAccuracy(numItems, targetFpp)
+       assert.NoError(t, err)
+       assert.NotNil(t, bf1)
+
+       // Verify capacity is rounded to multiple of 64
+       capacity1 := bf1.GetCapacity()
+       assert.Equal(t, uint64(0), capacity1%64)
+
+       // Create by size with same parameters
+       numBits := SuggestNumFilterBits(numItems, targetFpp)
+       numHashes := SuggestNumHashesFromSize(numItems, numBits)
+       bf2, err := NewBloomFilterBySize(numBits, numHashes)
+       assert.NoError(t, err)
+
+       // Both should have same capacity and hash count
+       assert.Equal(t, bf1.GetCapacity(), bf2.GetCapacity())
+       assert.Equal(t, bf1.GetNumHashes(), bf2.GetNumHashes())
+
+       // Both should be empty
+       assert.True(t, bf1.IsEmpty())
+       assert.True(t, bf2.IsEmpty())
+       assert.Equal(t, uint64(0), bf1.GetBitsUsed())
+       assert.Equal(t, uint64(0), bf2.GetBitsUsed())
+}
+
+func TestBasicOperations(t *testing.T) {
+       numItems := uint64(5000)
+       targetFpp := 0.01
+       seed := uint64(12345)
+
+       bf, err := NewBloomFilterByAccuracy(numItems, targetFpp, WithSeed(seed))
+       assert.NoError(t, err)
+       assert.Equal(t, seed, bf.GetSeed())
+
+       // Initially empty
+       assert.True(t, bf.IsEmpty())
+
+       // Insert items
+       for i := uint64(0); i < numItems; i++ {
+               err = bf.UpdateUInt64(i)
+               assert.NoError(t, err)
+       }
+
+       // No longer empty
+       assert.False(t, bf.IsEmpty())
+
+       // Check bits used is reasonable (should be around 50% for optimal 
parameters)
+       bitsUsed := bf.GetBitsUsed()
+       capacity := bf.GetCapacity()
+       utilizationPercent := float64(bitsUsed) * 100.0 / float64(capacity)
+       assert.Greater(t, utilizationPercent, 30.0)
+       assert.Less(t, utilizationPercent, 70.0)
+
+       // All inserted items should be found
+       for i := uint64(0); i < numItems; i++ {
+               assert.True(t, bf.QueryUInt64(i), "Item %d should be found", i)
+       }
+
+       // Count false positives on non-inserted items
+       falsePositives := 0
+       testSize := 10000
+       for i := numItems; i < numItems+uint64(testSize); i++ {
+               if bf.QueryUInt64(i) {
+                       falsePositives++
+               }
+       }
+
+       // False positive rate should be within reasonable bounds of target
+       actualFpp := float64(falsePositives) / float64(testSize)
+       // Allow up to 3x the target FPP (probabilistic structure)
+       assert.Less(t, actualFpp, targetFpp*3.0, "Actual FPP: %.4f, Target: 
%.4f", actualFpp, targetFpp)
+
+       // Test Reset
+       err = bf.Reset()
+       assert.NoError(t, err)
+       assert.True(t, bf.IsEmpty())
+       assert.Equal(t, uint64(0), bf.GetBitsUsed())
+}
+
+func TestInversion(t *testing.T) {
+       bf, err := NewBloomFilterBySize(256, 5, WithSeed(42))
+       assert.NoError(t, err)
+
+       // Insert some items
+       for i := uint64(0); i < 100; i++ {
+               bf.UpdateUInt64(i)
+       }
+
+       bitsUsedBefore := bf.GetBitsUsed()
+       capacity := bf.GetCapacity()
+
+       // Count items that appear present before inversion
+       presentBefore := 0
+       for i := uint64(0); i < 100; i++ {
+               if bf.QueryUInt64(i) {
+                       presentBefore++
+               }
+       }
+       assert.Equal(t, 100, presentBefore)
+
+       // Invert
+       err = bf.Invert()
+       assert.NoError(t, err)
+
+       // Bits used should be inverted
+       bitsUsedAfter := bf.GetBitsUsed()
+       assert.Equal(t, capacity-bitsUsedBefore, bitsUsedAfter)
+
+       // Most original items should now not be present
+       stillPresent := 0
+       for i := uint64(0); i < 100; i++ {
+               if bf.QueryUInt64(i) {
+                       stillPresent++
+               }
+       }
+       assert.Less(t, stillPresent, 10) // Very few should still appear present
+}
+
+func TestIncompatibleSetOperations(t *testing.T) {
+       bf1, _ := NewBloomFilterBySize(256, 5, WithSeed(42))
+
+       // Different num_bits
+       bf2, _ := NewBloomFilterBySize(512, 5, WithSeed(42))
+       assert.False(t, bf1.IsCompatible(bf2))
+       assert.Error(t, bf1.Union(bf2))
+       assert.Error(t, bf1.Intersect(bf2))
+
+       // Different num_hashes
+       bf3, _ := NewBloomFilterBySize(256, 7, WithSeed(42))
+       assert.False(t, bf1.IsCompatible(bf3))
+       assert.Error(t, bf1.Union(bf3))
+
+       // Different seed
+       bf4, _ := NewBloomFilterBySize(256, 5, WithSeed(99))
+       assert.False(t, bf1.IsCompatible(bf4))
+       assert.Error(t, bf1.Intersect(bf4))
+}
+
+func TestBasicUnion(t *testing.T) {
+       n := uint64(1000)
+       bf1, _ := NewBloomFilterBySize(12288, 4, WithSeed(123))
+       bf2, _ := NewBloomFilterBySize(12288, 4, WithSeed(123))
+
+       // bf1: items 0 to n-1
+       for i := uint64(0); i < n; i++ {
+               bf1.UpdateUInt64(i)
+       }
+
+       // bf2: items n/2 to 3n/2-1 (overlap in middle)
+       for i := n / 2; i < 3*n/2; i++ {
+               bf2.UpdateUInt64(i)
+       }
+
+       // Union bf2 into bf1
+       err := bf1.Union(bf2)
+       assert.NoError(t, err)
+
+       // All items from 0 to 3n/2-1 should be present
+       for i := uint64(0); i < 3*n/2; i++ {
+               assert.True(t, bf1.QueryUInt64(i), "Item %d should be in 
union", i)
+       }
+
+       // Count false positives on items beyond range
+       falsePositives := 0
+       testRange := 1000
+       for i := 3 * n / 2; i < 3*n/2+uint64(testRange); i++ {
+               if bf1.QueryUInt64(i) {
+                       falsePositives++
+               }
+       }
+
+       fppRate := float64(falsePositives) / float64(testRange)
+       assert.Less(t, fppRate, 0.20) // Should be reasonable
+}
+
+func TestBasicIntersection(t *testing.T) {
+       n := uint64(1000)
+       bf1, _ := NewBloomFilterBySize(12288, 4, WithSeed(456))
+       bf2, _ := NewBloomFilterBySize(12288, 4, WithSeed(456))
+
+       // bf1: items 0 to n-1
+       for i := uint64(0); i < n; i++ {
+               bf1.UpdateUInt64(i)
+       }
+
+       // bf2: items n/2 to 3n/2-1
+       for i := n / 2; i < 3*n/2; i++ {
+               bf2.UpdateUInt64(i)
+       }
+
+       // Intersect
+       err := bf1.Intersect(bf2)
+       assert.NoError(t, err)
+
+       // Items in overlap (n/2 to n-1) should be present
+       for i := n / 2; i < n; i++ {
+               assert.True(t, bf1.QueryUInt64(i), "Item %d should be in 
intersection", i)
+       }
+
+       // Items outside overlap should mostly not be present
+       presentOutside := 0
+       for i := uint64(0); i < n/2; i++ {
+               if bf1.QueryUInt64(i) {
+                       presentOutside++
+               }
+       }
+       // Allow some false positives
+       assert.Less(t, presentOutside, int(n/10))
+}
+
+func TestQueryAndUpdate(t *testing.T) {
+       bf, _ := NewBloomFilterBySize(256, 5, WithSeed(789))
+
+       // First call should return false (not present)
+       wasPresent, err := bf.QueryAndUpdateUInt64(42)
+       assert.NoError(t, err)
+       assert.False(t, wasPresent)
+
+       // Second call should return true (now present)
+       wasPresent, err = bf.QueryAndUpdateUInt64(42)
+       assert.NoError(t, err)
+       assert.True(t, wasPresent)
+
+       // Regular query should also return true
+       assert.True(t, bf.QueryUInt64(42))
+}
+
+func TestMultipleDataTypes(t *testing.T) {
+       bf, _ := NewBloomFilterBySize(512, 7)
+
+       // Test int64
+       bf.UpdateInt64(-123)
+       assert.True(t, bf.QueryInt64(-123))
+       assert.False(t, bf.QueryInt64(-124))
+
+       // Test string
+       bf.UpdateString("hello world")
+       assert.True(t, bf.QueryString("hello world"))
+       assert.False(t, bf.QueryString("hello"))
+
+       // Test byte slice
+       data := []byte{1, 2, 3, 4, 5}
+       bf.UpdateSlice(data)
+       assert.True(t, bf.QuerySlice(data))
+       assert.False(t, bf.QuerySlice([]byte{1, 2, 3}))
+
+       // Test float64
+       bf.UpdateFloat64(3.14159)
+       assert.True(t, bf.QueryFloat64(3.14159))
+       assert.False(t, bf.QueryFloat64(2.71828))
+
+       // Test NaN handling (NaN should be canonicalized)
+       bf.UpdateFloat64(math.NaN())
+       assert.True(t, bf.QueryFloat64(math.NaN()))
+
+       // Test -0.0 and 0.0 are treated the same
+       bf.UpdateFloat64(0.0)
+       assert.True(t, bf.QueryFloat64(-0.0))
+       assert.True(t, bf.QueryFloat64(0.0))
+}
+
+func TestSerializationRoundtrip(t *testing.T) {
+       bf, _ := NewBloomFilterBySize(256, 5, WithSeed(999))
+
+       // Insert some items
+       for i := uint64(0); i < 50; i++ {
+               bf.UpdateUInt64(i)
+       }
+
+       // Serialize
+       bytes, err := bf.ToCompactSlice()
+       assert.NoError(t, err)
+       assert.NotNil(t, bytes)
+
+       // Verify we can deserialize and properties match
+       bf2, err := NewBloomFilterFromSlice(bytes)
+       assert.NoError(t, err)
+       assert.Equal(t, bf.GetSeed(), bf2.GetSeed())
+       assert.Equal(t, bf.GetNumHashes(), bf2.GetNumHashes())
+       assert.Equal(t, bf.GetCapacity(), bf2.GetCapacity())
+       assert.Equal(t, bf.GetBitsUsed(), bf2.GetBitsUsed())
+}
+
+func TestEmptySerializationFormat(t *testing.T) {
+       bf, _ := NewBloomFilterBySize(256, 5, WithSeed(111))
+
+       // Serialize empty filter
+       bytes, err := bf.ToCompactSlice()
+       assert.NoError(t, err)
+
+       // Empty filter should have shorter serialization (24 bytes)
+       assert.Equal(t, 24, len(bytes))
+
+       // Should be able to deserialize
+       bf2, err := NewBloomFilterFromSlice(bytes)
+       assert.NoError(t, err)
+       assert.True(t, bf2.IsEmpty())
+       assert.Equal(t, bf.GetSeed(), bf2.GetSeed())
+       assert.Equal(t, bf.GetNumHashes(), bf2.GetNumHashes())
+}
+
+func TestSuggestFunctions(t *testing.T) {
+       // Test SuggestNumFilterBits
+       bits := SuggestNumFilterBits(10000, 0.01)
+       assert.Greater(t, bits, uint64(0))
+
+       // For 10000 items at 1% FPP, should be around 95850 bits
+       assert.InDelta(t, 95850, bits, 1000)
+
+       // Test SuggestNumHashes
+       hashes := SuggestNumHashes(0.01)
+       assert.Greater(t, hashes, uint16(0))
+       // For 1% FPP, should be around 7 hashes
+       assert.InDelta(t, 7, hashes, 1)
+
+       // Test SuggestNumHashesFromSize
+       hashes2 := SuggestNumHashesFromSize(10000, bits)
+       assert.Greater(t, hashes2, uint16(0))
+       // Should also be around 7
+       assert.InDelta(t, 7, hashes2, 1)
+}
+
+func TestCapacityRounding(t *testing.T) {
+       // Test that capacity is always rounded to multiple of 64
+       testCases := []struct {
+               input    uint64
+               expected uint64
+       }{
+               {1, 64},
+               {63, 64},
+               {64, 64},
+               {65, 128},
+               {127, 128},
+               {128, 128},
+               {129, 192},
+               {1000, 1024},
+       }
+
+       for _, tc := range testCases {
+               result := roundCapacity(tc.input)
+               assert.Equal(t, tc.expected, result, "roundCapacity(%d) should 
be %d", tc.input, tc.expected)
+               assert.Equal(t, uint64(0), result%64, "Result should be 
multiple of 64")
+       }
+}
+
+func TestDeserializationRoundtrip(t *testing.T) {
+       // Create and populate a filter
+       bf1, err := NewBloomFilterBySize(512, 7, WithSeed(12345))
+       assert.NoError(t, err)
+
+       for i := uint64(0); i < 100; i++ {
+               bf1.UpdateUInt64(i)
+       }
+
+       // Serialize
+       bytes, err := bf1.ToCompactSlice()
+       assert.NoError(t, err)
+
+       // Deserialize
+       bf2, err := NewBloomFilterFromSlice(bytes)
+       assert.NoError(t, err)
+
+       // Verify properties match
+       assert.Equal(t, bf1.GetSeed(), bf2.GetSeed())
+       assert.Equal(t, bf1.GetNumHashes(), bf2.GetNumHashes())
+       assert.Equal(t, bf1.GetCapacity(), bf2.GetCapacity())
+       assert.Equal(t, bf1.GetBitsUsed(), bf2.GetBitsUsed())
+       assert.Equal(t, bf1.IsEmpty(), bf2.IsEmpty())
+
+       // Verify all inserted items are found
+       for i := uint64(0); i < 100; i++ {
+               assert.True(t, bf2.QueryUInt64(i), "Item %d should be found 
after deserialization", i)
+       }
+
+       // Verify queries match
+       for i := uint64(100); i < 200; i++ {
+               assert.Equal(t, bf1.QueryUInt64(i), bf2.QueryUInt64(i), "Query 
results should match for item %d", i)
+       }
+}
+
+func TestDeserializeEmptyFilter(t *testing.T) {
+       // Create empty filter
+       bf1, err := NewBloomFilterBySize(256, 5, WithSeed(999))
+       assert.NoError(t, err)
+
+       // Serialize
+       bytes, err := bf1.ToCompactSlice()
+       assert.NoError(t, err)
+       assert.Equal(t, 24, len(bytes)) // Empty filter is 24 bytes
+
+       // Deserialize
+       bf2, err := NewBloomFilterFromSlice(bytes)
+       assert.NoError(t, err)
+
+       // Verify properties
+       assert.True(t, bf2.IsEmpty())
+       assert.Equal(t, uint64(0), bf2.GetBitsUsed())
+       assert.Equal(t, bf1.GetSeed(), bf2.GetSeed())
+       assert.Equal(t, bf1.GetNumHashes(), bf2.GetNumHashes())
+       assert.Equal(t, bf1.GetCapacity(), bf2.GetCapacity())
+}
+
+func TestDeserializeInvalidData(t *testing.T) {
+       // Too small
+       _, err := NewBloomFilterFromSlice([]byte{1, 2, 3})
+       assert.Error(t, err)
+       assert.Contains(t, err.Error(), "insufficient data")
+
+       // Wrong serialization version
+       bytes := make([]byte, 24)
+       insertPreambleLongs(bytes, preambleLongsEmpty)
+       insertSerVer(bytes)
+       bytes[serVerOffset] = 99 // Invalid version
+       insertFamilyID(bytes)
+       insertFlags(bytes, setEmptyFlag(0))
+       _, err = NewBloomFilterFromSlice(bytes)
+       assert.Error(t, err)
+       assert.Contains(t, err.Error(), "unsupported serialization version")
+
+       // Wrong family ID
+       bytes = make([]byte, 24)
+       insertPreambleLongs(bytes, preambleLongsEmpty)
+       insertSerVer(bytes)
+       bytes[familyIDOffset] = 99 // Invalid family
+       insertFlags(bytes, setEmptyFlag(0))
+       _, err = NewBloomFilterFromSlice(bytes)
+       assert.Error(t, err)
+       assert.Contains(t, err.Error(), "invalid family ID")
+
+       // Zero numHashes
+       bytes = make([]byte, 24)
+       insertPreambleLongs(bytes, preambleLongsEmpty)
+       insertSerVer(bytes)
+       insertFamilyID(bytes)
+       insertFlags(bytes, setEmptyFlag(0))
+       insertNumHashes(bytes, 0)
+       _, err = NewBloomFilterFromSlice(bytes)
+       assert.Error(t, err)
+       assert.Contains(t, err.Error(), "numHashes must be positive")
+}
+
+func TestDeserializeWithDirtyBits(t *testing.T) {
+       // Create and populate a filter
+       bf1, err := NewBloomFilterBySize(256, 5, WithSeed(777))
+       assert.NoError(t, err)
+
+       for i := uint64(0); i < 50; i++ {
+               bf1.UpdateUInt64(i)
+       }
+
+       // Serialize
+       bytes, err := bf1.ToCompactSlice()
+       assert.NoError(t, err)
+
+       // Manually set numBitsSet to dirty value (0xFFFFFFFFFFFFFFFF)
+       insertNumBitsSet(bytes, dirtyBitsValue)
+
+       // Deserialize - should recount bits
+       bf2, err := NewBloomFilterFromSlice(bytes)
+       assert.NoError(t, err)
+
+       // Should have correct bit count (recalculated)
+       assert.Equal(t, bf1.GetBitsUsed(), bf2.GetBitsUsed())
+       assert.Greater(t, bf2.GetBitsUsed(), uint64(0))
+}
+
+func TestSerializeDeserializeConsistency(t *testing.T) {
+       // Test multiple serialize-deserialize cycles
+       bf, err := NewBloomFilterBySize(512, 7, WithSeed(42))
+       assert.NoError(t, err)
+
+       for i := uint64(0); i < 100; i++ {
+               bf.UpdateUInt64(i)
+       }
+
+       // First cycle
+       bytes1, err := bf.ToCompactSlice()
+       assert.NoError(t, err)
+
+       bf2, err := NewBloomFilterFromSlice(bytes1)
+       assert.NoError(t, err)
+
+       // Second cycle
+       bytes2, err := bf2.ToCompactSlice()
+       assert.NoError(t, err)
+
+       // Bytes should be identical
+       assert.Equal(t, bytes1, bytes2)
+
+       // Third cycle
+       bf3, err := NewBloomFilterFromSlice(bytes2)
+       assert.NoError(t, err)
+
+       bytes3, err := bf3.ToCompactSlice()
+       assert.NoError(t, err)
+
+       // Still identical
+       assert.Equal(t, bytes1, bytes3)
+}
+
+func TestHashFunctionConsistency(t *testing.T) {
+       // Test that hash function produces consistent results
+       seed := uint64(12345)
+       bf, err := NewBloomFilterBySize(1024, 5, WithSeed(seed))
+       assert.NoError(t, err)
+
+       // Insert an item
+       err = bf.UpdateInt64(42)
+       assert.NoError(t, err)
+
+       // Query it back immediately
+       assert.True(t, bf.QueryInt64(42), "Item should be found immediately 
after insertion")
+
+       // Serialize and deserialize
+       bytes, err := bf.ToCompactSlice()
+       assert.NoError(t, err)
+
+       bf2, err := NewBloomFilterFromSlice(bytes)
+       assert.NoError(t, err)
+
+       // Still found after deserialization
+       assert.True(t, bf2.QueryInt64(42), "Item should be found after 
deserialization")
+
+       // Create a new filter with the same seed
+       bf3, err := NewBloomFilterBySize(1024, 5, WithSeed(seed))
+       assert.NoError(t, err)
+       bf3.UpdateInt64(42)
+
+       // Should produce identical serialization
+       bytes3, err := bf3.ToCompactSlice()
+       assert.NoError(t, err)
+       assert.Equal(t, bytes, bytes3, "Same seed and items should produce 
identical binary output")
+}
+
+func TestArrayMethods(t *testing.T) {
+       seed := uint64(99999)
+
+       // Test int64 arrays
+       intArray := []int64{1, 2, 3, 4, 5}
+       bf1, _ := NewBloomFilterBySize(512, 5, WithSeed(seed))
+
+       // Update with array
+       err := bf1.UpdateInt64Array(intArray)
+       assert.NoError(t, err)
+       assert.False(t, bf1.IsEmpty())
+
+       // Query array should return true
+       assert.True(t, bf1.QueryInt64Array(intArray), "Should find array after 
update")
+
+       // QueryAndUpdate should return true (already present)
+       wasPresent, err := bf1.QueryAndUpdateInt64Array(intArray)
+       assert.NoError(t, err)
+       assert.True(t, wasPresent, "Array should be present")
+
+       // Different array should not be found
+       differentArray := []int64{10, 20, 30}
+       assert.False(t, bf1.QueryInt64Array(differentArray), "Different array 
should not be found")
+
+       // Test float64 arrays
+       floatArray := []float64{1.1, 2.2, 3.3}
+       bf2, _ := NewBloomFilterBySize(512, 5, WithSeed(seed))
+
+       err = bf2.UpdateFloat64Array(floatArray)
+       assert.NoError(t, err)
+
+       assert.True(t, bf2.QueryFloat64Array(floatArray), "Should find float 
array after update")
+
+       wasPresent, err = bf2.QueryAndUpdateFloat64Array(floatArray)
+       assert.NoError(t, err)
+       assert.True(t, wasPresent)
+
+       // Empty arrays should be no-op
+       bf3, _ := NewBloomFilterBySize(512, 5, WithSeed(seed))
+       err = bf3.UpdateInt64Array([]int64{})
+       assert.NoError(t, err)
+       assert.True(t, bf3.IsEmpty(), "Empty array update should not change 
filter")
+
+       err = bf3.UpdateFloat64Array([]float64{})
+       assert.NoError(t, err)
+       assert.True(t, bf3.IsEmpty(), "Empty array update should not change 
filter")
+
+       // Nil arrays should be no-op
+       err = bf3.UpdateInt64Array(nil)
+       assert.NoError(t, err)
+       assert.True(t, bf3.IsEmpty(), "Nil array update should not change 
filter")
+}
+
+// TestBasicUpdateMethods replicates Java's testBasicUpdateMethods
+func TestBasicUpdateMethods(t *testing.T) {
+       numDistinct := uint64(100)
+       fpp := 1e-6
+       bf, err := NewBloomFilterByAccuracy(numDistinct, fpp)
+       assert.NoError(t, err)
+
+       // Empty string should do nothing (no update)
+       err = bf.UpdateString("")
+       assert.NoError(t, err)
+       // Querying empty string should return false (not inserted)
+       wasPresent, err := bf.QueryAndUpdateString("")
+       assert.NoError(t, err)
+       assert.False(t, wasPresent)
+       assert.Equal(t, uint64(0), bf.GetBitsUsed())
+
+       // Update with non-empty string
+       err = bf.UpdateString("abc")
+       assert.NoError(t, err)
+
+       // Query different string (should not be present)
+       wasPresent, err = bf.QueryAndUpdateString("def")
+       assert.NoError(t, err)
+       assert.False(t, wasPresent)
+
+       // Update with int
+       err = bf.UpdateInt64(932)
+       assert.NoError(t, err)
+
+       // Query different int (should not be present)
+       wasPresent, err = bf.QueryAndUpdateInt64(543)
+       assert.NoError(t, err)
+       assert.False(t, wasPresent)
+
+       // Update with NaN
+       err = bf.UpdateFloat64(math.NaN())
+       assert.NoError(t, err)
+
+       // Query positive infinity (should not be present)
+       wasPresent, err = bf.QueryAndUpdateFloat64(math.Inf(1))
+       assert.NoError(t, err)
+       assert.False(t, wasPresent)
+
+       // Bits used should be reasonable (at most numHashes * 6 for 6 distinct 
updates)
+       assert.LessOrEqual(t, bf.GetBitsUsed(), uint64(bf.GetNumHashes())*6)
+       assert.False(t, bf.IsEmpty())
+}
+
+// TestArrayUpdateMethods replicates Java's testArrayUpdateMethods
+func TestArrayUpdateMethods(t *testing.T) {
+       // Test data: 3 doubles = 24 bytes
+       rawData := []float64{1.414, 2.71, 3.1415926538}
+
+       numDistinct := uint64(100)
+       fpp := 1e-6
+
+       // Test UpdateFloat64Array (matches Java's update(double[]))
+       bfDoubles, err := NewBloomFilterByAccuracy(numDistinct, fpp)
+       assert.NoError(t, err)
+
+       err = bfDoubles.UpdateFloat64Array(rawData)
+       assert.NoError(t, err)
+
+       wasPresent, err := bfDoubles.QueryAndUpdateFloat64Array(rawData)
+       assert.NoError(t, err)
+       assert.True(t, wasPresent, "Should find array after update")
+
+       found := bfDoubles.QueryFloat64Array(rawData)
+       assert.True(t, found, "Query should return true")
+
+       numBitsSet := bfDoubles.GetBitsUsed()
+       seed := bfDoubles.GetSeed()
+
+       // Test with byte slice (matches Java's update(byte[]))
+       bytes := make([]byte, len(rawData)*8)
+       for i, val := range rawData {
+               bits := math.Float64bits(val)
+               binary.LittleEndian.PutUint64(bytes[i*8:], bits)
+       }
+
+       bfBytes, err := NewBloomFilterByAccuracy(numDistinct, fpp, 
WithSeed(seed))
+       assert.NoError(t, err)
+
+       err = bfBytes.UpdateSlice(bytes)
+       assert.NoError(t, err)
+
+       wasPresent, err = bfBytes.QueryAndUpdateSlice(bytes)
+       assert.NoError(t, err)
+       assert.True(t, wasPresent)
+
+       found = bfBytes.QuerySlice(bytes)
+       assert.True(t, found)
+
+       // Both should have same number of bits set (same data, same seed)
+       assert.Equal(t, numBitsSet, bfBytes.GetBitsUsed())
+
+       // Test with int64 array (matches Java's update(long[]))
+       intData := []int64{12345, 67890, -11111}
+       bfInts, err := NewBloomFilterByAccuracy(numDistinct, fpp, 
WithSeed(seed))
+       assert.NoError(t, err)
+
+       err = bfInts.UpdateInt64Array(intData)
+       assert.NoError(t, err)
+
+       wasPresent, err = bfInts.QueryAndUpdateInt64Array(intData)
+       assert.NoError(t, err)
+       assert.True(t, wasPresent)
+
+       found = bfInts.QueryInt64Array(intData)
+       assert.True(t, found)
+
+       // Intersect all filters (each with different data but same seed)
+       bf := &bloomFilterImpl{
+               seed:         seed,
+               numHashes:    bfDoubles.GetNumHashes(),
+               isDirty:      false,
+               capacityBits: bfDoubles.GetCapacity(),
+               numBitsSet:   0,
+               bitArray:     make([]uint64, bfDoubles.GetCapacity()/64),
+       }
+
+       // Manually create a filter by intersecting
+       bf.Union(bfDoubles)
+       bf.Intersect(bfBytes)
+
+       // After intersecting with itself (same data), should have same bit 
count
+       assert.Equal(t, numBitsSet, bf.GetBitsUsed(),
+               "Intersection of identical data should preserve bit count")
+}
+
+// TestNegativeQueries verifies that items NOT inserted are (usually) not found
+func TestNegativeQueries(t *testing.T) {
+       bf, err := NewBloomFilterBySize(1024, 5, WithSeed(42))
+       assert.NoError(t, err)
+
+       // Insert items 0-99
+       for i := int64(0); i < 100; i++ {
+               err = bf.UpdateInt64(i)
+               assert.NoError(t, err)
+       }
+
+       // Test negative cases - items that were NOT inserted
+       negativeItems := []int64{-1, -10, -100, 100, 101, 200, 1000, 10000}
+
+       foundCount := 0
+       for _, item := range negativeItems {
+               if bf.QueryInt64(item) {
+                       foundCount++
+               }
+       }
+
+       // Should not find ALL negative items (that would be a bug)
+       assert.Less(t, foundCount, len(negativeItems),
+               "Should not find all non-inserted items")
+
+       // Specifically, -1 should almost certainly not be found
+       // (allowing for small probability of false positive)
+       found := bf.QueryInt64(-1)
+       t.Logf("Item -1 found: %v (false positive)", found)
+}
diff --git a/filters/preamble_utils.go b/filters/preamble_utils.go
new file mode 100644
index 0000000..4415189
--- /dev/null
+++ b/filters/preamble_utils.go
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package filters
+
+import "encoding/binary"
+
+// Preamble constants matching C++ implementation
+const (
+       // Header sizes
+       preambleBytes      = 32 // Full preamble size for non-empty filter
+       preambleEmptyBytes = 24 // Preamble size for empty filter
+
+       // Preamble field offsets
+       preambleLongsOffset  = 0
+       serVerOffset         = 1
+       familyIDOffset       = 2
+       flagsOffset          = 3
+       numHashesOffset      = 4
+       seedOffset           = 8
+       bitArrayLengthOffset = 16
+       numBitsSetOffset     = 24
+       bitArrayOffset       = 32
+
+       // Family and version identifiers
+       familyID = 21
+       serVer   = 1
+
+       // Preamble sizes in longs
+       preambleLongsEmpty    = 3
+       preambleLongsStandard = 4
+
+       // Flag masks
+       emptyFlagMask = 0x04
+
+       // Special values
+       dirtyBitsValue = 0xFFFFFFFFFFFFFFFF
+)
+
+// extractPreambleLongs extracts the preamble longs field from the header.
+func extractPreambleLongs(bytes []byte) uint8 {
+       return bytes[preambleLongsOffset]
+}
+
+// extractSerVer extracts the serialization version from the header.
+func extractSerVer(bytes []byte) uint8 {
+       return bytes[serVerOffset]
+}
+
+// extractFamilyID extracts the family ID from the header.
+func extractFamilyID(bytes []byte) uint8 {
+       return bytes[familyIDOffset]
+}
+
+// extractFlags extracts the flags byte from the header.
+func extractFlags(bytes []byte) uint8 {
+       return bytes[flagsOffset]
+}
+
+// extractNumHashes extracts the number of hash functions from the header.
+func extractNumHashes(bytes []byte) uint16 {
+       return binary.LittleEndian.Uint16(bytes[numHashesOffset:])
+}
+
+// extractSeed extracts the hash seed from the header.
+func extractSeed(bytes []byte) uint64 {
+       return binary.LittleEndian.Uint64(bytes[seedOffset:])
+}
+
+// extractBitArrayLength extracts the bit array length (in longs) from the 
header.
+func extractBitArrayLength(bytes []byte) uint32 {
+       return binary.LittleEndian.Uint32(bytes[bitArrayLengthOffset:])
+}
+
+// extractNumBitsSet extracts the number of bits set from the header.
+// Only valid for non-empty filters.
+func extractNumBitsSet(bytes []byte) uint64 {
+       return binary.LittleEndian.Uint64(bytes[numBitsSetOffset:])
+}
+
+// insertPreambleLongs inserts the preamble longs field into the header.
+func insertPreambleLongs(bytes []byte, val uint8) {
+       bytes[preambleLongsOffset] = val
+}
+
+// insertSerVer inserts the serialization version into the header.
+func insertSerVer(bytes []byte) {
+       bytes[serVerOffset] = serVer
+}
+
+// insertFamilyID inserts the family ID into the header.
+func insertFamilyID(bytes []byte) {
+       bytes[familyIDOffset] = familyID
+}
+
+// insertFlags inserts the flags byte into the header.
+func insertFlags(bytes []byte, flags uint8) {
+       bytes[flagsOffset] = flags
+}
+
+// insertNumHashes inserts the number of hash functions into the header.
+func insertNumHashes(bytes []byte, numHashes uint16) {
+       binary.LittleEndian.PutUint16(bytes[numHashesOffset:], numHashes)
+}
+
+// insertSeed inserts the hash seed into the header.
+func insertSeed(bytes []byte, seed uint64) {
+       binary.LittleEndian.PutUint64(bytes[seedOffset:], seed)
+}
+
+// insertBitArrayLength inserts the bit array length (in longs) into the 
header.
+func insertBitArrayLength(bytes []byte, length uint32) {
+       binary.LittleEndian.PutUint32(bytes[bitArrayLengthOffset:], length)
+}
+
+// insertNumBitsSet inserts the number of bits set into the header.
+func insertNumBitsSet(bytes []byte, numBitsSet uint64) {
+       binary.LittleEndian.PutUint64(bytes[numBitsSetOffset:], numBitsSet)
+}
+
+// isEmptyFlag checks if the empty flag is set in the flags byte.
+func isEmptyFlag(flags uint8) bool {
+       return (flags & emptyFlagMask) != 0
+}
+
+// setEmptyFlag sets the empty flag in the flags byte.
+func setEmptyFlag(flags uint8) uint8 {
+       return flags | emptyFlagMask
+}
+
+// clearEmptyFlag clears the empty flag in the flags byte.
+func clearEmptyFlag(flags uint8) uint8 {
+       return flags &^ emptyFlagMask
+}
diff --git a/go.mod b/go.mod
index b177cad..09bbb2b 100644
--- a/go.mod
+++ b/go.mod
@@ -24,6 +24,8 @@ require (
        github.com/twmb/murmur3 v1.1.8
 )
 
+require github.com/cespare/xxhash/v2 v2.3.0 // indirect
+
 require (
        github.com/davecgh/go-spew v1.1.1 // indirect
        github.com/pmezard/go-difflib v1.0.0 // indirect
diff --git a/go.sum b/go.sum
index 3f83047..f1072fd 100644
--- a/go.sum
+++ b/go.sum
@@ -1,3 +1,5 @@
+github.com/cespare/xxhash/v2 v2.3.0 
h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod 
h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/davecgh/go-spew v1.1.1 
h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/pmezard/go-difflib v1.0.0 
h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
diff --git a/internal/family.go b/internal/family.go
index 4679414..2569330 100644
--- a/internal/family.go
+++ b/internal/family.go
@@ -25,9 +25,10 @@ type family struct {
 type families struct {
        HLL            family
        Frequency      family
-       Kll            family ``
+       Kll            family
        CPC            family
        CountMinSketch family
+       BloomFilter    family
 }
 
 var FamilyEnum = &families{
@@ -51,4 +52,8 @@ var FamilyEnum = &families{
                Id:          18,
                MaxPreLongs: 3,
        },
+       BloomFilter: family{
+               Id:          21,
+               MaxPreLongs: 4,
+       },
 }
diff --git 
a/serialization_test_data/go_generated_files/bf_byte_array_n10000_h3_go.sk 
b/serialization_test_data/go_generated_files/bf_byte_array_n10000_h3_go.sk
new file mode 100644
index 0000000..241719d
Binary files /dev/null and 
b/serialization_test_data/go_generated_files/bf_byte_array_n10000_h3_go.sk 
differ
diff --git 
a/serialization_test_data/go_generated_files/bf_double_array_n10000_h3_go.sk 
b/serialization_test_data/go_generated_files/bf_double_array_n10000_h3_go.sk
new file mode 100644
index 0000000..49e590c
Binary files /dev/null and 
b/serialization_test_data/go_generated_files/bf_double_array_n10000_h3_go.sk 
differ
diff --git 
a/serialization_test_data/go_generated_files/bf_double_n10000_h3_go.sk 
b/serialization_test_data/go_generated_files/bf_double_n10000_h3_go.sk
new file mode 100644
index 0000000..77a0319
Binary files /dev/null and 
b/serialization_test_data/go_generated_files/bf_double_n10000_h3_go.sk differ
diff --git 
a/serialization_test_data/go_generated_files/bf_long_array_n10000_h3_go.sk 
b/serialization_test_data/go_generated_files/bf_long_array_n10000_h3_go.sk
new file mode 100644
index 0000000..9e09163
Binary files /dev/null and 
b/serialization_test_data/go_generated_files/bf_long_array_n10000_h3_go.sk 
differ
diff --git a/serialization_test_data/go_generated_files/bf_n0_h3_go.sk 
b/serialization_test_data/go_generated_files/bf_n0_h3_go.sk
new file mode 100644
index 0000000..3f94cd8
Binary files /dev/null and 
b/serialization_test_data/go_generated_files/bf_n0_h3_go.sk differ
diff --git a/serialization_test_data/go_generated_files/bf_n0_h5_go.sk 
b/serialization_test_data/go_generated_files/bf_n0_h5_go.sk
new file mode 100644
index 0000000..720e455
Binary files /dev/null and 
b/serialization_test_data/go_generated_files/bf_n0_h5_go.sk differ
diff --git a/serialization_test_data/go_generated_files/bf_n10000_h3_go.sk 
b/serialization_test_data/go_generated_files/bf_n10000_h3_go.sk
new file mode 100644
index 0000000..9f93779
Binary files /dev/null and 
b/serialization_test_data/go_generated_files/bf_n10000_h3_go.sk differ
diff --git a/serialization_test_data/go_generated_files/bf_n10000_h5_go.sk 
b/serialization_test_data/go_generated_files/bf_n10000_h5_go.sk
new file mode 100644
index 0000000..20376e9
Binary files /dev/null and 
b/serialization_test_data/go_generated_files/bf_n10000_h5_go.sk differ
diff --git a/serialization_test_data/go_generated_files/bf_n2000000_h3_go.sk 
b/serialization_test_data/go_generated_files/bf_n2000000_h3_go.sk
new file mode 100644
index 0000000..5503c77
Binary files /dev/null and 
b/serialization_test_data/go_generated_files/bf_n2000000_h3_go.sk differ
diff --git a/serialization_test_data/go_generated_files/bf_n2000000_h5_go.sk 
b/serialization_test_data/go_generated_files/bf_n2000000_h5_go.sk
new file mode 100644
index 0000000..15b7a9d
Binary files /dev/null and 
b/serialization_test_data/go_generated_files/bf_n2000000_h5_go.sk differ
diff --git 
a/serialization_test_data/go_generated_files/bf_string_n10000_h3_go.sk 
b/serialization_test_data/go_generated_files/bf_string_n10000_h3_go.sk
new file mode 100644
index 0000000..8b24eb6
Binary files /dev/null and 
b/serialization_test_data/go_generated_files/bf_string_n10000_h3_go.sk differ


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datasketches-go) 02/02: Add a bloomfilter implementation to Go

Reply via email to