This is an automated email from the ASF dual-hosted git repository.
placave pushed a commit to branch cpc-sketch
in repository https://gitbox.apache.org/repos/asf/datasketches-go.git
The following commit(s) were added to refs/heads/cpc-sketch by this push:
new fa7027c cleanup constructor and bench custom/lib murmur3
fa7027c is described below
commit fa7027cb6a008cc41a3d5c521919db743c123913
Author: Pierre Lacave <[email protected]>
AuthorDate: Thu Nov 21 11:59:05 2024 +0100
cleanup constructor and bench custom/lib murmur3
---
cpc/cpc_sketch.go | 55 +++++++-----------------------------------------
cpc/cpc_sketch_test.go | 11 +++++++---
cpc/utils.go | 2 +-
internal/murmur3_test.go | 22 ++++++++++++++++++-
4 files changed, 38 insertions(+), 52 deletions(-)
diff --git a/cpc/cpc_sketch.go b/cpc/cpc_sketch.go
index 06f91fc..1b6eba4 100644
--- a/cpc/cpc_sketch.go
+++ b/cpc/cpc_sketch.go
@@ -52,6 +52,10 @@ type CpcSketch struct {
scratch [8]byte
}
+func NewCpcSketchWithDefault() (CpcSketch, error) {
+ return NewCpcSketch(defaultLgK, internal.DEFAULT_UPDATE_SEED)
+}
+
func NewCpcSketch(lgK int, seed uint64) (CpcSketch, error) {
if err := checkLgK(lgK); err != nil {
return CpcSketch{}, err
@@ -121,7 +125,7 @@ func (c *CpcSketch) UpdateByteSlice(datum []byte) error {
if len(datum) == 0 {
return nil
}
- hashLo, hashHi := internal.HashCharSliceMurmur3(datum, 0, len(datum), c.seed)
+ hashLo, hashHi := murmur3.SeedSum128(c.seed, c.seed, datum)
return c.hashUpdate(hashLo, hashHi)
}
@@ -243,7 +247,9 @@ func (c *CpcSketch) updateWindowed(rowCol int) error {
c.updateHIP(rowCol)
c8post := c.numCoupons << 3
if c8post >= ((27 + w8pre) * k) {
- c.modifyOffset(c.windowOffset + 1)
+ if err := c.modifyOffset(c.windowOffset + 1); err != nil {
+ return err
+ }
if c.windowOffset < 1 || c.windowOffset > 56 {
return fmt.Errorf("windowOffset < 1 || windowOffset > 56")
}
@@ -257,51 +263,6 @@ func (c *CpcSketch) updateWindowed(rowCol int) error {
return nil
}
-/*
-private static void updateWindowed(final CpcSketch sketch, final int rowCol) {
- assert ((sketch.windowOffset >= 0) && (sketch.windowOffset <= 56));
- final int k = 1 << sketch.lgK;
- final long c32pre = sketch.numCoupons << 5;
- assert c32pre >= (3L * k); // C < 3K/32, in other words flavor >= HYBRID
- final long c8pre = sketch.numCoupons << 3;
- final int w8pre = sketch.windowOffset << 3;
- assert c8pre < ((27L + w8pre) * k); // C < (K * 27/8) + (K * windowOffset)
-
- boolean isNovel = false; //novel if new coupon
- final int col = rowCol & 63;
-
- if (col < sketch.windowOffset) { // track the surprising 0's "before" the window
- isNovel = PairTable.maybeDelete(sketch.pairTable, rowCol); // inverted logic
- }
- else if (col < (sketch.windowOffset + 8)) { // track the 8 bits inside the window
- assert (col >= sketch.windowOffset);
- final int row = rowCol >>> 6;
- final byte oldBits = sketch.slidingWindow[row];
- final byte newBits = (byte) (oldBits | (1 << (col - sketch.windowOffset)));
- if (newBits != oldBits) {
- sketch.slidingWindow[row] = newBits;
- isNovel = true;
- }
- }
- else { // track the surprising 1's "after" the window
- assert col >= (sketch.windowOffset + 8);
- isNovel = PairTable.maybeInsert(sketch.pairTable, rowCol); // normal logic
- }
-
- if (isNovel) {
- sketch.numCoupons += 1;
- updateHIP(sketch, rowCol);
- final long c8post = sketch.numCoupons << 3;
- if (c8post >= ((27L + w8pre) * k)) {
- modifyOffset(sketch, sketch.windowOffset + 1);
- assert (sketch.windowOffset >= 1) && (sketch.windowOffset <= 56);
- final int w8post = sketch.windowOffset << 3;
- assert c8post < ((27L + w8post) * k); // C < (K * 27/8) + (K * windowOffset)
- }
- }
- }
-*/
-
func hash(bs []byte, seed uint64) (uint64, uint64) {
return murmur3.SeedSum128(seed, seed, bs)
}
diff --git a/cpc/cpc_sketch_test.go b/cpc/cpc_sketch_test.go
index 0c0275e..1841e99 100644
--- a/cpc/cpc_sketch_test.go
+++ b/cpc/cpc_sketch_test.go
@@ -18,6 +18,7 @@
package cpc
import (
+ "github.com/apache/datasketches-go/internal"
"github.com/stretchr/testify/assert"
"testing"
)
@@ -56,9 +57,9 @@ func TestCPCCheckUpdatesEstimate(t *testing.T) {
func TestCPCCheckEstimatesWithMerge(t *testing.T) {
lgk := 4
- sk1, err := NewCpcSketch(lgk, CpcDefaultUpdateSeed)
+ sk1, err := NewCpcSketch(lgk, internal.DEFAULT_UPDATE_SEED)
assert.NoError(t, err)
- sk2, err := NewCpcSketch(lgk, CpcDefaultUpdateSeed)
+ sk2, err := NewCpcSketch(lgk, internal.DEFAULT_UPDATE_SEED)
assert.NoError(t, err)
n := 1 << lgk
for i := 0; i < n; i++ {
@@ -85,7 +86,7 @@ func TestCPCCheckEstimatesWithMerge(t *testing.T) {
func TestCPCCheckCornerCaseUpdates(t *testing.T) {
lgK := 4
- sk, err := NewCpcSketch(lgK, CpcDefaultUpdateSeed)
+ sk, err := NewCpcSketch(lgK, internal.DEFAULT_UPDATE_SEED)
assert.NoError(t, err)
err = sk.UpdateFloat64(0.0)
assert.NoError(t, err)
@@ -125,6 +126,10 @@ func TestCPCCheckLgK(t *testing.T) {
assert.Equal(t, sk.lgK, 10)
_, err = NewCpcSketch(3, 0)
assert.Error(t, err)
+ sk, err = NewCpcSketchWithDefault()
+ assert.NoError(t, err)
+ assert.Equal(t, sk.lgK, defaultLgK)
+ assert.Equal(t, sk.seed, internal.DEFAULT_UPDATE_SEED)
}
func TestCPCcheckIconHipUBLBLg15(t *testing.T) {
diff --git a/cpc/utils.go b/cpc/utils.go
index 52baab8..174bd6b 100644
--- a/cpc/utils.go
+++ b/cpc/utils.go
@@ -46,7 +46,7 @@ const (
)
const (
- CpcDefaultUpdateSeed = 9001
+ defaultLgK = 11
)
var (
diff --git a/internal/murmur3_test.go b/internal/murmur3_test.go
index 7aa6a2a..00b184a 100644
--- a/internal/murmur3_test.go
+++ b/internal/murmur3_test.go
@@ -17,7 +17,10 @@
package internal
-import "testing"
+import (
+ "github.com/twmb/murmur3"
+ "testing"
+)
func TestByteArrRemainderGT8(t *testing.T) {
key := []byte("The quick brown fox jumps over the lazy dog")
@@ -31,3 +34,20 @@ func TestByteArrRemainderGT8(t *testing.T) {
t.Errorf("expected %v, got %v", h2, resultHi)
}
}
+
+func BenchmarkHashCharSliceMurmur3(b *testing.B) {
+ b.Run("custom murmur3", func(b *testing.B) {
+ key := []byte("The quick brown fox jumps over the lazy dog")
+ for i := 0; i < b.N; i++ {
+ HashCharSliceMurmur3(key, 0, len(key), 0)
+ }
+ })
+
+ b.Run("stdlib murmur3", func(b *testing.B) {
+ key := []byte("The quick brown fox jumps over the lazy dog")
+ for i := 0; i < b.N; i++ {
+ murmur3.SeedSum128(DEFAULT_UPDATE_SEED, DEFAULT_UPDATE_SEED, key)
+ }
+ })
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]