This is an automated email from the ASF dual-hosted git repository.

placave pushed a commit to branch cpc-sketch
in repository https://gitbox.apache.org/repos/asf/datasketches-go.git


The following commit(s) were added to refs/heads/cpc-sketch by this push:
     new fa7027c  cleanup constructor and bench custom/lib murmur3
fa7027c is described below

commit fa7027cb6a008cc41a3d5c521919db743c123913
Author: Pierre Lacave <[email protected]>
AuthorDate: Thu Nov 21 11:59:05 2024 +0100

    cleanup constructor and bench custom/lib murmur3
---
 cpc/cpc_sketch.go        | 55 +++++++-----------------------------------------
 cpc/cpc_sketch_test.go   | 11 +++++++---
 cpc/utils.go             |  2 +-
 internal/murmur3_test.go | 22 ++++++++++++++++++-
 4 files changed, 38 insertions(+), 52 deletions(-)

diff --git a/cpc/cpc_sketch.go b/cpc/cpc_sketch.go
index 06f91fc..1b6eba4 100644
--- a/cpc/cpc_sketch.go
+++ b/cpc/cpc_sketch.go
@@ -52,6 +52,10 @@ type CpcSketch struct {
        scratch [8]byte
 }
 
+func NewCpcSketchWithDefault() (CpcSketch, error) {
+       return NewCpcSketch(defaultLgK, internal.DEFAULT_UPDATE_SEED)
+}
+
 func NewCpcSketch(lgK int, seed uint64) (CpcSketch, error) {
        if err := checkLgK(lgK); err != nil {
                return CpcSketch{}, err
@@ -121,7 +125,7 @@ func (c *CpcSketch) UpdateByteSlice(datum []byte) error {
        if len(datum) == 0 {
                return nil
        }
-       hashLo, hashHi := internal.HashCharSliceMurmur3(datum, 0, len(datum), c.seed)
+       hashLo, hashHi := murmur3.SeedSum128(c.seed, c.seed, datum)
        return c.hashUpdate(hashLo, hashHi)
 }
 
@@ -243,7 +247,9 @@ func (c *CpcSketch) updateWindowed(rowCol int) error {
                c.updateHIP(rowCol)
                c8post := c.numCoupons << 3
                if c8post >= ((27 + w8pre) * k) {
-                       c.modifyOffset(c.windowOffset + 1)
+                       if err := c.modifyOffset(c.windowOffset + 1); err != nil {
+                               return err
+                       }
                        if c.windowOffset < 1 || c.windowOffset > 56 {
                                return fmt.Errorf("windowOffset < 1 || windowOffset > 56")
                        }
@@ -257,51 +263,6 @@ func (c *CpcSketch) updateWindowed(rowCol int) error {
        return nil
 }
 
-/*
-private static void updateWindowed(final CpcSketch sketch, final int rowCol) {
-    assert ((sketch.windowOffset >= 0) && (sketch.windowOffset <= 56));
-    final int k = 1 << sketch.lgK;
-    final long c32pre = sketch.numCoupons << 5;
-    assert c32pre >= (3L * k); // C < 3K/32, in other words flavor >= HYBRID
-    final long c8pre = sketch.numCoupons << 3;
-    final int w8pre = sketch.windowOffset << 3;
-    assert c8pre < ((27L + w8pre) * k); // C < (K * 27/8) + (K * windowOffset)
-
-    boolean isNovel = false; //novel if new coupon
-    final int col = rowCol & 63;
-
-    if (col < sketch.windowOffset) { // track the surprising 0's "before" the window
-      isNovel = PairTable.maybeDelete(sketch.pairTable, rowCol); // inverted logic
-    }
-    else if (col < (sketch.windowOffset + 8)) { // track the 8 bits inside the window
-      assert (col >= sketch.windowOffset);
-      final int row = rowCol >>> 6;
-      final byte oldBits = sketch.slidingWindow[row];
-      final byte newBits = (byte) (oldBits | (1 << (col - sketch.windowOffset)));
-      if (newBits != oldBits) {
-        sketch.slidingWindow[row] = newBits;
-        isNovel = true;
-      }
-    }
-    else { // track the surprising 1's "after" the window
-      assert col >= (sketch.windowOffset + 8);
-      isNovel = PairTable.maybeInsert(sketch.pairTable, rowCol); // normal logic
-    }
-
-    if (isNovel) {
-      sketch.numCoupons += 1;
-      updateHIP(sketch, rowCol);
-      final long c8post = sketch.numCoupons << 3;
-      if (c8post >= ((27L + w8pre) * k)) {
-        modifyOffset(sketch, sketch.windowOffset + 1);
-        assert (sketch.windowOffset >= 1) && (sketch.windowOffset <= 56);
-        final int w8post = sketch.windowOffset << 3;
-        assert c8post < ((27L + w8post) * k); // C < (K * 27/8) + (K * windowOffset)
-      }
-    }
-  }
-*/
-
 func hash(bs []byte, seed uint64) (uint64, uint64) {
        return murmur3.SeedSum128(seed, seed, bs)
 }
diff --git a/cpc/cpc_sketch_test.go b/cpc/cpc_sketch_test.go
index 0c0275e..1841e99 100644
--- a/cpc/cpc_sketch_test.go
+++ b/cpc/cpc_sketch_test.go
@@ -18,6 +18,7 @@
 package cpc
 
 import (
+       "github.com/apache/datasketches-go/internal"
        "github.com/stretchr/testify/assert"
        "testing"
 )
@@ -56,9 +57,9 @@ func TestCPCCheckUpdatesEstimate(t *testing.T) {
 
 func TestCPCCheckEstimatesWithMerge(t *testing.T) {
        lgk := 4
-       sk1, err := NewCpcSketch(lgk, CpcDefaultUpdateSeed)
+       sk1, err := NewCpcSketch(lgk, internal.DEFAULT_UPDATE_SEED)
        assert.NoError(t, err)
-       sk2, err := NewCpcSketch(lgk, CpcDefaultUpdateSeed)
+       sk2, err := NewCpcSketch(lgk, internal.DEFAULT_UPDATE_SEED)
        assert.NoError(t, err)
        n := 1 << lgk
        for i := 0; i < n; i++ {
@@ -85,7 +86,7 @@ func TestCPCCheckEstimatesWithMerge(t *testing.T) {
 
 func TestCPCCheckCornerCaseUpdates(t *testing.T) {
        lgK := 4
-       sk, err := NewCpcSketch(lgK, CpcDefaultUpdateSeed)
+       sk, err := NewCpcSketch(lgK, internal.DEFAULT_UPDATE_SEED)
        assert.NoError(t, err)
        err = sk.UpdateFloat64(0.0)
        assert.NoError(t, err)
@@ -125,6 +126,10 @@ func TestCPCCheckLgK(t *testing.T) {
        assert.Equal(t, sk.lgK, 10)
        _, err = NewCpcSketch(3, 0)
        assert.Error(t, err)
+       sk, err = NewCpcSketchWithDefault()
+       assert.NoError(t, err)
+       assert.Equal(t, sk.lgK, defaultLgK)
+       assert.Equal(t, sk.seed, internal.DEFAULT_UPDATE_SEED)
 }
 
 func TestCPCcheckIconHipUBLBLg15(t *testing.T) {
diff --git a/cpc/utils.go b/cpc/utils.go
index 52baab8..174bd6b 100644
--- a/cpc/utils.go
+++ b/cpc/utils.go
@@ -46,7 +46,7 @@ const (
 )
 
 const (
-       CpcDefaultUpdateSeed = 9001
+       defaultLgK = 11
 )
 
 var (
diff --git a/internal/murmur3_test.go b/internal/murmur3_test.go
index 7aa6a2a..00b184a 100644
--- a/internal/murmur3_test.go
+++ b/internal/murmur3_test.go
@@ -17,7 +17,10 @@
 
 package internal
 
-import "testing"
+import (
+       "github.com/twmb/murmur3"
+       "testing"
+)
 
 func TestByteArrRemainderGT8(t *testing.T) {
        key := []byte("The quick brown fox jumps over the lazy dog")
@@ -31,3 +34,20 @@ func TestByteArrRemainderGT8(t *testing.T) {
                t.Errorf("expected %v, got %v", h2, resultHi)
        }
 }
+
+func BenchmarkHashCharSliceMurmur3(b *testing.B) {
+       b.Run("custom murmur3", func(b *testing.B) {
+               key := []byte("The quick brown fox jumps over the lazy dog")
+               for i := 0; i < b.N; i++ {
+                       HashCharSliceMurmur3(key, 0, len(key), 0)
+               }
+       })
+
+       b.Run("stdlib murmur3", func(b *testing.B) {
+               key := []byte("The quick brown fox jumps over the lazy dog")
+               for i := 0; i < b.N; i++ {
+                       murmur3.SeedSum128(DEFAULT_UPDATE_SEED, DEFAULT_UPDATE_SEED, key)
+               }
+       })
+
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to