Changeset: ed3714394359 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/ed3714394359
Modified Files:
        gdk/gdk_imprints.c
        gdk/gdk_join.c
        gdk/gdk_private.h
        gdk/gdk_sample.c
        gdk/xoshiro256starstar.h
        sql/test/sample/Tests/sample.10.test
        sql/test/sample/Tests/sample.11.test
        sql/test/sample/Tests/sample.12.test
        sql/test/sample/Tests/sample.13.test
        sql/test/sample/Tests/sample.2.test
        sql/test/sample/Tests/sample.3.test
        sql/test/sample/Tests/sample.4.test
        sql/test/sample/Tests/sample.5.test
        sql/test/sample/Tests/sample.6.test
        sql/test/sample/Tests/sample.9.test
        sql/test/sys-schema/Tests/webExamplesMathematicalFunctionsOperators.test
Branch: Dec2023
Log Message:

Updated BATsample, remove BATcreatesample (which was private anyway).


diffs (truncated from 579 to 300 lines):

diff --git a/gdk/gdk_imprints.c b/gdk/gdk_imprints.c
--- a/gdk/gdk_imprints.c
+++ b/gdk/gdk_imprints.c
@@ -523,7 +523,7 @@ BATimprints(BAT *b)
                imprints->imprints.parentid = b->batCacheid;
 
 #define SMP_SIZE 2048
-               s1 = BATsample_with_seed(b, SMP_SIZE, (uint64_t) GDKusec() * (uint64_t) b->batCacheid);
+               s1 = BATsample(b, SMP_SIZE);
                if (s1 == NULL) {
                        GDKfree(imprints);
                        bat_iterator_end(&bi);
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3559,11 +3559,9 @@ guess_uniques(BAT *b, struct canditer *c
                                  ALGOBATPAR(b));
                        return unique_est;
                }
-               s1 = BATcreatesample(b->hseqbase, batcount, 1000,
-                                    (uint64_t) GDKusec() * (uint64_t) b->batCacheid);
+               s1 = BATsample(b, 1000);
        } else {
-               BAT *s2 = BATcreatesample(ci->s->hseqbase, ci->ncand, 1000,
-                                         (uint64_t) GDKusec() * (uint64_t) b->batCacheid);
+               BAT *s2 = BATsample(ci->s, 1000);
                if (s2 == NULL)
                        return -1;
                s1 = BATproject(s2, ci->s);
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -67,8 +67,6 @@ gdk_return BATcheckmodes(BAT *b, bool pe
        __attribute__((__visibility__("hidden")));
 BAT *BATcreatedesc(oid hseq, int tt, bool heapnames, role_t role, uint16_t width)
        __attribute__((__visibility__("hidden")));
-BAT *BATcreatesample(oid hseq, BUN cnt, BUN n, uint64_t seed)
-       __attribute__((__visibility__("hidden")));
 void BATdelete(BAT *b)
        __attribute__((__visibility__("hidden")));
 void BATdestroy(BAT *b)
diff --git a/gdk/gdk_sample.c b/gdk/gdk_sample.c
--- a/gdk/gdk_sample.c
+++ b/gdk/gdk_sample.c
@@ -42,7 +42,7 @@ struct oidtreenode {
        };
 };
 
-static int
+static bool
 OIDTreeMaybeInsert(struct oidtreenode *tree, oid o, BUN allocated)
 {
        struct oidtreenode **nodep;
@@ -50,12 +50,12 @@ OIDTreeMaybeInsert(struct oidtreenode *t
        if (allocated == 0) {
                tree->left = tree->right = NULL;
                tree->o = o;
-               return 1;
+               return true;
        }
        nodep = &tree;
        while (*nodep) {
                if (o == (*nodep)->o)
-                       return 0;
+                       return false;
                if (o < (*nodep)->o)
                        nodep = &(*nodep)->left;
                else
@@ -64,7 +64,7 @@ OIDTreeMaybeInsert(struct oidtreenode *t
        *nodep = &tree[allocated];
        tree[allocated].left = tree[allocated].right = NULL;
        tree[allocated].o = o;
-       return 1;
+       return true;
 }
 
 /* inorder traversal, gives us a sorted BAT */
@@ -134,19 +134,14 @@ do_batsample(oid hseq, BUN cnt, BUN n, r
                        return NULL;
                }
 
+               if (lock)
+                       MT_lock_set(lock);
                /* generate a list of random numbers; note we use the
                 * "tree" array, but we use the value from each location
                 * before it is overwritten by the use as part of the
                 * binary tree */
-               if (lock) {
-                       MT_lock_set(lock);
-                       for (rescnt = 0; rescnt < n; rescnt++)
-                               tree[rescnt].r = next(rse);
-                       MT_lock_unset(lock);
-               } else {
-                       for (rescnt = 0; rescnt < n; rescnt++)
-                               tree[rescnt].r = next(rse);
-               }
+               for (rescnt = 0; rescnt < n; rescnt++)
+                       tree[rescnt].r = next(rse);
 
                /* while we do not have enough sample OIDs yet */
                BUN rnd = 0;
@@ -156,12 +151,8 @@ do_batsample(oid hseq, BUN cnt, BUN n, r
                                if (rnd == n) {
                                        /* we ran out of random numbers,
                                         * so generate more */
-                                       if (lock)
-                                               MT_lock_set(lock);
                                        for (rnd = rescnt; rnd < n; rnd++)
                                                tree[rnd].r = next(rse);
-                                       if (lock)
-                                               MT_lock_unset(lock);
                                        rnd = rescnt;
                                }
                                candoid = minoid + tree[rnd++].r % cnt;
@@ -169,6 +160,8 @@ do_batsample(oid hseq, BUN cnt, BUN n, r
                                 * generated, try again */
                        } while (!OIDTreeMaybeInsert(tree, candoid, rescnt));
                }
+               if (lock)
+                       MT_lock_unset(lock);
                if (!antiset) {
                        OIDTreeToBAT(tree, bn);
                } else {
@@ -185,26 +178,20 @@ do_batsample(oid hseq, BUN cnt, BUN n, r
        return bn;
 }
 
+/* BATsample implements sampling for BATs */
 BAT *
-BATcreatesample(oid hseq, BUN cnt, BUN n, uint64_t seed)
+BATsample_with_seed(BAT *b, BUN n, uint64_t seed)
 {
        random_state_engine rse;
 
        init_random_state_engine(rse, seed);
 
-       BAT *bn = do_batsample(hseq, cnt, n, rse, NULL);
-       TRC_DEBUG(ALGO, OIDFMT "," BUNFMT "," BUNFMT " -> " ALGOOPTBATFMT "\n",
-                 hseq, cnt, n, ALGOOPTBATPAR(bn));
+       BAT *bn = do_batsample(b->hseqbase, BATcount(b), n, rse, NULL);
+       TRC_DEBUG(ALGO, ALGOBATFMT "," BUNFMT " -> " ALGOOPTBATFMT "\n",
+                 ALGOBATPAR(b), n, ALGOOPTBATPAR(bn));
        return bn;
 }
 
-/* BATsample implements sampling for BATs */
-BAT *
-BATsample_with_seed(BAT *b, BUN n, uint64_t seed)
-{
-       return BATcreatesample(b->hseqbase, b->batCount, n, seed);
-}
-
 static MT_Lock rse_lock = MT_LOCK_INITIALIZER(rse_lock);
 BAT *
 BATsample(BAT *b, BUN n)
diff --git a/gdk/xoshiro256starstar.h b/gdk/xoshiro256starstar.h
--- a/gdk/xoshiro256starstar.h
+++ b/gdk/xoshiro256starstar.h
@@ -17,12 +17,14 @@ worldwide. This software is distributed 
 
 See <http://creativecommons.org/publicdomain/zero/1.0/>. */
 
-static inline uint64_t rotl(const uint64_t x, int k) {
+typedef uint64_t random_state_engine[4];
+
+static inline uint64_t
+rotl(const uint64_t x, int k)
+{
        return (x << k) | (x >> (64 - k));
 }
 
-typedef uint64_t random_state_engine[4];
-
 static inline void
 init_random_state_engine(random_state_engine engine, uint64_t seed)
 {
@@ -40,8 +42,10 @@ init_random_state_engine(random_state_en
        }
 }
 
-static inline uint64_t next(random_state_engine rse) {
-       const uint64_t output = rotl(rse[0] * 5, 7) * 9;
+static inline uint64_t
+next(random_state_engine rse)
+{
+       const uint64_t output = rotl(rse[1] * 5, 7) * 9;
 
        const uint64_t t = rse[1] << 17;
 
diff --git a/sql/test/sample/Tests/sample.10.test b/sql/test/sample/Tests/sample.10.test
--- a/sql/test/sample/Tests/sample.10.test
+++ b/sql/test/sample/Tests/sample.10.test
@@ -1,14 +1,14 @@
 query I rowsort
 select * from GENERATE_SERIES(1,41) sample 0.25 seed 1234
 ----
-14
-15
-19
-2
+1
+16
+17
 20
+22
 26
-27
-38
-39
-8
+28
+29
+31
+7
 
diff --git a/sql/test/sample/Tests/sample.11.test b/sql/test/sample/Tests/sample.11.test
--- a/sql/test/sample/Tests/sample.11.test
+++ b/sql/test/sample/Tests/sample.11.test
@@ -1,24 +1,24 @@
 query I rowsort
 select * from GENERATE_SERIES(1,41) sample 0.5 seed 1234
 ----
-12
-13
-14
-15
+1
+11
+16
 17
 18
-19
 2
 20
+22
 26
 27
+28
+29
 3
 30
-37
-38
-39
-4
+31
+35
+36
+5
 7
 8
-9
 
diff --git a/sql/test/sample/Tests/sample.12.test b/sql/test/sample/Tests/sample.12.test
--- a/sql/test/sample/Tests/sample.12.test
+++ b/sql/test/sample/Tests/sample.12.test
@@ -1,34 +1,34 @@
 query I rowsort
 select * from GENERATE_SERIES(1,41) sample 0.75 seed 1234
 ----
-1
 10
 11
 12
 13
-16
-17
+14
+15
 18
+19
+2
 21
-22
 23
 24
 25
-28
-29
+27
 3
 30
-31
 32
 33
 34
 35
 36
 37
+38
+39
 4
 40
 5
 6
-7
+8
 9
 
diff --git a/sql/test/sample/Tests/sample.13.test b/sql/test/sample/Tests/sample.13.test
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to