Changeset: ed3714394359 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/ed3714394359
Modified Files:
    gdk/gdk_imprints.c
    gdk/gdk_join.c
    gdk/gdk_private.h
    gdk/gdk_sample.c
    gdk/xoshiro256starstar.h
    sql/test/sample/Tests/sample.10.test
    sql/test/sample/Tests/sample.11.test
    sql/test/sample/Tests/sample.12.test
    sql/test/sample/Tests/sample.13.test
    sql/test/sample/Tests/sample.2.test
    sql/test/sample/Tests/sample.3.test
    sql/test/sample/Tests/sample.4.test
    sql/test/sample/Tests/sample.5.test
    sql/test/sample/Tests/sample.6.test
    sql/test/sample/Tests/sample.9.test
    sql/test/sys-schema/Tests/webExamplesMathematicalFunctionsOperators.test
Branch: Dec2023
Log Message:
Updated BATsample, remove BATcreatesample (which was private anyway). diffs (truncated from 579 to 300 lines): diff --git a/gdk/gdk_imprints.c b/gdk/gdk_imprints.c --- a/gdk/gdk_imprints.c +++ b/gdk/gdk_imprints.c @@ -523,7 +523,7 @@ BATimprints(BAT *b) imprints->imprints.parentid = b->batCacheid; #define SMP_SIZE 2048 - s1 = BATsample_with_seed(b, SMP_SIZE, (uint64_t) GDKusec() * (uint64_t) b->batCacheid); + s1 = BATsample(b, SMP_SIZE); if (s1 == NULL) { GDKfree(imprints); bat_iterator_end(&bi); diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -3559,11 +3559,9 @@ guess_uniques(BAT *b, struct canditer *c ALGOBATPAR(b)); return unique_est; } - s1 = BATcreatesample(b->hseqbase, batcount, 1000, - (uint64_t) GDKusec() * (uint64_t) b->batCacheid); + s1 = BATsample(b, 1000); } else { - BAT *s2 = BATcreatesample(ci->s->hseqbase, ci->ncand, 1000, - (uint64_t) GDKusec() * (uint64_t) b->batCacheid); + BAT *s2 = BATsample(ci->s, 1000); if (s2 == NULL) return -1; s1 = BATproject(s2, ci->s); diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h --- a/gdk/gdk_private.h +++ b/gdk/gdk_private.h @@ -67,8 +67,6 @@ gdk_return BATcheckmodes(BAT *b, bool pe __attribute__((__visibility__("hidden"))); BAT *BATcreatedesc(oid hseq, int tt, bool heapnames, role_t role, uint16_t width) __attribute__((__visibility__("hidden"))); -BAT *BATcreatesample(oid hseq, BUN cnt, BUN n, uint64_t seed) - __attribute__((__visibility__("hidden"))); void BATdelete(BAT *b) __attribute__((__visibility__("hidden"))); void BATdestroy(BAT *b) diff --git a/gdk/gdk_sample.c b/gdk/gdk_sample.c --- a/gdk/gdk_sample.c +++ b/gdk/gdk_sample.c @@ -42,7 +42,7 @@ struct oidtreenode { }; }; -static int +static bool OIDTreeMaybeInsert(struct oidtreenode *tree, oid o, BUN allocated) { struct oidtreenode **nodep; @@ -50,12 +50,12 @@ OIDTreeMaybeInsert(struct oidtreenode *t if (allocated == 0) { tree->left = tree->right = NULL; tree->o = o; - return 1; + return true; } nodep = &tree; while 
(*nodep) { if (o == (*nodep)->o) - return 0; + return false; if (o < (*nodep)->o) nodep = &(*nodep)->left; else @@ -64,7 +64,7 @@ OIDTreeMaybeInsert(struct oidtreenode *t *nodep = &tree[allocated]; tree[allocated].left = tree[allocated].right = NULL; tree[allocated].o = o; - return 1; + return true; } /* inorder traversal, gives us a sorted BAT */ @@ -134,19 +134,14 @@ do_batsample(oid hseq, BUN cnt, BUN n, r return NULL; } + if (lock) + MT_lock_set(lock); /* generate a list of random numbers; note we use the * "tree" array, but we use the value from each location * before it is overwritten by the use as part of the * binary tree */ - if (lock) { - MT_lock_set(lock); - for (rescnt = 0; rescnt < n; rescnt++) - tree[rescnt].r = next(rse); - MT_lock_unset(lock); - } else { - for (rescnt = 0; rescnt < n; rescnt++) - tree[rescnt].r = next(rse); - } + for (rescnt = 0; rescnt < n; rescnt++) + tree[rescnt].r = next(rse); /* while we do not have enough sample OIDs yet */ BUN rnd = 0; @@ -156,12 +151,8 @@ do_batsample(oid hseq, BUN cnt, BUN n, r if (rnd == n) { /* we ran out of random numbers, * so generate more */ - if (lock) - MT_lock_set(lock); for (rnd = rescnt; rnd < n; rnd++) tree[rnd].r = next(rse); - if (lock) - MT_lock_unset(lock); rnd = rescnt; } candoid = minoid + tree[rnd++].r % cnt; @@ -169,6 +160,8 @@ do_batsample(oid hseq, BUN cnt, BUN n, r * generated, try again */ } while (!OIDTreeMaybeInsert(tree, candoid, rescnt)); } + if (lock) + MT_lock_unset(lock); if (!antiset) { OIDTreeToBAT(tree, bn); } else { @@ -185,26 +178,20 @@ do_batsample(oid hseq, BUN cnt, BUN n, r return bn; } +/* BATsample implements sampling for BATs */ BAT * -BATcreatesample(oid hseq, BUN cnt, BUN n, uint64_t seed) +BATsample_with_seed(BAT *b, BUN n, uint64_t seed) { random_state_engine rse; init_random_state_engine(rse, seed); - BAT *bn = do_batsample(hseq, cnt, n, rse, NULL); - TRC_DEBUG(ALGO, OIDFMT "," BUNFMT "," BUNFMT " -> " ALGOOPTBATFMT "\n", - hseq, cnt, n, ALGOOPTBATPAR(bn)); + 
BAT *bn = do_batsample(b->hseqbase, BATcount(b), n, rse, NULL); + TRC_DEBUG(ALGO, ALGOBATFMT "," BUNFMT " -> " ALGOOPTBATFMT "\n", + ALGOBATPAR(b), n, ALGOOPTBATPAR(bn)); return bn; } -/* BATsample implements sampling for BATs */ -BAT * -BATsample_with_seed(BAT *b, BUN n, uint64_t seed) -{ - return BATcreatesample(b->hseqbase, b->batCount, n, seed); -} - static MT_Lock rse_lock = MT_LOCK_INITIALIZER(rse_lock); BAT * BATsample(BAT *b, BUN n) diff --git a/gdk/xoshiro256starstar.h b/gdk/xoshiro256starstar.h --- a/gdk/xoshiro256starstar.h +++ b/gdk/xoshiro256starstar.h @@ -17,12 +17,14 @@ worldwide. This software is distributed See <http://creativecommons.org/publicdomain/zero/1.0/>. */ -static inline uint64_t rotl(const uint64_t x, int k) { +typedef uint64_t random_state_engine[4]; + +static inline uint64_t +rotl(const uint64_t x, int k) +{ return (x << k) | (x >> (64 - k)); } -typedef uint64_t random_state_engine[4]; - static inline void init_random_state_engine(random_state_engine engine, uint64_t seed) { @@ -40,8 +42,10 @@ init_random_state_engine(random_state_en } } -static inline uint64_t next(random_state_engine rse) { - const uint64_t output = rotl(rse[0] * 5, 7) * 9; +static inline uint64_t +next(random_state_engine rse) +{ + const uint64_t output = rotl(rse[1] * 5, 7) * 9; const uint64_t t = rse[1] << 17; diff --git a/sql/test/sample/Tests/sample.10.test b/sql/test/sample/Tests/sample.10.test --- a/sql/test/sample/Tests/sample.10.test +++ b/sql/test/sample/Tests/sample.10.test @@ -1,14 +1,14 @@ query I rowsort select * from GENERATE_SERIES(1,41) sample 0.25 seed 1234 ---- -14 -15 -19 -2 +1 +16 +17 20 +22 26 -27 -38 -39 -8 +28 +29 +31 +7 diff --git a/sql/test/sample/Tests/sample.11.test b/sql/test/sample/Tests/sample.11.test --- a/sql/test/sample/Tests/sample.11.test +++ b/sql/test/sample/Tests/sample.11.test @@ -1,24 +1,24 @@ query I rowsort select * from GENERATE_SERIES(1,41) sample 0.5 seed 1234 ---- -12 -13 -14 -15 +1 +11 +16 17 18 -19 2 20 +22 26 27 +28 
+29 3 30 -37 -38 -39 -4 +31 +35 +36 +5 7 8 -9 diff --git a/sql/test/sample/Tests/sample.12.test b/sql/test/sample/Tests/sample.12.test --- a/sql/test/sample/Tests/sample.12.test +++ b/sql/test/sample/Tests/sample.12.test @@ -1,34 +1,34 @@ query I rowsort select * from GENERATE_SERIES(1,41) sample 0.75 seed 1234 ---- -1 10 11 12 13 -16 -17 +14 +15 18 +19 +2 21 -22 23 24 25 -28 -29 +27 3 30 -31 32 33 34 35 36 37 +38 +39 4 40 5 6 -7 +8 9 diff --git a/sql/test/sample/Tests/sample.13.test b/sql/test/sample/Tests/sample.13.test _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org