Changeset: 4fb5c7d4f9fa for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4fb5c7d4f9fa
Added Files:
	gdk/gdk_cand.c
Modified Files:
	gdk/Makefile.ag
	gdk/gdk_batop.c
	gdk/gdk_group.c
	gdk/gdk_join.c
	gdk/gdk_private.h
	gdk/gdk_select.c
	gdk/gdk_unique.c
Branch: viewless
Log Message:
moving shared candidate related function to gdk_cand.c also introduce the CAND prefix for these functions diffs (truncated from 384 to 300 lines): diff --git a/gdk/Makefile.ag b/gdk/Makefile.ag --- a/gdk/Makefile.ag +++ b/gdk/Makefile.ag @@ -16,7 +16,7 @@ lib_gdk = { gdk_calc.c gdk_calc.h gdk_calc_compare.h gdk_calc_private.h \ gdk_ssort.c gdk_ssort_impl.h \ gdk_aggr.c \ - gdk.h gdk_cand.h gdk_atomic.h gdk_batop.c \ + gdk.h gdk_cand.h gdk_cand.c gdk_atomic.h gdk_batop.c \ gdk_search.c gdk_hash.c gdk_hash.h gdk_tm.c \ gdk_orderidx.c \ gdk_align.c gdk_bbp.c gdk_bbp.h \ diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -1957,7 +1957,7 @@ BATmergecand(BAT *a, BAT *b) bn->tkey = 1; bn->tnil = 0; bn->tnonil = 1; - return virtualize(bn); + return CANDvirtualize(bn); } /* intersect two candidate lists and produce a new one @@ -2044,5 +2044,5 @@ BATintersectcand(BAT *a, BAT *b) bn->tkey = 1; bn->tnil = 0; bn->tnonil = 1; - return virtualize(bn); + return CANDvirtualize(bn); } diff --git a/gdk/gdk_cand.c b/gdk/gdk_cand.c new file mode 100644 --- /dev/null +++ b/gdk/gdk_cand.c @@ -0,0 +1,89 @@ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * Copyright 1997 - July 2008 CWI, August 2008 - 2017 MonetDB B.V. + */ + +#include "monetdb_config.h" +#include "gdk.h" +#include "gdk_private.h" +#include "gdk_cand.h" + +/* This file contains function to create and manipulate candidate + * lists. The functions are shared across GDK and are inlined if + * needed. 
+ */ + +/* create a new dense candiate list */ +BAT * +CANDnewdense(oid first, oid last) +{ + BAT *bn; + if ((bn = COLnew(0, TYPE_void, 0, TRANSIENT)) == NULL) + return NULL; + if (last < first) + first = last = 0; /* empty range */ + BATsetcount(bn, last - first + 1); + BATtseqbase(bn, first); + return bn; +} + +/* binary search in a candidate list, return 1 if found, 0 if not */ +inline int +CANDbinsearch(const oid *cand, BUN lo, BUN hi, oid v) +{ + BUN mid; + + --hi; /* now hi is inclusive */ + if (v < cand[lo] || v > cand[hi]) + return 0; + while (hi > lo) { + mid = (lo + hi) / 2; + if (cand[mid] == v) + return 1; + if (cand[mid] < v) + lo = mid + 1; + else + hi = mid - 1; + } + return cand[lo] == v; +} + +/* makes sure that a candidate list is virtualized to dense when possible */ +BAT * +CANDvirtualize(BAT *bn) +{ + /* input must be a valid candidate list or NULL */ + assert(bn == NULL || + (((bn->ttype == TYPE_void && bn->tseqbase != oid_nil) || + bn->ttype == TYPE_oid) && + bn->tkey && bn->tsorted)); + /* since bn has unique and strictly ascending tail values, we + * can easily check whether the tail is dense */ + if (bn && bn->ttype == TYPE_oid && + (BATcount(bn) <= 1 || + * (const oid *) Tloc(bn, 0) + BATcount(bn) - 1 == + * (const oid *) Tloc(bn, BUNlast(bn) - 1))) { + /* tail is dense, replace by virtual oid */ + ALGODEBUG fprintf(stderr, "#CANDvirtualize(bn=%s#"BUNFMT",seq="OIDFMT")\n", + BATgetId(bn), BATcount(bn), + BATcount(bn) > 0 ? 
* (const oid *) Tloc(bn, 0) : 0); + if (BATcount(bn) == 0) + bn->tseqbase = 0; + else + bn->tseqbase = * (const oid *) Tloc(bn, 0); + bn->tdense = 1; + HEAPfree(&bn->theap, 1); + bn->theap.storage = bn->theap.newstorage = STORE_MEM; + bn->theap.size = 0; + bn->ttype = TYPE_void; + bn->tvarsized = 1; + bn->twidth = 0; + bn->tshift = 0; + } + + return bn; +} + diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c --- a/gdk/gdk_group.c +++ b/gdk/gdk_group.c @@ -1061,7 +1061,7 @@ BATgroup_internal(BAT **groups, BAT **ex en->trevsorted = ngrp == 1; en->tnonil = 1; en->tnil = 0; - *extents = virtualize(en); + *extents = CANDvirtualize(en); } if (histo) { BATsetcount(hn, (BUN) ngrp); diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -99,6 +99,13 @@ joinparamcheck(BAT *l, BAT *r1, BAT *r2, GDKerror("%s: candidate lists must be unique.\n", func); return GDK_FAIL; } + if (!viewless(l) || !viewless(r1) || (r2 && !viewless(r2))) { + /* + GDKerror("%s: inputs are views.\n", func); + assert(0); + return GDK_FAIL; + */ + } return GDK_SUCCEED; } @@ -236,11 +243,11 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r r2->tnonil = 1; } if (lstart == lend || !(nil_on_miss | only_misses)) { - virtualize(r1); + CANDvirtualize(r1); r1->trevsorted = 1; r1->tnorevsorted = 0; if (r2) { - virtualize(r2); + CANDvirtualize(r2); r2->trevsorted = 1; r2->tnorevsorted = 0; } @@ -740,9 +747,9 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l, } doreturn: if (r1->tkey) - virtualize(r1); + CANDvirtualize(r1); if (r2 && r2->tkey && r2->tsorted) - virtualize(r2); + CANDvirtualize(r2); ALGODEBUG fprintf(stderr, "#mergejoin_void(l=%s,r=%s)=(%s#"BUNFMT"%s%s%s%s,%s#"BUNFMT"%s%s%s%s) " LLFMT "us\n", BATgetId(l), BATgetId(r), BATgetId(r1), BATcount(r1), @@ -2310,27 +2317,6 @@ mergejoin(BAT *r1, BAT *r2, BAT *l, BAT return GDK_FAIL; } -/* binary search in a candidate list, return 1 if found, 0 if not */ -inline int -binsearchcand(const oid *cand, BUN lo, BUN hi, oid v) -{ - BUN mid; - - 
--hi; /* now hi is inclusive */ - if (v < cand[lo] || v > cand[hi]) - return 0; - while (hi > lo) { - mid = (lo + hi) / 2; - if (cand[mid] == v) - return 1; - if (cand[mid] < v) - lo = mid + 1; - else - hi = mid - 1; - } - return cand[lo] == v; -} - #define HASHLOOPBODY() \ do { \ if (BUNlast(r1) == BATcapacity(r1)) { \ @@ -2582,7 +2568,7 @@ hashjoin(BAT *r1, BAT *r2, BAT *l, BAT * } else if (rcand) { HASHloop_bound(ri, hsh, rb, v, rl, rh) { ro = (oid) (rb - rl + rseq); - if (!binsearchcand(rcand, 0, nrcand, ro)) + if (!CANDbinsearch(rcand, 0, nrcand, ro)) continue; if (only_misses) { nr++; @@ -2672,7 +2658,7 @@ hashjoin(BAT *r1, BAT *r2, BAT *l, BAT * if (nil_matches || cmp(v, nil) != 0) { HASHloop_bound(ri, hsh, rb, v, rl, rh) { ro = (oid) (rb - rl + rseq); - if (!binsearchcand(rcand, 0, nrcand, ro)) + if (!CANDbinsearch(rcand, 0, nrcand, ro)) continue; if (only_misses) { nr++; @@ -3528,7 +3514,7 @@ fetchjoin(BAT *r1, BAT *r2, BAT *l, BAT r2->tsorted = 1; r2->trevsorted = e - b <= 1; r2->tseqbase = e == b ? 
0 : r->hseqbase + b; - virtualize(r2); + CANDvirtualize(r2); } if (BATextend(r1, e - b) != GDK_SUCCEED) goto bailout; diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h --- a/gdk/gdk_private.h +++ b/gdk/gdk_private.h @@ -219,10 +219,15 @@ void BBPdump(void); /* never called: fo __hidden gdk_return VIEWreset(BAT *b) __attribute__ ((__warn_unused_result__)) __attribute__((__visibility__("hidden"))); -__hidden BAT *virtualize(BAT *bn) +/* candidate specific functions */ +__hidden BAT *CANDvirtualize(BAT *bn) __attribute__((__visibility__("hidden"))); -__hidden int binsearchcand(const oid *cand, BUN lo, BUN hi, oid v) +__hidden int CANDbinsearch(const oid *cand, BUN lo, BUN hi, oid v) __attribute__((__visibility__("hidden"))); +__hidden BAT *CANDnewdense(oid first, oid last) + __attribute__((__visibility__("hidden"))); + + __hidden void gdk_bbp_reset(void) __attribute__((__visibility__("hidden"))); __hidden void gdk_system_reset(void) diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c --- a/gdk/gdk_select.c +++ b/gdk/gdk_select.c @@ -35,41 +35,6 @@ float nextafterf(float x, float y); A[(I)] = (V); \ } while (0) -BAT * -virtualize(BAT *bn) -{ - /* input must be a valid candidate list or NULL */ - assert(bn == NULL || - (((bn->ttype == TYPE_void && bn->tseqbase != oid_nil) || - bn->ttype == TYPE_oid) && - bn->tkey && bn->tsorted)); - /* since bn has unique and strictly ascending tail values, we - * can easily check whether the tail is dense */ - if (bn && bn->ttype == TYPE_oid && - (BATcount(bn) <= 1 || - * (const oid *) Tloc(bn, 0) + BATcount(bn) - 1 == - * (const oid *) Tloc(bn, BUNlast(bn) - 1))) { - /* tail is dense, replace by virtual oid */ - ALGODEBUG fprintf(stderr, "#virtualize(bn=%s#"BUNFMT",seq="OIDFMT")\n", - BATgetId(bn), BATcount(bn), - BATcount(bn) > 0 ? 
* (const oid *) Tloc(bn, 0) : 0); - if (BATcount(bn) == 0) - bn->tseqbase = 0; - else - bn->tseqbase = * (const oid *) Tloc(bn, 0); - bn->tdense = 1; - HEAPfree(&bn->theap, 1); - bn->theap.storage = bn->theap.newstorage = STORE_MEM; - bn->theap.size = 0; - bn->ttype = TYPE_void; - bn->tvarsized = 1; - bn->twidth = 0; - bn->tshift = 0; - } - - return bn; -} - static BAT * newempty(void) _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list