Changeset: 3d18e45d5375 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/3d18e45d5375 Modified Files: gdk/gdk_strimps.c Branch: string_imprints Log Message:
Use candidate lists for strimps We use candidate lists both for strimp creation and for filtering. diffs (136 lines): diff --git a/gdk/gdk_strimps.c b/gdk/gdk_strimps.c --- a/gdk/gdk_strimps.c +++ b/gdk/gdk_strimps.c @@ -242,20 +242,25 @@ STRMPbuildHeader(BAT *b, BAT *s, CharPai lng t0 = 0; BATiter bi; str cs; - BUN i; + BUN i, ncand; size_t hidx; + oid x; size_t hlen; PairHistogramElem *hist; PairIterator pi, *pip; CharPair cp, *cpp; + struct canditer ci; - (void)s; TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec(); hlen = STRIMP_HISTSIZE; if ((hist = (PairHistogramElem *)GDKmalloc(hlen*sizeof(PairHistogramElem))) == NULL) { - // TODO handle error - return 0; + return false; + } + + ncand = canditer_init(&ci, b, s); + if (ncand == 0) { + return false; } for(hidx = 0; hidx < hlen; hidx++) { @@ -267,8 +272,9 @@ STRMPbuildHeader(BAT *b, BAT *s, CharPai bi = bat_iterator(b); pip = &pi; cpp = &cp; - for (i = 0; i < b->batCount; i++) { - cs = (str)BUNtvar(bi, i); + for (i = 0; i < ncand; i++) { + x = canditer_next(&ci) - b->hseqbase; + cs = (str)BUNtvar(bi, x); if (!strNil(cs)) { pi.s = cs; pi.pos = 0; @@ -339,8 +345,8 @@ STRMPcreateStrimpHeap(BAT *b, BAT *s) if (b->tstrimps == NULL) { MT_lock_set(&b->batIdxLock); /* Make sure no other thread got here first */ - if (b->tstrimps == NULL) { - STRMPbuildHeader(b, s, hpairs); /* Find the header pairs */ + if (b->tstrimps == NULL && + STRMPbuildHeader(b, s, hpairs)) { /* Find the header pairs, put the result in hpairs */ sz = 8 + STRIMP_HEADER_SIZE; /* add 8-bytes for the descriptor and the pair sizes */ for (i = 0; i < STRIMP_HEADER_SIZE; i++) { sz += hpairs[i].psize; @@ -464,14 +470,14 @@ BAT * STRMPfilter(BAT *b, BAT *s, char *q) { BAT *r = NULL; - BUN i; + BUN i, ncand; uint64_t qbmask; uint64_t *ptr; Strimps *strmps; - (void)s; + oid x; + struct canditer ci; if (isVIEW(b)) { - // b = BBP_cache(VIEWtparent(b)); BAT *pb = BBP_cache(VIEWtparent(b)); if (!BATcheckstrimps(pb)) goto sfilter_fail; @@ -483,17 +489,27 @@ STRMPfilter(BAT *b, 
BAT *s, char *q) strmps = b->tstrimps; } - r = COLnew(b->hseqbase, TYPE_oid, b->batCount, TRANSIENT); + ncand = canditer_init(&ci, b, s); + if (ncand == 0) + /* Is this correct? */ + return BATdense(b->hseqbase, 0, 0); + r = COLnew(b->hseqbase, TYPE_oid, ncand, TRANSIENT); if (r == NULL) { goto sfilter_fail; } + /* TODO: Compare patterns with and without SQL pattern metachars + * (% and _). Theoretically they should produce the same results + * because bitstring creation ignores punctuation characters + * (see the macro isIgnored). + */ qbmask = STRMPmakebitstring(q, strmps); ptr = (uint64_t *)strmps->strimps_base; - for (i = 0; i < b->batCount; i++) { - if ((*(ptr + i) & qbmask) == qbmask) { - oid pos = i + b->hseqbase; + for (i = 0; i < ncand; i++) { + x = canditer_next(&ci) - b->hseqbase; + if ((*(ptr + x) & qbmask) == qbmask) { + oid pos = x + b->hseqbase; if (BUNappend(r, &pos, false) != GDK_SUCCEED) goto sfilter_fail; } @@ -590,11 +606,13 @@ STRMPcreate(BAT *b, BAT *s) { lng t0 = 0; BATiter bi; - BUN i; + BUN i, ncand; str cs; Strimps *h; uint64_t *dh; BAT *pb; + oid x; + struct canditer ci; TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec(); if (b->ttype != TYPE_str) { @@ -619,9 +637,12 @@ STRMPcreate(BAT *b, BAT *s) } dh = (uint64_t *)((uint8_t*)h->strimps.base + h->strimps.free + b->hseqbase*8); + ncand = canditer_init(&ci, b, s); + bi = bat_iterator(b); - for (i = 0; i < bi.count; i++) { - cs = (str)BUNtvar(bi, i); + for (i = 0; i < ncand; i++) { + x = canditer_next(&ci) - b->hseqbase; + cs = (str)BUNtvar(bi, x); if (!strNil(cs)) *dh++ = STRMPmakebitstring(cs, h); else _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list