Changeset: a3d5bebe13df for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a3d5bebe13df Modified Files: clients/Tests/exports.stable.out gdk/gdk_select.c monetdb5/modules/mal/pcre.c monetdb5/modules/mal/pcre.mal Branch: default Log Message:
merge diffs (truncated from 445 to 300 lines): diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -176,6 +176,7 @@ BAT *BATsubselect(BAT *b, BAT *s, const BAT *BATsunion(BAT *b, BAT *c); BAT *BATsunique(BAT *b); BAT *BATthetajoin(BAT *l, BAT *r, int mode, BUN estimate); +BAT *BATthetasubselect(BAT *b, BAT *s, const void *val, const char *op); int BATtopN(BAT *b, BUN topN); BAT *BATundo(BAT *b); BAT *BATuselect(BAT *b, const void *tl, const void *th); @@ -746,6 +747,8 @@ str ALGtdiff(int *result, int *lid, int str ALGthetajoin(int *result, int *lid, int *rid, int *opc); str ALGthetajoinEstimate(int *result, int *lid, int *rid, int *opc, lng *estimate); str ALGthetaselect(int *result, int *bid, ptr low, str *op); +str ALGthetasubselect1(bat *result, bat *bid, const void *val, const char **op); +str ALGthetasubselect2(bat *result, bat *bid, bat *sid, const void *val, const char **op); str ALGthetauselect(int *result, int *bid, ptr value, str *op); str ALGthsort(int *result, int *lid); str ALGtintersect(int *result, int *lid, int *rid); @@ -2108,6 +2111,8 @@ str PCRElike3(bit *ret, str *s, str *pat str PCRElike_join_pcre(int *ret, int *b, int *pat, str *esc); str PCRElike_select_pcre(int *ret, int *b, str *pat, str *esc); str PCRElike_uselect_pcre(int *ret, int *b, str *pat, str *esc); +str PCRElikesubselect1(bat *ret, bat *bid, str *pat, str *esc, bit *caseignore, bit *anti); +str PCRElikesubselect2(bat *ret, bat *bid, bat *sid, str *pat, str *esc, bit *caseignore, bit *anti); str PCREmatch(bit *ret, str *val, str *pat); str PCREnotilike2(bit *ret, str *s, str *pat); str PCREnotilike3(bit *ret, str *s, str *pat, str *esc); diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c --- a/gdk/gdk_select.c +++ b/gdk/gdk_select.c @@ -161,18 +161,13 @@ BAT_hashselect(BAT *b, BAT *s, BAT *bn, "#BATsubselect(b=%s#"BUNFMT",s=%s,anti=%d): " \ "scanselect %s\n", BATgetId(b), BATcount(b), \ s ? BATgetId(s) : "NULL", anti, #TEST); \ - BATloop(s, p, q) { \ - o = * (oid *) BUNtloc(si, p); \ - if (o == oid_nil || \ - o < seqbase || \ - o - seqbase >= b->U->count) { \ - /* XXX return an error? */ \ - continue; \ - } \ - p = (BUN) (o - off); \ - v = BUNtail(bi, p); \ + while (p < q) { \ + o = *candlist++; \ + r = (BUN) (o - off); \ + v = BUNtail(bi, r); \ if (TEST) \ bunfastins(bn, NULL, &o); \ + p++; \ } \ } while (0) @@ -199,7 +194,7 @@ BAT_scanselect(BAT *b, BAT *s, BAT *bn, BATiter bi = bat_iterator(b); int (*cmp)(const void *, const void *); BUN p, q; - oid o, seqbase, off; + oid o, off; const void *nil, *v; int c; @@ -216,14 +211,22 @@ BAT_scanselect(BAT *b, BAT *s, BAT *bn, assert(!lval || !hval || (*cmp)(tl, th) <= 0); nil = b->T->nonil ? NULL : ATOMnilptr(b->ttype); - seqbase = b->hseqbase; - off = seqbase - BUNfirst(b); + off = b->hseqbase - BUNfirst(b); if (s) { - BATiter si = bat_iterator(s); + const oid *candlist; + BUN r; assert(s->tsorted); assert(s->tkey); + /* setup candscanloop loop vars to only iterate over + * part of s that has values that are in range of b */ + o = b->hseqbase + BATcount(b); + q = SORTfndfirst(s, &o); + p = SORTfndfirst(s, &b->hseqbase); + /* should we return an error if p > BUNfirst(s) || q < + * BUNlast(s) (i.e. s not fully used)? */ + candlist = (const oid *) Tloc(s, p); if (equi) { assert(li && hi); assert(!anti); @@ -324,7 +327,6 @@ BATsubselect(BAT *b, BAT *s, const void { int hval, lval, equi, t, lnil; const void *nil; - BAT *orig_s = s; BAT *bn; BUN estimate; @@ -433,10 +435,7 @@ BATsubselect(BAT *b, BAT *s, const void * any: i.e. return everything */ ALGODEBUG fprintf(stderr, "#BATsubselect(b=%s#"BUNFMT",s=%s,anti=%d): everything, nonil\n", BATgetId(b), BATcount(b), s ? BATgetId(s) : "NULL", anti); if (s) { - if (s == orig_s) - return BATcopy(s, TYPE_void, s->ttype, 0); - else - return s; /* already made a copy: return it */ + return BATcopy(s, TYPE_void, s->ttype, 0); } else { return BATmirror(BATmark(b, 0)); } @@ -526,8 +525,6 @@ BATsubselect(BAT *b, BAT *s, const void o = (oid) high; high = SORTfndfirst(s, &o); v = VIEWhead(BATmirror(s)); - if (s != orig_s) - BBPunfix(s->batCacheid); } else { v = VIEWhead(b); /* [oid,nil] */ } @@ -540,8 +537,6 @@ BATsubselect(BAT *b, BAT *s, const void o = (oid) high; high = SORTfndfirst(s, &o); v = VIEWhead(BATmirror(s)); - if (s != orig_s) - BBPunfix(s->batCacheid); } else { v = VIEWhead(b); /* [oid,nil] */ } @@ -584,11 +579,8 @@ BATsubselect(BAT *b, BAT *s, const void } bn = BATnew(TYPE_void, TYPE_oid, estimate); - if (bn == NULL) { - if (s && s != orig_s) - BBPreclaim(s); + if (bn == NULL) return NULL; - } if (equi && (b->T->hash || @@ -602,9 +594,6 @@ BATsubselect(BAT *b, BAT *s, const void bn = BAT_scanselect(b, s, bn, tl, th, li, hi, equi, anti, lval, hval); } - if (bn == NULL && s && s != orig_s) - BBPreclaim(s); - return bn; } diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c --- a/monetdb5/modules/mal/pcre.c +++ b/monetdb5/modules/mal/pcre.c @@ -97,6 +97,8 @@ pcre_export str PCREilike_join_pcre(int pcre_export str PCRElike_select_pcre(int *ret, int *b, str *pat, str *esc); pcre_export str PCREilike_select_pcre(int *ret, int *b, str *pat, str *esc); pcre_export str pcre_init(void); +pcre_export str PCRElikesubselect1(bat *ret, bat *bid, str *pat, str *esc, bit *caseignore, bit *anti); +pcre_export str PCRElikesubselect2(bat *ret, bat *bid, bat *sid, str *pat, str *esc, bit *caseignore, bit *anti); /* current implementation assumes simple %keyword% [keyw%]* */ typedef struct RE { @@ -368,6 +370,132 @@ pcre_index(int *res, pcre * pattern, str return MAL_SUCCEED; } +/* these two defines are copies from gdk_select.c */ + +/* scan select loop with candidates */ +#define candscanloop(TEST) \ + do { \ + ALGODEBUG fprintf(stderr, \ + "#BATsubselect(b=%s#"BUNFMT",s=%s,anti=%d): " \ + "scanselect %s\n", BATgetId(b), BATcount(b), \ + s ? BATgetId(s) : "NULL", anti, #TEST); \ + while (p < q) { \ + o = *candlist++; \ + r = (BUN) (o - off); \ + v = BUNtail(bi, r); \ + if (TEST) \ + bunfastins(bn, NULL, &o); \ + p++; \ + } \ + } while (0) + +/* scan select loop without candidates */ +#define scanloop(TEST) \ + do { \ + ALGODEBUG fprintf(stderr, \ + "#BATsubselect(b=%s#"BUNFMT",s=%s,anti=%d): " \ + "scanselect %s\n", BATgetId(b), BATcount(b), \ + s ? BATgetId(s) : "NULL", anti, #TEST); \ + BATloop(b, p, q) { \ + v = BUNtail(bi, p); \ + if (TEST) { \ + o = (oid) p + off; \ + bunfastins(bn, NULL, &o); \ + } \ + } \ + } while (0) + +static str +pcre_likesubselect(BAT **bnp, BAT *b, BAT *s, const char *pat, int caseignore, int anti) +{ + int options = PCRE_UTF8 | PCRE_MULTILINE; + pcre *re; + pcre_extra *pe; + const char *error; + int errpos; + BATiter bi = bat_iterator(b); + BAT *bn; + BUN p, q; + oid o, off; + const char *v; + int ovector[10]; + + assert(BAThdense(b)); + assert(ATOMstorage(b->ttype) == TYPE_str); + assert(anti == 0 || anti == 1); + + if (caseignore) + options |= PCRE_CASELESS; + if ((re = pcre_compile(pat, options, &error, &errpos, NULL)) == NULL) + throw(MAL, "pcre.likesubselect", + OPERATION_FAILED ": compilation of pattern \"%s\" failed\n", pat); + pe = pcre_study(re, 0, &error); + if (error != NULL) { + my_pcre_free(re); + my_pcre_free(pe); + throw(MAL, "pcre.likesubselect", + OPERATION_FAILED ": studying pattern \"%s\" failed\n", pat); + } + bn = BATnew(TYPE_void, TYPE_oid, s ? BATcount(s) : BATcount(b)); + if (bn == NULL) { + my_pcre_free(re); + my_pcre_free(pe); + throw(MAL, "pcre.likesubselect", MAL_MALLOC_FAIL); + } + off = b->hseqbase - BUNfirst(b); + + if (s) { + const oid *candlist; + BUN r; + + assert(BAThdense(s)); + assert(s->ttype == TYPE_oid || s->ttype == TYPE_void); + assert(s->tsorted); + assert(s->tkey); + /* setup candscanloop loop vars to only iterate over + * part of s that has values that are in range of b */ + o = b->hseqbase + BATcount(b); + q = SORTfndfirst(s, &o); + p = SORTfndfirst(s, &b->hseqbase); + candlist = (const oid *) Tloc(s, p); + if (anti) + candscanloop(v && *v != '\200' && + pcre_exec(re, pe, v, (int) strlen(v), 0, 0, ovector, 10) == -1); + else + candscanloop(v && *v != '\200' && + pcre_exec(re, pe, v, (int) strlen(v), 0, 0, ovector, 10) >= 0); + } else { + if (anti) + scanloop(v && *v != '\200' && + pcre_exec(re, pe, v, (int) strlen(v), 0, 0, ovector, 10) == -1); + else + scanloop(v && *v != '\200' && + pcre_exec(re, pe, v, (int) strlen(v), 0, 0, ovector, 10) >= 0); + } + my_pcre_free(re); + my_pcre_free(pe); + bn->tsorted = 1; + bn->trevsorted = bn->U->count <= 1; + bn->tkey = 1; + bn->tdense = bn->U->count <= 1; + if (bn->U->count == 1) + bn->tseqbase = * (oid *) Tloc(bn, BUNfirst(bn)); + bn->hsorted = 1; + bn->hdense = 1; + bn->hseqbase = 0; + bn->hkey = 1; + bn->hrevsorted = bn->U->count <= 1; + *bnp = bn; + return MAL_SUCCEED; + + bunins_failed: + BBPreclaim(bn); + my_pcre_free(re); + my_pcre_free(pe); + *bnp = NULL; + throw(MAL, "pcre.likesubselect", OPERATION_FAILED); +} + static str pcre_select(BAT **res, str pattern, BAT *strs, bit insensitive) { @@ -839,7 +967,7 @@ pcre_heap(Heap *heap, size_t capacity) } _______________________________________________ Checkin-list mailing list Checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list