Changeset: a3d5bebe13df for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a3d5bebe13df
Modified Files:
        clients/Tests/exports.stable.out
        gdk/gdk_select.c
        monetdb5/modules/mal/pcre.c
        monetdb5/modules/mal/pcre.mal
Branch: default
Log Message:

merge


diffs (truncated from 445 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -176,6 +176,7 @@ BAT *BATsubselect(BAT *b, BAT *s, const 
 BAT *BATsunion(BAT *b, BAT *c);
 BAT *BATsunique(BAT *b);
 BAT *BATthetajoin(BAT *l, BAT *r, int mode, BUN estimate);
+BAT *BATthetasubselect(BAT *b, BAT *s, const void *val, const char *op);
 int BATtopN(BAT *b, BUN topN);
 BAT *BATundo(BAT *b);
 BAT *BATuselect(BAT *b, const void *tl, const void *th);
@@ -746,6 +747,8 @@ str ALGtdiff(int *result, int *lid, int 
 str ALGthetajoin(int *result, int *lid, int *rid, int *opc);
 str ALGthetajoinEstimate(int *result, int *lid, int *rid, int *opc, lng 
*estimate);
 str ALGthetaselect(int *result, int *bid, ptr low, str *op);
+str ALGthetasubselect1(bat *result, bat *bid, const void *val, const char 
**op);
+str ALGthetasubselect2(bat *result, bat *bid, bat *sid, const void *val, const 
char **op);
 str ALGthetauselect(int *result, int *bid, ptr value, str *op);
 str ALGthsort(int *result, int *lid);
 str ALGtintersect(int *result, int *lid, int *rid);
@@ -2108,6 +2111,8 @@ str PCRElike3(bit *ret, str *s, str *pat
 str PCRElike_join_pcre(int *ret, int *b, int *pat, str *esc);
 str PCRElike_select_pcre(int *ret, int *b, str *pat, str *esc);
 str PCRElike_uselect_pcre(int *ret, int *b, str *pat, str *esc);
+str PCRElikesubselect1(bat *ret, bat *bid, str *pat, str *esc, bit 
*caseignore, bit *anti);
+str PCRElikesubselect2(bat *ret, bat *bid, bat *sid, str *pat, str *esc, bit 
*caseignore, bit *anti);
 str PCREmatch(bit *ret, str *val, str *pat);
 str PCREnotilike2(bit *ret, str *s, str *pat);
 str PCREnotilike3(bit *ret, str *s, str *pat, str *esc);
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -161,18 +161,13 @@ BAT_hashselect(BAT *b, BAT *s, BAT *bn, 
                            "#BATsubselect(b=%s#"BUNFMT",s=%s,anti=%d): " \
                            "scanselect %s\n", BATgetId(b), BATcount(b), \
                            s ? BATgetId(s) : "NULL", anti, #TEST);     \
-               BATloop(s, p, q) {                                      \
-                       o = * (oid *) BUNtloc(si, p);                   \
-                       if (o == oid_nil ||                             \
-                           o < seqbase ||                              \
-                           o - seqbase >= b->U->count) {               \
-                               /* XXX return an error? */              \
-                               continue;                               \
-                       }                                               \
-                       p = (BUN) (o - off);                            \
-                       v = BUNtail(bi, p);                             \
+               while (p < q) {                                         \
+                       o = *candlist++;                                \
+                       r = (BUN) (o - off);                            \
+                       v = BUNtail(bi, r);                             \
                        if (TEST)                                       \
                                bunfastins(bn, NULL, &o);               \
+                       p++;                                            \
                }                                                       \
        } while (0)
 
@@ -199,7 +194,7 @@ BAT_scanselect(BAT *b, BAT *s, BAT *bn, 
        BATiter bi = bat_iterator(b);
        int (*cmp)(const void *, const void *);
        BUN p, q;
-       oid o, seqbase, off;
+       oid o, off;
        const void *nil, *v;
        int c;
 
@@ -216,14 +211,22 @@ BAT_scanselect(BAT *b, BAT *s, BAT *bn, 
        assert(!lval || !hval || (*cmp)(tl, th) <= 0);
 
        nil = b->T->nonil ? NULL : ATOMnilptr(b->ttype);
-       seqbase = b->hseqbase;
-       off = seqbase - BUNfirst(b);
+       off = b->hseqbase - BUNfirst(b);
 
        if (s) {
-               BATiter si = bat_iterator(s);
+               const oid *candlist;
+               BUN r;
 
                assert(s->tsorted);
                assert(s->tkey);
+               /* setup candscanloop loop vars to only iterate over
+                * part of s that has values that are in range of b */
+               o = b->hseqbase + BATcount(b);
+               q = SORTfndfirst(s, &o);
+               p = SORTfndfirst(s, &b->hseqbase);
+               /* should we return an error if p > BUNfirst(s) || q <
+                * BUNlast(s) (i.e. s not fully used)? */
+               candlist = (const oid *) Tloc(s, p);
                if (equi) {
                        assert(li && hi);
                        assert(!anti);
@@ -324,7 +327,6 @@ BATsubselect(BAT *b, BAT *s, const void 
 {
        int hval, lval, equi, t, lnil;
        const void *nil;
-       BAT *orig_s = s;
        BAT *bn;
        BUN estimate;
 
@@ -433,10 +435,7 @@ BATsubselect(BAT *b, BAT *s, const void 
                 * any: i.e. return everything */
                ALGODEBUG fprintf(stderr, 
"#BATsubselect(b=%s#"BUNFMT",s=%s,anti=%d): everything, nonil\n", BATgetId(b), 
BATcount(b), s ? BATgetId(s) : "NULL", anti);
                if (s) {
-                       if (s == orig_s)
-                               return BATcopy(s, TYPE_void, s->ttype, 0);
-                       else
-                               return s; /* already made a copy: return it */
+                       return BATcopy(s, TYPE_void, s->ttype, 0);
                } else {
                        return BATmirror(BATmark(b, 0));
                }
@@ -526,8 +525,6 @@ BATsubselect(BAT *b, BAT *s, const void 
                                o = (oid) high;
                                high = SORTfndfirst(s, &o);
                                v = VIEWhead(BATmirror(s));
-                               if (s != orig_s)
-                                       BBPunfix(s->batCacheid);
                        } else {
                                v = VIEWhead(b); /* [oid,nil] */
                        }
@@ -540,8 +537,6 @@ BATsubselect(BAT *b, BAT *s, const void 
                                o = (oid) high;
                                high = SORTfndfirst(s, &o);
                                v = VIEWhead(BATmirror(s));
-                               if (s != orig_s)
-                                       BBPunfix(s->batCacheid);
                        } else {
                                v = VIEWhead(b); /* [oid,nil] */
                        }
@@ -584,11 +579,8 @@ BATsubselect(BAT *b, BAT *s, const void 
        }
 
        bn = BATnew(TYPE_void, TYPE_oid, estimate);
-       if (bn == NULL) {
-               if (s && s != orig_s)
-                       BBPreclaim(s);
+       if (bn == NULL)
                return NULL;
-       }
 
        if (equi &&
            (b->T->hash ||
@@ -602,9 +594,6 @@ BATsubselect(BAT *b, BAT *s, const void 
                bn = BAT_scanselect(b, s, bn, tl, th, li, hi, equi, anti, lval, 
hval);
        }
 
-       if (bn == NULL && s && s != orig_s)
-               BBPreclaim(s);
-
        return bn;
 }
 
diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c
--- a/monetdb5/modules/mal/pcre.c
+++ b/monetdb5/modules/mal/pcre.c
@@ -97,6 +97,8 @@ pcre_export str PCREilike_join_pcre(int 
 pcre_export str PCRElike_select_pcre(int *ret, int *b, str *pat, str *esc);
 pcre_export str PCREilike_select_pcre(int *ret, int *b, str *pat, str *esc);
 pcre_export str pcre_init(void);
+pcre_export str PCRElikesubselect1(bat *ret, bat *bid, str *pat, str *esc, bit 
*caseignore, bit *anti);
+pcre_export str PCRElikesubselect2(bat *ret, bat *bid, bat *sid, str *pat, str 
*esc, bit *caseignore, bit *anti);
 
 /* current implementation assumes simple %keyword% [keyw%]* */
 typedef struct RE {
@@ -368,6 +370,132 @@ pcre_index(int *res, pcre * pattern, str
        return MAL_SUCCEED;
 }
 
+/* these two defines are copied from gdk_select.c */
+
+/* scan select loop with candidates: walks the candidate oids read
+ * through candlist for positions p up to (not including) q; each
+ * candidate o is mapped to BUN position r = o - off in b, its tail
+ * value is loaded into v, and o is appended to bn when TEST holds.
+ * The macro uses the caller's locals p, q, o, r, off, v, bi, bn and
+ * candlist, plus b, s and anti for the debug message (which still
+ * says "BATsubselect" because the text was copied verbatim).
+ * NOTE(review): bunfastins presumably jumps to the caller's
+ * bunins_failed label when the insert fails -- confirm in gdk.h. */
+#define candscanloop(TEST)                                                     
                        \
+       do {                                                                    
                                        \
+               ALGODEBUG fprintf(stderr,                                       
                        \
+                           "#BATsubselect(b=%s#"BUNFMT",s=%s,anti=%d): "       
\
+                           "scanselect %s\n", BATgetId(b), BATcount(b),        
\
+                           s ? BATgetId(s) : "NULL", anti, #TEST);             
        \
+               while (p < q) {                                                 
                                \
+                       o = *candlist++;                                        
                                \
+                       r = (BUN) (o - off);                                    
                        \
+                       v = BUNtail(bi, r);                                     
                                \
+                       if (TEST)                                               
                                        \
+                               bunfastins(bn, NULL, &o);                       
                        \
+                       p++;                                                    
                                        \
+               }                                                               
                                                \
+       } while (0)
+
+/* scan select loop without candidates: visits every BUN p of b via
+ * BATloop, loads its tail value into v, and when TEST holds appends
+ * the corresponding oid o = p + off to bn.  The macro uses the
+ * caller's locals p, q, o, off, v, bi and bn, plus b, s and anti for
+ * the debug message.
+ * NOTE(review): bunfastins presumably jumps to the caller's
+ * bunins_failed label when the insert fails -- confirm in gdk.h. */
+#define scanloop(TEST)                                                         
                        \
+       do {                                                                    
                                        \
+               ALGODEBUG fprintf(stderr,                                       
                        \
+                           "#BATsubselect(b=%s#"BUNFMT",s=%s,anti=%d): "       
\
+                           "scanselect %s\n", BATgetId(b), BATcount(b),        
\
+                           s ? BATgetId(s) : "NULL", anti, #TEST);             
        \
+               BATloop(b, p, q) {                                              
                                \
+                       v = BUNtail(bi, p);                                     
                                \
+                       if (TEST) {                                             
                                        \
+                               o = (oid) p + off;                              
                                \
+                               bunfastins(bn, NULL, &o);                       
                        \
+                       }                                                       
                                                \
+               }                                                               
                                                \
+       } while (0)
+/* Select the oids of b whose string tail matches (or, with anti set,
+ * does not match) the PCRE pattern pat, optionally restricted to the
+ * candidate list s.
+ *
+ *   bnp        out: on success receives a new [void,oid] BAT holding
+ *              the selected oids; set to NULL on the bunins_failed
+ *              path; left untouched on the earlier error returns
+ *              (pattern compile/study failure, BATnew failure).
+ *   b          BAT to scan; asserted to have a dense head and a tail
+ *              whose storage type is str.
+ *   s          candidate list, or NULL to scan all of b.
+ *   pat        pattern handed unchanged to pcre_compile.
+ *   caseignore non-zero adds PCRE_CASELESS to the compile options.
+ *   anti       0 selects matching values, 1 selects non-matching ones.
+ *
+ * Returns MAL_SUCCEED, or an exception string created by throw().
+ * Values for which v is NULL or whose first byte is '\200' are never
+ * selected in either mode (presumably '\200' marks the str nil value
+ * -- confirm against the str atom definition). */
+static str
+pcre_likesubselect(BAT **bnp, BAT *b, BAT *s, const char *pat, int caseignore, 
int anti)
+{
+       int options = PCRE_UTF8 | PCRE_MULTILINE;
+       pcre *re;
+       pcre_extra *pe;
+       const char *error;
+       int errpos;
+       BATiter bi = bat_iterator(b);
+       BAT *bn;
+       BUN p, q;
+       oid o, off;
+       const char *v;
+       int ovector[10]; /* handed to pcre_exec only; its contents are never read here */
+
+       assert(BAThdense(b));
+       assert(ATOMstorage(b->ttype) == TYPE_str);
+       assert(anti == 0 || anti == 1);
+
+       if (caseignore)
+               options |= PCRE_CASELESS;
+       if ((re = pcre_compile(pat, options, &error, &errpos, NULL)) == NULL)
+               throw(MAL, "pcre.likesubselect",
+                         OPERATION_FAILED ": compilation of pattern \"%s\" 
failed\n", pat);
+       pe = pcre_study(re, 0, &error);
+       if (error != NULL) { /* study failure is detected through the error string, not through pe */
+               my_pcre_free(re);
+               my_pcre_free(pe);
+               throw(MAL, "pcre.likesubselect",
+                         OPERATION_FAILED ": studying pattern \"%s\" 
failed\n", pat);
+       }
+       bn = BATnew(TYPE_void, TYPE_oid, s ? BATcount(s) : BATcount(b)); /* capacity = number of inputs that will be tested at most */
+       if (bn == NULL) {
+               my_pcre_free(re);
+               my_pcre_free(pe);
+               throw(MAL, "pcre.likesubselect", MAL_MALLOC_FAIL);
+       }
+       off = b->hseqbase - BUNfirst(b); /* oid = BUN position + off (see scanloop/candscanloop) */
+
+       if (s) {
+               const oid *candlist;
+               BUN r;
+
+               assert(BAThdense(s));
+               assert(s->ttype == TYPE_oid || s->ttype == TYPE_void); /* NOTE(review): TYPE_void is accepted here, yet candlist below is read as an oid array -- confirm void-tailed candidate lists cannot reach this point */
+               assert(s->tsorted);
+               assert(s->tkey);
+               /* setup candscanloop loop vars to only iterate over
+                * part of s that has values that are in range of b */
+               o = b->hseqbase + BATcount(b); /* first oid past the end of b */
+               q = SORTfndfirst(s, &o);
+               p = SORTfndfirst(s, &b->hseqbase);
+               candlist = (const oid *) Tloc(s, p);
+               if (anti) /* NOTE(review): only a pcre_exec result of exactly -1 counts as "no match"; other negative results select the value in neither mode */
+                       candscanloop(v && *v != '\200' &&
+                               pcre_exec(re, pe, v, (int) strlen(v), 0, 0, 
ovector, 10) == -1);
+               else
+                       candscanloop(v && *v != '\200' &&
+                               pcre_exec(re, pe, v, (int) strlen(v), 0, 0, 
ovector, 10) >= 0);
+       } else {
+               if (anti)
+                       scanloop(v && *v != '\200' &&
+                               pcre_exec(re, pe, v, (int) strlen(v), 0, 0, 
ovector, 10) == -1);
+               else
+                       scanloop(v && *v != '\200' &&
+                               pcre_exec(re, pe, v, (int) strlen(v), 0, 0, 
ovector, 10) >= 0);
+       }
+       my_pcre_free(re);
+       my_pcre_free(pe);
+       bn->tsorted = 1; /* set unconditionally: oids were appended in scan order (s is asserted sorted above) */
+       bn->trevsorted = bn->U->count <= 1;
+       bn->tkey = 1;
+       bn->tdense = bn->U->count <= 1; /* only trivially dense results are flagged; longer results are not checked */
+       if (bn->U->count == 1)
+               bn->tseqbase =  * (oid *) Tloc(bn, BUNfirst(bn));
+       bn->hsorted = 1;
+       bn->hdense = 1;
+       bn->hseqbase = 0;
+       bn->hkey = 1;
+       bn->hrevsorted = bn->U->count <= 1;
+       *bnp = bn;
+       return MAL_SUCCEED;
+
+  bunins_failed: /* not reached by any goto visible in this function; presumably the target of bunfastins inside the scan macros */
+       BBPreclaim(bn);
+       my_pcre_free(re);
+       my_pcre_free(pe);
+       *bnp = NULL;
+       throw(MAL, "pcre.likesubselect", OPERATION_FAILED);
+}
+
 static str
 pcre_select(BAT **res, str pattern, BAT *strs, bit insensitive)
 {
@@ -839,7 +967,7 @@ pcre_heap(Heap *heap, size_t capacity)
 }
 
_______________________________________________
Checkin-list mailing list
Checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to