Changeset: d5d8593eecaf for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/d5d8593eecaf
Modified Files:
        monetdb5/modules/atoms/str.c
        monetdb5/modules/kernel/batstr.c
Branch: sw_ew_c_sorting
Log Message:

Clean up MAL str functions plus make proper comparisons with ext string
functions.


diffs (truncated from 681 to 300 lines):

diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -3766,25 +3766,6 @@ str_is_iprefix(const char *s, const char
        return utf8ncasecmp(s, prefix, plen);
 }
 
-static str
-STRstartswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
-{
-       (void) cntxt;
-       (void) mb;
-       bit *res = getArgReference(stk, pci, 0);
-       const str *arg1 = getArgReference(stk, pci, 1),
-               *arg2 = getArgReference(stk, pci, 2);
-       bit icase = pci->argc == 4
-                       && *getArgReference_bit(stk, pci, 3) ? true : false;
-       str s = *arg1, prefix = *arg2, msg = MAL_SUCCEED;
-       int plen = str_strlen(prefix);
-
-       *res = (strNil(s) || strNil(prefix)) ? bit_nil :
-                       icase ? str_is_iprefix(s, prefix, plen) : str_is_prefix(s, prefix,
-                                                                                                                                       plen);
-       return msg;
-}
-
 bit
 str_is_suffix(const char *s, const char *suffix, int sul)
 {
@@ -3807,27 +3788,6 @@ str_is_isuffix(const char *s, const char
                return utf8casecmp(s + sl - sul, suffix);
 }
 
-
-/* returns whether arg1 ends with arg2 */
-static str
-STRendswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
-{
-       (void) cntxt;
-       (void) mb;
-       bit *res = getArgReference(stk, pci, 0);
-       const str *arg1 = getArgReference(stk, pci, 1),
-               *arg2 = getArgReference(stk, pci, 2);
-       bit icase = pci->argc == 4
-                       && *getArgReference_bit(stk, pci, 3) ? true : false;
-       str s = *arg1, suffix = *arg2, msg = MAL_SUCCEED;
-       int sul = str_strlen(suffix);
-
-       *res = (strNil(s) || strNil(suffix)) ? bit_nil :
-                       icase ? str_is_isuffix(s, suffix, sul) : str_is_suffix(s, suffix,
-                                                                                                                                  sul);
-       return msg;
-}
-
 bit
 str_contains(const char *h, const char *n, int nlen)
 {
@@ -3844,25 +3804,69 @@ str_icontains(const char *h, const char 
        return utf8casestr(h, n) ? 0 : 1;
 }
 
+#define STR_MAPARGS(STK, PCI, R, S1, S2, ICASE)                                \
+       do{                                                                                                             \
+               R = getArgReference(STK, PCI, 0);                                               \
+               S1 = *getArgReference_str(STK, PCI, 1);                         \
+               S2 = *getArgReference_str(STK, PCI, 2);                         \
+               icase = PCI->argc == 4 &&                                                                \
+                       *getArgReference_bit(STK, PCI, 3) ? true : false;       \
+                                                                                                                               \
+       } while(0)
+
+static str
+STRstartswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       (void) cntxt;
+       (void) mb;
+
+       str s1, s2;
+       bit *r, icase;
+
+       STR_MAPARGS(stk, pci, r, s1, s2, icase);
+
+       int s2_len = str_strlen(s2);
+       *r = (strNil(s1) || strNil(s2)) ? bit_nil :
+               icase ? str_is_iprefix(s1, s2, s2_len) == 0 :
+                       str_is_prefix(s1, s2, s2_len) == 0;
+       return MAL_SUCCEED;
+}
+
+static str
+STRendswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       (void) cntxt;
+       (void) mb;
+
+       str s1, s2;
+       bit *r, icase;
+
+       STR_MAPARGS(stk, pci, r, s1, s2, icase);
+
+       int s2_len = str_strlen(s2);
+       *r = (strNil(s1) || strNil(s2)) ? bit_nil :
+               icase ? str_is_isuffix(s1, s2, s2_len) == 0 :
+                       str_is_suffix(s1, s2, s2_len) == 0;
+       return MAL_SUCCEED;
+}
+
 /* returns whether haystack contains needle */
 static str
 STRcontains(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
        (void) cntxt;
        (void) mb;
-       bit *res = getArgReference(stk, pci, 0);
-       const str *arg1 = getArgReference(stk, pci, 1),
-               *arg2 = getArgReference(stk, pci, 2);
-       bit icase = pci->argc == 4
-                       && *getArgReference_bit(stk, pci, 3) ? true : false;
-       str haystack = *arg1, needle = *arg2, msg = MAL_SUCCEED;
-       int needle_len = str_strlen(needle);
-
-       *res = (strNil(haystack) || strNil(needle)) ? bit_nil :
-                       icase ? str_icontains(haystack, needle,
-                                                                 needle_len) : str_contains(haystack, needle,
-                                                                                                                        needle_len);
-       return msg;
+
+       str s1, s2;
+       bit *r, icase;
+
+       STR_MAPARGS(stk, pci, r, s1, s2, icase);
+
+       int s2_len = str_strlen(s2);
+       *r = (strNil(s1) || strNil(s2)) ? bit_nil :
+               icase ? str_icontains(s1, s2, s2_len) == 0 :
+                       str_contains(s1, s2, s2_len) == 0;
+       return MAL_SUCCEED;
 }
 
 int
@@ -5150,7 +5154,7 @@ BBPnreclaim(int nargs, ...)
        } while (0)
 
 static str
-do_string_select(BAT *bn, BAT *b, BAT *s, struct canditer *ci, BUN p, BUN q,
+str_select(BAT *bn, BAT *b, BAT *s, struct canditer *ci, BUN p, BUN q,
                                 BUN *rcnt, const char *key, bool anti,
                                 bit (*str_cmp)(const char *, const char *, int),
                                 bool keep_nulls)
@@ -5170,10 +5174,9 @@ do_string_select(BAT *bn, BAT *b, BAT *s
                                                                                                   qry_ctx->querytimeout) : 0;
 
        if (anti)                                       /* keep nulls ? (use false for now) */
-               scanloop_anti(v && *v != '\200'
-                                         && str_cmp(v, key, klen) == 0, keep_nulls);
+               scanloop_anti(v && *v != '\200' && str_cmp(v, key, klen) != 0, keep_nulls);
        else
-               scanloop(v && *v != '\200' && str_cmp(v, key, klen) != 0, keep_nulls);
+               scanloop(v && *v != '\200' && str_cmp(v, key, klen) == 0, keep_nulls);
 
   bailout:
        bat_iterator_end(&bi);
@@ -5182,41 +5185,40 @@ do_string_select(BAT *bn, BAT *b, BAT *s
 }
 
 static str
-string_select(bat *ret, const bat *bid, const bat *sid, const str *key,
-                         const bit *anti, bit (*str_cmp)(const char *, const char *, int),
+STRselect(bat *r_id, const bat *b_id, const bat *cb_id, const char *key,
+                         const bit anti, bit (*str_cmp)(const char *, const char *, int),
                          const str fname)
 {
-       BAT *b, *s = NULL, *bn = NULL, *old_s = NULL;;
        str msg = MAL_SUCCEED;
+
+       BAT *b, *cb = NULL, *r = NULL, *old_s = NULL;;
        BUN p = 0, q = 0, rcnt = 0;
        struct canditer ci;
-       bool with_strimps = false, with_strimps_anti = false;
-
-       if ((b = BATdescriptor(*bid)) == NULL) {
-               msg = createException(MAL, fname,
-                                                         SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
-               goto bailout;
-       }
-       if (sid && !is_bat_nil(*sid) && (s = BATdescriptor(*sid)) == NULL) {
-               msg = createException(MAL, fname,
-                                                         SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
-               goto bailout;
+       bool with_strimps = false,
+               with_strimps_anti = false;
+
+       if (!(b = BATdescriptor(*b_id)))
+               throw(MAL, fname, RUNTIME_OBJECT_MISSING);
+
+       if (cb_id && !is_bat_nil(*cb_id) && !(cb = BATdescriptor(*cb_id))) {
+               BBPreclaim(b);
+               throw(MAL, fname, RUNTIME_OBJECT_MISSING);
        }
 
        assert(ATOMstorage(b->ttype) == TYPE_str);
 
        if (BAThasstrimps(b)) {
                if (STRMPcreate(b, NULL) == GDK_SUCCEED) {
-                       BAT *tmp_s = STRMPfilter(b, s, *key, *anti);
+                       BAT *tmp_s = STRMPfilter(b, cb, key, anti);
                        if (tmp_s) {
-                               old_s = s;
-                               s = tmp_s;
-                               if (!*anti)
+                               old_s = cb;
+                               cb = tmp_s;
+                               if (!anti)
                                        with_strimps = true;
                                else
                                        with_strimps_anti = true;
                        }
-               } else {                                /* If we cannot filter with the strimp just continue normally */
+               } else {
                        GDKclrerr();
                }
        }
@@ -5225,20 +5227,19 @@ string_select(bat *ret, const bat *bid, 
                                                   "string_select: strcmp function using strimps" :
                                                   (with_strimps_anti ?
                                                        "string_select: strcmp function using strimps anti"
-                                                       :
-                                                       "string_select: strcmp function with no accelerator"));
-
-       canditer_init(&ci, b, s);
-       if (!(bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) {
-               msg = createException(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
-               goto bailout;
+                                                       : "string_select: strcmp function with no accelerator"));
+
+       canditer_init(&ci, b, cb);
+       if (!(r = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) {
+               BBPnreclaim(2, b, cb);
+               throw(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
        }
 
-       if (!s || BATtdense(s)) {
-               if (s) {
-                       assert(BATtdense(s));
-                       p = (BUN) s->tseqbase;
-                       q = p + BATcount(s);
+       if (!cb || BATtdense(cb)) {
+               if (cb) {
+                       assert(BATtdense(cb));
+                       p = (BUN) cb->tseqbase;
+                       q = p + BATcount(cb);
                        if ((oid) p < b->hseqbase)
                                p = b->hseqbase;
                        if ((oid) q > b->hseqbase + BATcount(b))
@@ -5249,101 +5250,131 @@ string_select(bat *ret, const bat *bid, 
                }
        }
 
-       msg = do_string_select(bn, b, s, &ci, p, q, &rcnt, *key, *anti
-                                                  && !with_strimps_anti, str_cmp, with_strimps_anti);
-
-       if (!msg) {                                     /* set some properties */
-               BATsetcount(bn, rcnt);
-               bn->tsorted = true;
-               bn->trevsorted = bn->batCount <= 1;
-               bn->tkey = true;
-               bn->tnil = false;
-               bn->tnonil = true;
-               bn->tseqbase = rcnt == 0 ? 0 : rcnt == 1 ? *(const oid *) Tloc(bn, 0) : rcnt == b->batCount ? b->hseqbase : oid_nil;
+       msg = str_select(r, b, cb, &ci, p, q, &rcnt, key, anti
+                                        && !with_strimps_anti, str_cmp, with_strimps_anti);
+
+       if (!msg) {
+               BATsetcount(r, rcnt);
+               r->tsorted = r->batCount <= 1;
+               r->trevsorted = r->batCount <= 1;
+               r->tkey = false;
+               r->tnil = false;
+               r->tnonil = true;
+               r->tseqbase = rcnt == 0 ?
+                       0 : rcnt == 1 ?
+                       *(const oid *) Tloc(r, 0) : rcnt == b->batCount ? b->hseqbase : oid_nil;
+
                if (with_strimps_anti) {
                        BAT *rev;
                        if (old_s) {
-                               rev = BATdiffcand(old_s, bn);
+                               rev = BATdiffcand(old_s, r);
 #ifndef NDEBUG
-                               BAT *is = BATintersectcand(old_s, bn);
+                               BAT *is = BATintersectcand(old_s, r);
                                if (is) {
-                                       assert(is->batCount == bn->batCount);
+                                       assert(is->batCount == r->batCount);
                                        BBPreclaim(is);
                                }
-                               assert(rev->batCount == old_s->batCount - bn->batCount);
+                               assert(rev->batCount == old_s->batCount - r->batCount);
 #endif
-                       }
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to