Changeset: 7fb8f288cf97 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/7fb8f288cf97
Modified Files:
        monetdb5/modules/mal/pcre.c
Branch: ascii-flag
Log Message:

Convert inputs to lowercase for ILIKE join.


diffs (153 lines):

diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c
--- a/monetdb5/modules/mal/pcre.c
+++ b/monetdb5/modules/mal/pcre.c
@@ -1835,10 +1835,10 @@ PCRElikeselect(bat *ret, const bat *bid,
                                goto bailout;                                   
                                                \
                        if (!empty) {                                           
                                                \
                                if (use_re) {                                   
                                                \
-                                       if ((msg = re_like_build(&re, vr, 
caseignore, use_strcmp, (unsigned char) *esc)) != MAL_SUCCEED) \
+                                       if ((msg = re_like_build(&re, vr, 
false, use_strcmp, (unsigned char) *esc)) != MAL_SUCCEED) \
                                                goto bailout;                   
                                                \
                                } else if (pcrepat) {                           
                                        \
-                                       if ((msg = pcre_like_build(&pcrere, 
&pcreex, pcrepat, caseignore, lci.ncand)) != MAL_SUCCEED) \
+                                       if ((msg = pcre_like_build(&pcrere, 
&pcreex, pcrepat, false, lci.ncand)) != MAL_SUCCEED) \
                                                goto bailout;                   
                                                \
                                        GDKfree(pcrepat);                       
                                                \
                                        pcrepat = NULL;                         
                                                \
@@ -1937,31 +1937,30 @@ pcrejoin(BAT *r1, BAT *r2, BAT *l, BAT *
        regex_t pcrere = (regex_t) { 0 };
        void *pcreex = NULL;
 #endif
+       lng t0 = 0;
 
        size_t counter = 0;
        QryCtx *qry_ctx = MT_thread_get_qry_ctx();
        qry_ctx = qry_ctx ? qry_ctx : &(QryCtx) {.endtime = 0};
 
-       TRC_DEBUG(ALGO,
-                         "pcrejoin(l=%s#" BUNFMT "[%s]%s%s,"
-                         "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
-                         "sr=%s#" BUNFMT "%s%s)\n",
-                         BATgetId(l), BATcount(l), ATOMname(l->ttype),
-                         l->tsorted ? "-sorted" : "",
-                         l->trevsorted ? "-revsorted" : "",
-                         BATgetId(r), BATcount(r), ATOMname(r->ttype),
-                         r->tsorted ? "-sorted" : "",
-                         r->trevsorted ? "-revsorted" : "",
-                         sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) : 0,
-                         sl && sl->tsorted ? "-sorted" : "",
-                         sl && sl->trevsorted ? "-revsorted" : "",
-                         sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) : 0,
-                         sr && sr->tsorted ? "-sorted" : "",
-                         sr && sr->trevsorted ? "-revsorted" : "");
+       TRC_DEBUG_IF(ALGO) t0 = GDKusec();
 
        assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
        assert(ATOMtype(l->ttype) == TYPE_str);
 
+       BAT *ol = NULL, *or = NULL;
+       if (caseignore) {
+               ol = l;
+               or = r;
+               l = BATtolower(l, NULL);
+               r = BATtolower(r, NULL);
+               if (l == NULL || r == NULL) {
+                       BBPreclaim(l);
+                       BBPreclaim(r);
+                       throw(MAL, "pcre.join", GDK_EXCEPTION);
+               }
+       }
+
        canditer_init(&lci, l, sl);
        canditer_init(&rci, r, sr);
 
@@ -1989,22 +1988,18 @@ pcrejoin(BAT *r1, BAT *r2, BAT *l, BAT *
        }
 
        if (anti) {
-               if (caseignore) {
-                       pcre_join_loop(GDKstrcasecmp(vl, vr) == 0,
-                                                  re_match(vl, re), 
!PCRE_EXEC_COND);
-               } else {
-                       pcre_join_loop(strcmp(vl, vr) == 0, re_match(vl, re), 
!PCRE_EXEC_COND);
-               }
+               pcre_join_loop(strcmp(vl, vr) == 0, re_match(vl, re), 
!PCRE_EXEC_COND);
        } else {
-               if (caseignore) {
-                       pcre_join_loop(GDKstrcasecmp(vl, vr) != 0,
-                                                  !re_match(vl, re), 
PCRE_EXEC_COND);
-               } else {
-                       pcre_join_loop(strcmp(vl, vr) != 0, !re_match(vl, re), 
PCRE_EXEC_COND);
-               }
+               pcre_join_loop(strcmp(vl, vr) != 0, !re_match(vl, re), 
PCRE_EXEC_COND);
        }
        bat_iterator_end(&li);
        bat_iterator_end(&ri);
+       if (ol) {
+               BBPreclaim(l);
+               BBPreclaim(r);
+               l = ol;
+               r = or;
+       }
 
        assert(!r2 || BATcount(r1) == BATcount(r2));
        /* also set other bits of heap to correct value to indicate size */
@@ -2021,23 +2016,52 @@ pcrejoin(BAT *r1, BAT *r2, BAT *l, BAT *
                if (r2)
                        r2->tseqbase = 0;
        }
+
        if (r2)
                TRC_DEBUG(ALGO,
-                                 "pcrejoin(l=%s,r=%s)=(%s#" BUNFMT "%s%s,%s#" 
BUNFMT "%s%s\n",
-                                 BATgetId(l), BATgetId(r),
+                                 "l=%s#" BUNFMT "[%s]%s%s,"
+                                 "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT 
"%s%s,"
+                                 "sr=%s#" BUNFMT "%s%s -> "
+                                 "%s#" BUNFMT "%s%s,%s#" BUNFMT "%s%s (" LLFMT 
" usec)\n",
+                                 BATgetId(l), BATcount(l), ATOMname(l->ttype),
+                                 l->tsorted ? "-sorted" : "",
+                                 l->trevsorted ? "-revsorted" : "",
+                                 BATgetId(r), BATcount(r), ATOMname(r->ttype),
+                                 r->tsorted ? "-sorted" : "",
+                                 r->trevsorted ? "-revsorted" : "",
+                                 sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) 
: 0,
+                                 sl && sl->tsorted ? "-sorted" : "",
+                                 sl && sl->trevsorted ? "-revsorted" : "",
+                                 sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) 
: 0,
+                                 sr && sr->tsorted ? "-sorted" : "",
+                                 sr && sr->trevsorted ? "-revsorted" : "",
                                  BATgetId(r1), BATcount(r1),
                                  r1->tsorted ? "-sorted" : "",
                                  r1->trevsorted ? "-revsorted" : "",
                                  BATgetId(r2), BATcount(r2),
                                  r2->tsorted ? "-sorted" : "",
-                                 r2->trevsorted ? "-revsorted" : "");
+                                 r2->trevsorted ? "-revsorted" : "", GDKusec() 
- t0);
        else
                TRC_DEBUG(ALGO,
-                                 "pcrejoin(l=%s,r=%s)=(%s#" BUNFMT "%s%s\n",
-                                 BATgetId(l), BATgetId(r),
+                                 "l=%s#" BUNFMT "[%s]%s%s,"
+                                 "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT 
"%s%s,"
+                                 "sr=%s#" BUNFMT "%s%s -> "
+                                 "%s#" BUNFMT "%s%s (" LLFMT " usec)\n",
+                                 BATgetId(l), BATcount(l), ATOMname(l->ttype),
+                                 l->tsorted ? "-sorted" : "",
+                                 l->trevsorted ? "-revsorted" : "",
+                                 BATgetId(r), BATcount(r), ATOMname(r->ttype),
+                                 r->tsorted ? "-sorted" : "",
+                                 r->trevsorted ? "-revsorted" : "",
+                                 sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) 
: 0,
+                                 sl && sl->tsorted ? "-sorted" : "",
+                                 sl && sl->trevsorted ? "-revsorted" : "",
+                                 sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) 
: 0,
+                                 sr && sr->tsorted ? "-sorted" : "",
+                                 sr && sr->trevsorted ? "-revsorted" : "",
                                  BATgetId(r1), BATcount(r1),
                                  r1->tsorted ? "-sorted" : "",
-                                 r1->trevsorted ? "-revsorted" : "");
+                                 r1->trevsorted ? "-revsorted" : "", GDKusec() 
- t0);
        return MAL_SUCCEED;
 
   bailout:
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to