Changeset: aa20ce0fc12a for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/aa20ce0fc12a
Modified Files:
        monetdb5/modules/atoms/str.c
Branch: Jun2023
Log Message:

Strimps now work in the normal join. The anti join still defaults to the nested loop without Strimps.


diffs (218 lines):

diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -5230,32 +5230,35 @@ STRcontainsselect(bat *ret, const bat *b
                B->tseqbase = 0;                                                
\
        } while (0)
 
-#define batstr_join_loop(STRCMP, STR_LEN, WITH_STRIMPS)                        
        \
+#define str_join_loop(STRCMP, STR_LEN)                                         
                        \
        do {                                                                    
                                                        \
                for (BUN ridx = 0; ridx < rci.ncand; ridx++) {                  
                \
-                       GDK_CHECK_TIMEOUT(timeoffset, counter, 
GOTO_LABEL_TIMEOUT_HANDLER(exit));\
+                       GDK_CHECK_TIMEOUT(timeoffset, counter, 
GOTO_LABEL_TIMEOUT_HANDLER(exit)); \
                        ro = canditer_next(&rci);                               
                                        \
                        vr = VALUE(r, ro - rbase);                              
                                        \
-                       rlen = str_strlen(vr);                                  
                                        \
+                       rlen = STR_LEN;                                         
                                        \
                        nl = 0;                                                 
                                                \
-                       canditer_reset(&lci);                                   
                                        \
-                       if (with_strimps) {                                     
                                        \
-                               if(!(filtered_sl = STRMPfilter(l, sl, vr, 
anti)))               \
-                                       sl = filtered_sl;                       
                                                \
-                       }                                                       
                                                                \
+                       if (with_strimps)                                       
                                                \
+                               filtered_sl = STRMPfilter(l, sl, vr, anti);     
                \
+                       if (filtered_sl)                                        
                                                \
+                               canditer_init(&lci, l, filtered_sl);            
                        \
+                       else                                                    
                                                        \
+                               canditer_init(&lci, l, sl);                     
                                \
                        for (BUN lidx = 0; lidx < lci.ncand; lidx++) {          
                \
                                lo = canditer_next(&lci);                       
                                        \
                                vl = VALUE(l, lo - lbase);                      
                                        \
                                if (strNil(vl)) {                               
                                                \
                                        continue;                               
                                                        \
-                               } else if (!(STRCMP))                           
                                        \
+                               } else if (!(STRCMP)) {                         
                                \
                                        continue;                               
                                                        \
+                               }                                               
                                                                \
                                if (BATcount(r1) == BATcapacity(r1)) {          
                        \
                                        newcap = BATgrows(r1);                  
                                        \
                                        BATsetcount(r1, BATcount(r1));          
                                \
                                        if (r2)                                 
                                                \
                                                BATsetcount(r2, BATcount(r2));  
                                \
-                                       if (BATextend(r1, newcap) != 
GDK_SUCCEED || (r2 && BATextend(r2, newcap) != GDK_SUCCEED)) { \
+                                       if (BATextend(r1, newcap) != 
GDK_SUCCEED ||             \
+                                               (r2 && BATextend(r2, newcap) != 
GDK_SUCCEED)) { \
                                                msg = createException(MAL, 
"str.%s", fname, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
                                                goto exit;                      
                                                        \
                                        }                                       
                                                                \
@@ -5282,9 +5285,75 @@ STRcontainsselect(bat *ret, const bat *b
                                        APPEND(r2, ro);                         
                                        \
                                lastl = lo;                                     
                                                \
                                nl++;                                           
                                                        \
-                               if (with_strimps) {                             
                                        \
-                                       sl = original_sl;                       
                                                \
+                       }                                                       
                                                                \
+                       if (with_strimps && filtered_sl)                        
                                \
+                               BBPreclaim(filtered_sl);                        
                                        \
+                       if (r2) {                                               
                                                        \
+                               if (nl > 1) {                                   
                                                \
+                                       r2->tkey = false;                       
                                                \
+                                       r2->tseqbase = oid_nil;                 
                                \
+                                       r1->trevsorted = false;                 
                                \
+                               } else if (nl == 0) {                           
                                        \
+                                       rskipped = BATcount(r2) > 0;            
                                \
+                               } else if (rskipped) {                          
                                        \
+                                       r2->tseqbase = oid_nil;                 
                                \
                                }                                               
                                                                \
+                       } else if (nl > 1) {                                    
                                        \
+                               r1->trevsorted = false;                         
                                \
+                       }                                                       
                                                                \
+               }                                                               
                                                                \
+       } while (0)
+
+#define str_antijoin_loop(STRCMP, STR_LEN)                                     
                        \
+       do {                                                                    
                                                        \
+               for (BUN ridx = 0; ridx < rci.ncand; ridx++) {                  
                \
+                       GDK_CHECK_TIMEOUT(timeoffset, counter, 
GOTO_LABEL_TIMEOUT_HANDLER(exit)); \
+                       ro = canditer_next(&rci);                               
                                        \
+                       vr = VALUE(r, ro - rbase);                              
                                        \
+                       rlen = STR_LEN;                                         
                                        \
+                       nl = 0;                                                 
                                                \
+                       canditer_init(&lci, l, sl);                             
                                \
+                       for (BUN lidx = 0; lidx < lci.ncand; lidx++) {          
                \
+                               lo = canditer_next(&lci);                       
                                        \
+                               vl = VALUE(l, lo - lbase);                      
                                        \
+                               if (strNil(vl)) {                               
                                                \
+                                       continue;                               
                                                        \
+                               } else if (!(STRCMP)) {                         
                                \
+                                       continue;                               
                                                        \
+                               }                                               
                                                                \
+                               if (BATcount(r1) == BATcapacity(r1)) {          
                        \
+                                       newcap = BATgrows(r1);                  
                                        \
+                                       BATsetcount(r1, BATcount(r1));          
                                \
+                                       if (r2)                                 
                                                \
+                                               BATsetcount(r2, BATcount(r2));  
                                \
+                                       if (BATextend(r1, newcap) != 
GDK_SUCCEED ||             \
+                                               (r2 && BATextend(r2, newcap) != 
GDK_SUCCEED)) { \
+                                               msg = createException(MAL, 
"str.%s", fname, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
+                                               goto exit;                      
                                                        \
+                                       }                                       
                                                                \
+                                       assert(!r2 || BATcapacity(r1) == 
BATcapacity(r2));      \
+                               }                                               
                                                                \
+                               if (BATcount(r1) > 0) {                         
                                \
+                                       if (lastl + 1 != lo)                    
                                        \
+                                               r1->tseqbase = oid_nil;         
                                \
+                                       if (nl == 0) {                          
                                                \
+                                               if (r2)                         
                                                \
+                                                       r2->trevsorted = false; 
                                \
+                                               if (lastl > lo) {               
                                                \
+                                                       r1->tsorted = false;    
                                        \
+                                                       r1->tkey = false;       
                                                \
+                                               } else if (lastl < lo) {        
                                        \
+                                                       r1->trevsorted = false; 
                                \
+                                               } else {                        
                                                        \
+                                                       r1->tkey = false;       
                                                \
+                                               }                               
                                                                \
+                                       }                                       
                                                                \
+                               }                                               
                                                                \
+                               APPEND(r1, lo);                                 
                                        \
+                               if (r2)                                         
                                                \
+                                       APPEND(r2, ro);                         
                                        \
+                               lastl = lo;                                     
                                                \
+                               nl++;                                           
                                                        \
                        }                                                       
                                                                \
                        if (r2) {                                               
                                                        \
                                if (nl > 1) {                                   
                                                \
@@ -5299,18 +5368,6 @@ STRcontainsselect(bat *ret, const bat *b
                        } else if (nl > 1) {                                    
                                        \
                                r1->trevsorted = false;                         
                                \
                        }                                                       
                                                                \
-                       if (with_strimps && anti) {                             
                                \
-                               BAT *rev;                                       
                                                        \
-                               if (original_sl) {                              
                                                \
-                                       rev = BATdiffcand(original_sl, r1);     
                        \
-                                       assert (BATintersectcand(original_sl, 
r1)->batCount == r1->batCount); \
-                                       assert (rev->batCount == 
original_sl->batCount - r1->batCount); \
-                               }                                               
                                                                \
-                               else                                            
                                                        \
-                                       rev = BATnegcands(l->batCount, r1);     
                        \
-                               BBPreclaim(r1);                                 
                                        \
-                               r1 = rev;                                       
                                                        \
-                       }                                                       
                                                                \
                }                                                               
                                                                \
        } while (0)
 
@@ -5318,12 +5375,13 @@ static str
 strjoin(BAT *r1, BAT *r2, BAT *l, BAT *r, BAT *sl, BAT *sr, bit anti,
                bit (*str_cmp)(const char*, const char*, int), const str fname)
 {
+       (void)anti;
        struct canditer lci, rci;
        const char *lvals, *rvals, *lvars, *rvars, *vl, *vr;
        int rskipped = 0, rlen = 0;
        oid lbase, rbase, lo, ro, lastl = 0;
        BUN nl, newcap;
-       BAT *original_sl = sl, *filtered_sl = NULL;
+       BAT *filtered_sl = NULL;
        bool with_strimps = false;
        char *msg = MAL_SUCCEED;
 
@@ -5335,8 +5393,10 @@ strjoin(BAT *r1, BAT *r2, BAT *l, BAT *r
                        (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
 
        if (BAThasstrimps(l)) {
-               if (STRMPcreate(l, NULL) == GDK_SUCCEED)
+               if (STRMPcreate(l, NULL) == GDK_SUCCEED){
+                       /* original_sl = sl; */
                        with_strimps = true;
+               }
                /* else throw the GDK error and default to nested loop without 
filters */
        }
 
@@ -5361,7 +5421,7 @@ strjoin(BAT *r1, BAT *r2, BAT *l, BAT *r
        assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
        assert(ATOMtype(l->ttype) == TYPE_str);
 
-       canditer_init(&lci, l, sl);
+       /* canditer_init(&lci, l, sl); */
        canditer_init(&rci, r, sr);
 
        BATiter li = bat_iterator(l);
@@ -5374,19 +5434,11 @@ strjoin(BAT *r1, BAT *r2, BAT *l, BAT *r
        lvars = li.vh->base;
        rvars = ri.vh->base;
 
-       set_empty_bat_props(r1);
-       if (r2)
-               set_empty_bat_props(r2);
-
-       /* if (anti) */
-       /*      batstr_join_loop(str_cmp(vl, vr, rlen) == 0, str_strlen(vr), 
with_strimps); */
-       /* else */
-       /*      batstr_join_loop(str_cmp(vl, vr, rlen) != 0, str_strlen(vr), 
with_strimps); */
-
-       batstr_join_loop(anti && !with_strimps ?
-                                        (str_cmp(vl, vr, rlen) == 0) :
-                                        (str_cmp(vl, vr, rlen) != 0),
-                                        str_strlen(vr), with_strimps);
+       if (anti)
+               str_antijoin_loop(str_cmp(vl, vr, rlen) == 0, str_strlen(vr));
+       else {
+               str_join_loop(str_cmp(vl, vr, rlen) != 0, str_strlen(vr));
+       }
 
        assert(!r2 || BATcount(r1) == BATcount(r2));
        BATsetcount(r1, BATcount(r1));
@@ -5423,10 +5475,6 @@ strjoin(BAT *r1, BAT *r2, BAT *l, BAT *r
                                  r1->tsorted ? "-sorted" : "",
                                  r1->trevsorted ? "-revsorted" : "");
  exit:
-       if (with_strimps) {
-               BBPreclaim(filtered_sl);
-               BBPreclaim(original_sl);
-       }
        bat_iterator_end(&li);
        bat_iterator_end(&ri);
        return msg;
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to