Changeset: b80b69e84cb8 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/b80b69e84cb8
Modified Files:
        gdk/gdk_bbp.c
Branch: default
Log Message:

Merge with Jun2023 branch.


diffs (truncated from 2190 to 300 lines):

diff --git a/ChangeLog.Jun2023 b/ChangeLog.Jun2023
--- a/ChangeLog.Jun2023
+++ b/ChangeLog.Jun2023
@@ -1,3 +1,8 @@
 # ChangeLog file for devel
 # This file is updated with Maddlog
 
+* Thu Aug 10 2023 Panagiotis Koutsourakis <kutsu...@monetdbsolutions.com>
+- Improve performance of the ILIKE operator when the pattern contains only
+  ASCII characters. In this case we do not need to treat any characters as
+  UTF-8 and we can use much faster routines that perform byte comparisons.
+
diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -3073,11 +3073,14 @@ BATdescriptor(bat i)
                }
                if (incref(i, false, false) > 0) {
                        b = BBP_cache(i);
-                       if (b == NULL)
+                       if (b == NULL) {
                                b = getBBPdescriptor(i);
-               } else {
-                       /* if incref fails, we must return NULL */
-                       b = NULL;
+                               if (b == NULL) {
+                                       /* if loading failed, we need to
+                                        * compensate for the incref */
+                                       decref(i, false, false, __func__);
+                               }
+                       }
                }
                if (lock)
                        MT_lock_unset(&GDKswapLock(i));
diff --git a/gdk/gdk_logger.c b/gdk/gdk_logger.c
--- a/gdk/gdk_logger.c
+++ b/gdk/gdk_logger.c
@@ -35,7 +35,7 @@ static gdk_return log_del_bat(logger *lg
 #define LOG_CREATE     5
 #define LOG_DESTROY    6
 #define LOG_SEQ                7
-#define LOG_CLEAR      8 /* DEPRECATED */
+#define LOG_CLEAR      8       /* DEPRECATED */
 #define LOG_BAT_GROUP  9
 
 #ifdef NATIVE_WIN32
@@ -61,7 +61,7 @@ static const char *log_commands[] = {
        "LOG_CREATE",
        "LOG_DESTROY",
        "LOG_SEQ",
-       "", /* LOG_CLEAR IS DEPRECATED */
+       "",                     /* LOG_CLEAR IS DEPRECATED */
        "LOG_BAT_GROUP",
 };
 
@@ -92,9 +92,9 @@ typedef struct logformat_t {
        int id;
 } logformat;
 
-typedef enum {LOG_OK, LOG_EOF, LOG_ERR} log_return;
-
-static gdk_return bm_commit(logger *lg);
+typedef enum { LOG_OK, LOG_EOF, LOG_ERR } log_return;
+
+static gdk_return bm_commit(logger *lg, uint32_t *updated, BUN maxupdated);
 static gdk_return tr_grow(trans *tr);
 
 #define log_lock(lg)   MT_lock_set(&(lg)->lock)
@@ -300,10 +300,10 @@ string_reader(logger *lg, BAT *b, lng nr
        lng SZ = 0;
        log_return res = LOG_OK;
 
-       for (; nr && res == LOG_OK; ) {
+       while (nr && res == LOG_OK) {
                if (mnstr_readLng(lg->input_log, &SZ) != 1)
                        return LOG_EOF;
-               sz = (size_t)SZ;
+               sz = (size_t) SZ;
                char *buf = lg->rbuf;
                if (lg->rbufsize < sz) {
                        if (!(buf = GDKrealloc(lg->rbuf, sz)))
@@ -321,18 +321,22 @@ string_reader(logger *lg, BAT *b, lng nr
                char *strings[CHUNK_SIZE];
                int cur = 0;
 
-               for(; nr>0 && res == LOG_OK && t < (buf+sz); nr--) {
+               for (; nr > 0 && res == LOG_OK && t < (buf + sz); nr--) {
                        strings[cur++] = t;
-                       if (cur == CHUNK_SIZE && b && BUNappendmulti(b, strings, cur, true) != GDK_SUCCEED)
+                       if (cur == CHUNK_SIZE &&
+                           b &&
+                           BUNappendmulti(b, strings, cur, true) != GDK_SUCCEED)
                                res = LOG_ERR;
                        if (cur == CHUNK_SIZE)
                                cur = 0;
                        /* find next */
-                       while(*t)
+                       while (*t)
                                t++;
                        t++;
                }
-               if (cur && b && BUNappendmulti(b, strings, cur, true) != GDK_SUCCEED)
+               if (cur &&
+                   b &&
+                   BUNappendmulti(b, strings, cur, true) != GDK_SUCCEED)
                        res = LOG_ERR;
        }
        return res;
@@ -340,13 +344,13 @@ string_reader(logger *lg, BAT *b, lng nr
 
 
 struct offset {
-       lng os /*offset within source BAT in logfile */;
-       lng nr /*number of values to be copied*/;
-       lng od /*offset within destination BAT in database*/;
+       lng os;           /* offset within source BAT in logfile */
+       lng nr;           /* number of values to be copied */
+       lng od;           /* offset within destination BAT in database */
 };
 
 static log_return
-log_read_updates(logger *lg, trans *tr, logformat *l, log_id id, BAT** cands)
+log_read_updates(logger *lg, trans *tr, logformat *l, log_id id, BAT **cands)
 {
        log_return res = LOG_OK;
        lng nr, pnr;
@@ -365,12 +369,12 @@ log_read_updates(logger *lg, trans *tr, 
        if (tpe >= 0) {
                BAT *uid = NULL;
                BAT *r = NULL;
-               void *(*rt) (ptr, size_t *, stream *, size_t) = BATatoms[tpe].atomRead;
+               void *(*rt)(ptr, size_t *, stream *, size_t) = BATatoms[tpe].atomRead;
                lng offset;
 
                assert(nr <= (lng) BUN_MAX);
                if (!lg->flushing && l->flag == LOG_UPDATE) {
-                       uid = COLnew(0, TYPE_oid, (BUN)nr, PERSISTENT);
+                       uid = COLnew(0, TYPE_oid, (BUN) nr, PERSISTENT);
                        if (uid == NULL) {
                                return LOG_ERR;
                        }
@@ -387,20 +391,17 @@ log_read_updates(logger *lg, trans *tr, 
                                        assert((*cands)->ttype == TYPE_void);
                                        BATtseqbase(*cands, (oid) offset);
                                        BATsetcount(*cands, (BUN) nr);
-                               }
-                               else if (!lg->flushing) {
+                               } else if (!lg->flushing) {
                                        assert(BATcount(*cands) > 0);
-                                       BAT* dense = BATdense(0, (oid) offset, (BUN) nr);
-                                       BAT* newcands = NULL;
-                                       if (!dense ) {
+                                       BAT *dense = BATdense(0, (oid) offset, (BUN) nr);
+                                       BAT *newcands = NULL;
+                                       if (!dense) {
                                                res = LOG_ERR;
-                                       }
-                                       else if ((*cands)->ttype == TYPE_void) {
-                                               if ( (newcands = BATmergecand(*cands, dense)) ) {
+                                       } else if ((*cands)->ttype == TYPE_void) {
+                                               if ((newcands = BATmergecand(*cands, dense))) {
                                                        BBPreclaim(*cands);
                                                        *cands = newcands;
-                                               }
-                                               else
+                                               } else
                                                        res = LOG_ERR;
                                        } else {
                                                assert((*cands)->ttype == TYPE_oid);
@@ -438,7 +439,7 @@ log_read_updates(logger *lg, trans *tr, 
                        } else {
                                lg->rbuf = t;
                                lg->rbufsize = tlen;
-                               for(BUN p = 0; p<(BUN) nr; p++) {
+                               for (BUN p = 0; p < (BUN) nr; p++) {
                                        if (r && BUNappend(r, t, true) != GDK_SUCCEED)
                                                res = LOG_ERR;
                                }
@@ -456,25 +457,25 @@ log_read_updates(logger *lg, trans *tr, 
                                        else
                                                res = LOG_ERR;
                                } else {
-                                       size_t tlen = lg->rbufsize/sizeof(int);
-                                       size_t cnt = 0, snr = (size_t)nr;
-                                       snr = (snr+31)/32;
+                                       size_t tlen = lg->rbufsize / sizeof(int);
+                                       size_t cnt = 0, snr = (size_t) nr;
+                                       snr = (snr + 31) / 32;
                                        assert(tlen);
-                                       for (; res == LOG_OK && snr > 0; snr-=cnt) {
-                                               cnt = snr>tlen?tlen:snr;
+                                       for (; res == LOG_OK && snr > 0; snr -= cnt) {
+                                               cnt = snr > tlen ? tlen : snr;
                                                if (!mnstr_readIntArray(lg->input_log, lg->rbuf, cnt))
                                                        res = LOG_ERR;
                                        }
                                }
                        } else {
                                if (!ATOMvarsized(tpe)) {
-                                       size_t cnt = 0, snr = (size_t)nr;
-                                       size_t tlen = lg->rbufsize/ATOMsize(tpe), ntlen = lg->rbufsize;
+                                       size_t cnt = 0, snr = (size_t) nr;
+                                       size_t tlen = lg->rbufsize / ATOMsize(tpe), ntlen = lg->rbufsize;
                                        assert(tlen);
                                        /* read in chunks of max
                                         * BUFSIZE/width rows */
-                                       for (; res == LOG_OK && snr > 0; snr-=cnt) {
-                                               cnt = snr>tlen?tlen:snr;
+                                       for (; res == LOG_OK && snr > 0; snr -= cnt) {
+                                               cnt = snr > tlen ? tlen : snr;
                                                void *t = rt(lg->rbuf, &ntlen, lg->input_log, cnt);
 
                                                if (t == NULL) {
@@ -495,9 +496,9 @@ log_read_updates(logger *lg, trans *tr, 
 
                                                if (t == NULL) {
                                                        /* see if failure was due to
-                                                       * malloc or something less
-                                                       * serious (in the current
-                                                       * context) */
+                                                        * malloc or something less
+                                                        * serious (in the current
+                                                        * context) */
                                                        if (strstr(GDKerrbuf, "alloc") == NULL)
                                                                res = LOG_EOF;
                                                        else
@@ -512,7 +513,7 @@ log_read_updates(logger *lg, trans *tr, 
                                }
                        }
                } else {
-                       void *(*rh) (ptr, size_t *, stream *, size_t) = BATatoms[TYPE_oid].atomRead;
+                       void *(*rh)(ptr, size_t *, stream *, size_t) = BATatoms[TYPE_oid].atomRead;
                        void *hv = ATOMnil(TYPE_oid);
                        offset = 0;
 
@@ -576,23 +577,23 @@ log_read_updates(logger *lg, trans *tr, 
                if (res == LOG_OK) {
                        if (tr_grow(tr) == GDK_SUCCEED) {
                                tr->changes[tr->nr].type = l->flag;
-                               if (l->flag==LOG_UPDATE_BULK && offset == -1) {
-                                       assert(cands); /* bat r is part of a group of bats logged together. */
+                               if (l->flag == LOG_UPDATE_BULK && offset == -1) {
+                                       assert(cands);  /* bat r is part of a group of bats logged together. */
                                        struct canditer ci;
                                        canditer_init(&ci, NULL, *cands);
                                        const oid first = canditer_peek(&ci);
                                        const oid last = canditer_last(&ci);
                                        offset = (lng) first;
                                        pnr = (lng) (last - first) + 1;
-                                       if (!lg->flushing ) {
+                                       if (!lg->flushing) {
                                                assert(uid == NULL);
                                                uid = *cands;
                                                BBPfix((*cands)->batCacheid);
                                                tr->changes[tr->nr].type = LOG_UPDATE;
                                        }
                                }
-                               if (l->flag==LOG_UPDATE_CONST) {
-                                       assert(!cands); // TODO: This might change in the future.
+                               if (l->flag == LOG_UPDATE_CONST) {
+                                       assert(!cands); /* TODO: This might change in the future. */
                                        tr->changes[tr->nr].type = LOG_UPDATE_BULK;
                                }
                                tr->changes[tr->nr].nr = pnr;
@@ -639,7 +640,7 @@ la_bat_update_count(logger *lg, log_id i
                        cp = p;
                }
                if (cp != BUN_NONE) {
-                       lng ocnt = *(lng*) Tloc(lg->catalog_cnt, cp);
+                       lng ocnt = *(lng *) Tloc(lg->catalog_cnt, cp);
                        assert(lg->catalog_cnt->hseqbase == 0);
                        if (ocnt < cnt && BUNreplace(lg->catalog_cnt, cp, &cnt, false) != GDK_SUCCEED) {
                                MT_rwlock_rdunlock(&cni.b->thashlock);
@@ -676,22 +677,22 @@ la_bat_updates(logger *lg, logaction *la
                        cnt = BATcount(b);
                        int is_msk = (b->ttype == TYPE_msk);
                        /* handle offset 0 ie clear */
-                       if (/* DISABLES CODE */ (0) && la->offset == 0 && cnt)
+                       if ( /* DISABLES CODE */ (0) && la->offset == 0 && cnt)
                                BATclear(b, true);
                        /* handle offset */
-                       if (cnt <= (BUN)la->offset) {
+                       if (cnt <= (BUN) la->offset) {
                                msk t = 1;
-                               if (cnt < (BUN)la->offset) { /* insert nils */
-                                       const void *tv = (is_msk)?&t:ATOMnilptr(b->ttype);
+                               if (cnt < (BUN) la->offset) {   /* insert nils */
+                                       const void *tv = (is_msk) ? &t : ATOMnilptr(b->ttype);
                                        lng i, d = la->offset - BATcount(b);
-                                       for(i=0;i<d;i++) {
+                                       for (i = 0; i < d; i++) {
                                                if (BUNappend(b, tv, true) != GDK_SUCCEED) {
                                                        logbat_destroy(b);
                                                        return GDK_FAIL;
                                                }
                                        }
                                }
-                               if (BATcount(b) == (BUN)la->offset && BATappend(b, la->b, NULL, true) != GDK_SUCCEED) {
+                               if (BATcount(b) == (BUN) la->offset && BATappend(b, la->b, NULL, true) != GDK_SUCCEED) {
                                        logbat_destroy(b);
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to