Changeset: b80b69e84cb8 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/b80b69e84cb8
Modified Files:
	gdk/gdk_bbp.c
Branch: default
Log Message:
Merge with Jun2023 branch. diffs (truncated from 2190 to 300 lines): diff --git a/ChangeLog.Jun2023 b/ChangeLog.Jun2023 --- a/ChangeLog.Jun2023 +++ b/ChangeLog.Jun2023 @@ -1,3 +1,8 @@ # ChangeLog file for devel # This file is updated with Maddlog +* Thu Aug 10 2023 Panagiotis Koutsourakis <kutsu...@monetdbsolutions.com> +- Improve performance of the ILIKE operator when the pattern contains only + ASCII characters. In this case we do not need to treat any characters as + UTF-8 and we can use much faster routines that perform byte comparisons. + diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c --- a/gdk/gdk_bbp.c +++ b/gdk/gdk_bbp.c @@ -3073,11 +3073,14 @@ BATdescriptor(bat i) } if (incref(i, false, false) > 0) { b = BBP_cache(i); - if (b == NULL) + if (b == NULL) { b = getBBPdescriptor(i); - } else { - /* if incref fails, we must return NULL */ - b = NULL; + if (b == NULL) { + /* if loading failed, we need to + * compensate for the incref */ + decref(i, false, false, __func__); + } + } } if (lock) MT_lock_unset(&GDKswapLock(i)); diff --git a/gdk/gdk_logger.c b/gdk/gdk_logger.c --- a/gdk/gdk_logger.c +++ b/gdk/gdk_logger.c @@ -35,7 +35,7 @@ static gdk_return log_del_bat(logger *lg #define LOG_CREATE 5 #define LOG_DESTROY 6 #define LOG_SEQ 7 -#define LOG_CLEAR 8 /* DEPRECATED */ +#define LOG_CLEAR 8 /* DEPRECATED */ #define LOG_BAT_GROUP 9 #ifdef NATIVE_WIN32 @@ -61,7 +61,7 @@ static const char *log_commands[] = { "LOG_CREATE", "LOG_DESTROY", "LOG_SEQ", - "", /* LOG_CLEAR IS DEPRECATED */ + "", /* LOG_CLEAR IS DEPRECATED */ "LOG_BAT_GROUP", }; @@ -92,9 +92,9 @@ typedef struct logformat_t { int id; } logformat; -typedef enum {LOG_OK, LOG_EOF, LOG_ERR} log_return; - -static gdk_return bm_commit(logger *lg); +typedef enum { LOG_OK, LOG_EOF, LOG_ERR } log_return; + +static gdk_return bm_commit(logger *lg, uint32_t *updated, BUN maxupdated); static gdk_return tr_grow(trans *tr); #define log_lock(lg) MT_lock_set(&(lg)->lock) @@ -300,10 +300,10 @@ string_reader(logger *lg, BAT *b, 
lng nr lng SZ = 0; log_return res = LOG_OK; - for (; nr && res == LOG_OK; ) { + while (nr && res == LOG_OK) { if (mnstr_readLng(lg->input_log, &SZ) != 1) return LOG_EOF; - sz = (size_t)SZ; + sz = (size_t) SZ; char *buf = lg->rbuf; if (lg->rbufsize < sz) { if (!(buf = GDKrealloc(lg->rbuf, sz))) @@ -321,18 +321,22 @@ string_reader(logger *lg, BAT *b, lng nr char *strings[CHUNK_SIZE]; int cur = 0; - for(; nr>0 && res == LOG_OK && t < (buf+sz); nr--) { + for (; nr > 0 && res == LOG_OK && t < (buf + sz); nr--) { strings[cur++] = t; - if (cur == CHUNK_SIZE && b && BUNappendmulti(b, strings, cur, true) != GDK_SUCCEED) + if (cur == CHUNK_SIZE && + b && + BUNappendmulti(b, strings, cur, true) != GDK_SUCCEED) res = LOG_ERR; if (cur == CHUNK_SIZE) cur = 0; /* find next */ - while(*t) + while (*t) t++; t++; } - if (cur && b && BUNappendmulti(b, strings, cur, true) != GDK_SUCCEED) + if (cur && + b && + BUNappendmulti(b, strings, cur, true) != GDK_SUCCEED) res = LOG_ERR; } return res; @@ -340,13 +344,13 @@ string_reader(logger *lg, BAT *b, lng nr struct offset { - lng os /*offset within source BAT in logfile */; - lng nr /*number of values to be copied*/; - lng od /*offset within destination BAT in database*/; + lng os; /* offset within source BAT in logfile */ + lng nr; /* number of values to be copied */ + lng od; /* offset within destination BAT in database */ }; static log_return -log_read_updates(logger *lg, trans *tr, logformat *l, log_id id, BAT** cands) +log_read_updates(logger *lg, trans *tr, logformat *l, log_id id, BAT **cands) { log_return res = LOG_OK; lng nr, pnr; @@ -365,12 +369,12 @@ log_read_updates(logger *lg, trans *tr, if (tpe >= 0) { BAT *uid = NULL; BAT *r = NULL; - void *(*rt) (ptr, size_t *, stream *, size_t) = BATatoms[tpe].atomRead; + void *(*rt)(ptr, size_t *, stream *, size_t) = BATatoms[tpe].atomRead; lng offset; assert(nr <= (lng) BUN_MAX); if (!lg->flushing && l->flag == LOG_UPDATE) { - uid = COLnew(0, TYPE_oid, (BUN)nr, PERSISTENT); + uid = 
COLnew(0, TYPE_oid, (BUN) nr, PERSISTENT); if (uid == NULL) { return LOG_ERR; } @@ -387,20 +391,17 @@ log_read_updates(logger *lg, trans *tr, assert((*cands)->ttype == TYPE_void); BATtseqbase(*cands, (oid) offset); BATsetcount(*cands, (BUN) nr); - } - else if (!lg->flushing) { + } else if (!lg->flushing) { assert(BATcount(*cands) > 0); - BAT* dense = BATdense(0, (oid) offset, (BUN) nr); - BAT* newcands = NULL; - if (!dense ) { + BAT *dense = BATdense(0, (oid) offset, (BUN) nr); + BAT *newcands = NULL; + if (!dense) { res = LOG_ERR; - } - else if ((*cands)->ttype == TYPE_void) { - if ( (newcands = BATmergecand(*cands, dense)) ) { + } else if ((*cands)->ttype == TYPE_void) { + if ((newcands = BATmergecand(*cands, dense))) { BBPreclaim(*cands); *cands = newcands; - } - else + } else res = LOG_ERR; } else { assert((*cands)->ttype == TYPE_oid); @@ -438,7 +439,7 @@ log_read_updates(logger *lg, trans *tr, } else { lg->rbuf = t; lg->rbufsize = tlen; - for(BUN p = 0; p<(BUN) nr; p++) { + for (BUN p = 0; p < (BUN) nr; p++) { if (r && BUNappend(r, t, true) != GDK_SUCCEED) res = LOG_ERR; } @@ -456,25 +457,25 @@ log_read_updates(logger *lg, trans *tr, else res = LOG_ERR; } else { - size_t tlen = lg->rbufsize/sizeof(int); - size_t cnt = 0, snr = (size_t)nr; - snr = (snr+31)/32; + size_t tlen = lg->rbufsize / sizeof(int); + size_t cnt = 0, snr = (size_t) nr; + snr = (snr + 31) / 32; assert(tlen); - for (; res == LOG_OK && snr > 0; snr-=cnt) { - cnt = snr>tlen?tlen:snr; + for (; res == LOG_OK && snr > 0; snr -= cnt) { + cnt = snr > tlen ? 
tlen : snr; if (!mnstr_readIntArray(lg->input_log, lg->rbuf, cnt)) res = LOG_ERR; } } } else { if (!ATOMvarsized(tpe)) { - size_t cnt = 0, snr = (size_t)nr; - size_t tlen = lg->rbufsize/ATOMsize(tpe), ntlen = lg->rbufsize; + size_t cnt = 0, snr = (size_t) nr; + size_t tlen = lg->rbufsize / ATOMsize(tpe), ntlen = lg->rbufsize; assert(tlen); /* read in chunks of max * BUFSIZE/width rows */ - for (; res == LOG_OK && snr > 0; snr-=cnt) { - cnt = snr>tlen?tlen:snr; + for (; res == LOG_OK && snr > 0; snr -= cnt) { + cnt = snr > tlen ? tlen : snr; void *t = rt(lg->rbuf, &ntlen, lg->input_log, cnt); if (t == NULL) { @@ -495,9 +496,9 @@ log_read_updates(logger *lg, trans *tr, if (t == NULL) { /* see if failure was due to - * malloc or something less - * serious (in the current - * context) */ + * malloc or something less + * serious (in the current + * context) */ if (strstr(GDKerrbuf, "alloc") == NULL) res = LOG_EOF; else @@ -512,7 +513,7 @@ log_read_updates(logger *lg, trans *tr, } } } else { - void *(*rh) (ptr, size_t *, stream *, size_t) = BATatoms[TYPE_oid].atomRead; + void *(*rh)(ptr, size_t *, stream *, size_t) = BATatoms[TYPE_oid].atomRead; void *hv = ATOMnil(TYPE_oid); offset = 0; @@ -576,23 +577,23 @@ log_read_updates(logger *lg, trans *tr, if (res == LOG_OK) { if (tr_grow(tr) == GDK_SUCCEED) { tr->changes[tr->nr].type = l->flag; - if (l->flag==LOG_UPDATE_BULK && offset == -1) { - assert(cands); /* bat r is part of a group of bats logged together. */ + if (l->flag == LOG_UPDATE_BULK && offset == -1) { + assert(cands); /* bat r is part of a group of bats logged together. 
*/ struct canditer ci; canditer_init(&ci, NULL, *cands); const oid first = canditer_peek(&ci); const oid last = canditer_last(&ci); offset = (lng) first; pnr = (lng) (last - first) + 1; - if (!lg->flushing ) { + if (!lg->flushing) { assert(uid == NULL); uid = *cands; BBPfix((*cands)->batCacheid); tr->changes[tr->nr].type = LOG_UPDATE; } } - if (l->flag==LOG_UPDATE_CONST) { - assert(!cands); // TODO: This might change in the future. + if (l->flag == LOG_UPDATE_CONST) { + assert(!cands); /* TODO: This might change in the future. */ tr->changes[tr->nr].type = LOG_UPDATE_BULK; } tr->changes[tr->nr].nr = pnr; @@ -639,7 +640,7 @@ la_bat_update_count(logger *lg, log_id i cp = p; } if (cp != BUN_NONE) { - lng ocnt = *(lng*) Tloc(lg->catalog_cnt, cp); + lng ocnt = *(lng *) Tloc(lg->catalog_cnt, cp); assert(lg->catalog_cnt->hseqbase == 0); if (ocnt < cnt && BUNreplace(lg->catalog_cnt, cp, &cnt, false) != GDK_SUCCEED) { MT_rwlock_rdunlock(&cni.b->thashlock); @@ -676,22 +677,22 @@ la_bat_updates(logger *lg, logaction *la cnt = BATcount(b); int is_msk = (b->ttype == TYPE_msk); /* handle offset 0 ie clear */ - if (/* DISABLES CODE */ (0) && la->offset == 0 && cnt) + if ( /* DISABLES CODE */ (0) && la->offset == 0 && cnt) BATclear(b, true); /* handle offset */ - if (cnt <= (BUN)la->offset) { + if (cnt <= (BUN) la->offset) { msk t = 1; - if (cnt < (BUN)la->offset) { /* insert nils */ - const void *tv = (is_msk)?&t:ATOMnilptr(b->ttype); + if (cnt < (BUN) la->offset) { /* insert nils */ + const void *tv = (is_msk) ? 
&t : ATOMnilptr(b->ttype); lng i, d = la->offset - BATcount(b); - for(i=0;i<d;i++) { + for (i = 0; i < d; i++) { if (BUNappend(b, tv, true) != GDK_SUCCEED) { logbat_destroy(b); return GDK_FAIL; } } } - if (BATcount(b) == (BUN)la->offset && BATappend(b, la->b, NULL, true) != GDK_SUCCEED) { + if (BATcount(b) == (BUN) la->offset && BATappend(b, la->b, NULL, true) != GDK_SUCCEED) { logbat_destroy(b); _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org