Changeset: 4dc82dbcced9 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/4dc82dbcced9 Modified Files: sql/include/sql_relation.h Branch: balanced_union Log Message:
Merges with default diffs (truncated from 2010 to 300 lines): diff --git a/clients/Tests/MAL-signatures-hge.test b/clients/Tests/MAL-signatures-hge.test --- a/clients/Tests/MAL-signatures-hge.test +++ b/clients/Tests/MAL-signatures-hge.test @@ -51033,56 +51033,6 @@ user_statistics pattern sysmon.user_statistics() (X_0:bat[:str], X_1:bat[:lng], X_2:bat[:lng], X_3:bat[:timestamp], X_4:bat[:timestamp], X_5:bat[:lng], X_6:bat[:str]) SYSMONstatistics; (empty) -tokenizer -append -command tokenizer.append(X_0:str):oid -TKNZRappend; -tokenize a new string and append it to the tokenizer (duplicate elimination is performed) -tokenizer -close -command tokenizer.close():void -TKNZRclose; -close the current tokenizer store -tokenizer -depositFile -command tokenizer.depositFile(X_0:str):void -TKNZRdepositFile; -batch insertion from a file of strings to tokenize, each string is separated by a new line -tokenizer -getCardinality -command tokenizer.getCardinality():bat[:lng] -TKNZRgetCardinality; -debugging function that returns the unique tokens at each level -tokenizer -getCount -command tokenizer.getCount():bat[:lng] -TKNZRgetCount; -debugging function that returns the size of the bats at each level -tokenizer -getIndex -command tokenizer.getIndex():bat[:oid] -TKNZRgetIndex; -administrative function that returns the INDEX bat -tokenizer -getLevel -command tokenizer.getLevel(X_0:int):bat[:str] -TKNZRgetLevel; -administrative function that returns the bat on level i -tokenizer -locate -pattern tokenizer.locate(X_0:str):oid -TKNZRlocate; -if the given string is in the store returns its oid, otherwise oid_nil -tokenizer -open -command tokenizer.open(X_0:str):void -TKNZRopen; -open the named tokenizer store, a new one is created if the specified name does not exist -tokenizer -take -pattern tokenizer.take(X_0:oid):str -TKNZRtakeOid; -reconstruct and returns the i-th string txtsim dameraulevenshtein pattern txtsim.dameraulevenshtein(X_0:str, X_1:str):int diff --git a/clients/Tests/MAL-signatures.test b/clients/Tests/MAL-signatures.test --- a/clients/Tests/MAL-signatures.test +++ b/clients/Tests/MAL-signatures.test @@ -39358,56 +39358,6 @@ user_statistics pattern sysmon.user_statistics() (X_0:bat[:str], X_1:bat[:lng], X_2:bat[:lng], X_3:bat[:timestamp], X_4:bat[:timestamp], X_5:bat[:lng], X_6:bat[:str]) SYSMONstatistics; (empty) -tokenizer -append -command tokenizer.append(X_0:str):oid -TKNZRappend; -tokenize a new string and append it to the tokenizer (duplicate elimination is performed) -tokenizer -close -command tokenizer.close():void -TKNZRclose; -close the current tokenizer store -tokenizer -depositFile -command tokenizer.depositFile(X_0:str):void -TKNZRdepositFile; -batch insertion from a file of strings to tokenize, each string is separated by a new line -tokenizer -getCardinality -command tokenizer.getCardinality():bat[:lng] -TKNZRgetCardinality; -debugging function that returns the unique tokens at each level -tokenizer -getCount -command tokenizer.getCount():bat[:lng] -TKNZRgetCount; -debugging function that returns the size of the bats at each level -tokenizer -getIndex -command tokenizer.getIndex():bat[:oid] -TKNZRgetIndex; -administrative function that returns the INDEX bat -tokenizer -getLevel -command tokenizer.getLevel(X_0:int):bat[:str] -TKNZRgetLevel; -administrative function that returns the bat on level i -tokenizer -locate -pattern tokenizer.locate(X_0:str):oid -TKNZRlocate; -if the given string is in the store returns its oid, otherwise oid_nil -tokenizer -open -command tokenizer.open(X_0:str):void -TKNZRopen; -open the named tokenizer store, a new one is created if the specified name does not exist -tokenizer -take -pattern tokenizer.take(X_0:oid):str -TKNZRtakeOid; -reconstruct and returns the i-th string txtsim dameraulevenshtein pattern txtsim.dameraulevenshtein(X_0:str, X_1:str):int diff --git a/common/stream/stream.h b/common/stream/stream.h --- a/common/stream/stream.h +++ b/common/stream/stream.h @@ -245,7 +245,7 @@ typedef struct bstream { stream_export bstream *bstream_create(stream *rs, size_t chunk_size); // used all over stream_export void bstream_destroy(bstream *s); // all over -stream_export ssize_t bstream_read(bstream *s, size_t size); // tablet.c, tokenizer.c +stream_export ssize_t bstream_read(bstream *s, size_t size); // tablet.c stream_export ssize_t bstream_next(bstream *s); // all over /* Callback stream is a stream where the read and write functions are diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c --- a/gdk/gdk_bbp.c +++ b/gdk/gdk_bbp.c @@ -2247,7 +2247,7 @@ BBPdump(void) continue; BAT *b = BBP_desc(i); unsigned status = BBP_status(i); - printf("# %d: " ALGOOPTBATFMT "refs=%d lrefs=%d status=%u%s", + printf("# %d: " ALGOOPTBATFMT " refs=%d lrefs=%d status=%u%s", i, ALGOOPTBATPAR(b), BBP_refs(i), diff --git a/gdk/gdk_logger.c b/gdk/gdk_logger.c --- a/gdk/gdk_logger.c +++ b/gdk/gdk_logger.c @@ -2085,6 +2085,12 @@ log_load(const char *fn, const char *log lg->seqs_val = BATdescriptor(BBPindex(bak)); strconcat_len(bak, sizeof(bak), fn, "_dseqs", NULL); lg->dseqs = BATdescriptor(BBPindex(bak)); + if (lg->seqs_id == NULL || + lg->seqs_val == NULL || + lg->dseqs == NULL) { + GDKerror("Logger_new: cannot load seqs bats"); + goto error; + } } else { lg->seqs_id = logbat_new(TYPE_int, 1, PERSISTENT); lg->seqs_val = logbat_new(TYPE_lng, 1, PERSISTENT); diff --git a/gdk/gdk_storage.c b/gdk/gdk_storage.c --- a/gdk/gdk_storage.c +++ b/gdk/gdk_storage.c @@ -550,7 +550,7 @@ GDKload(int farmid, const char *nme, con for (n_expected = (ssize_t) size; n_expected > 0; n_expected -= n) { n = read(fd, dst, (unsigned) MIN(1 << 30, n_expected)); if (n < 0) - GDKsyserror("GDKload: cannot read: name=%s, ext=%s, %zu bytes missing\n", nme, ext ? ext : "", (size_t) n_expected); + GDKsyserror("GDKload: cannot read: name=%s, ext=%s, expected %zu, %zd bytes missing\n", nme, ext ? ext : "", size, n_expected); #ifndef __COVERITY__ /* Coverity doesn't seem to * recognize that we're just @@ -567,7 +567,8 @@ GDKload(int farmid, const char *nme, con /* we couldn't read all, error * already generated */ GDKfree(ret); - GDKerror("short read from heap %s%s%s, expected %zu, missing %zd\n", nme, ext ? "." : "", ext ? ext : "", size, n_expected); + if (n >= 0) /* don't report error twice */ + GDKerror("short read from heap %s%s%s, expected %zu, missing %zd\n", nme, ext ? "." : "", ext ? ext : "", size, n_expected); ret = NULL; } #ifndef NDEBUG @@ -763,13 +764,12 @@ BATsave_iter(BAT *b, BATiter *bi, BUN si } if (size != b->batCount || b->batInserted < b->batCount) { /* if the sizes don't match, the BAT must be dirty */ - b->batCopiedtodisk = false; b->theap->dirty = true; if (b->tvheap) b->tvheap->dirty = true; - } else { - b->batCopiedtodisk = true; } + /* there is something on disk now */ + b->batCopiedtodisk = true; MT_lock_unset(&b->theaplock); if (locked && b->thash && b->thash != (Hash *) 1) BAThashsave(b, dosync); diff --git a/monetdb5/ChangeLog b/monetdb5/ChangeLog --- a/monetdb5/ChangeLog +++ b/monetdb5/ChangeLog @@ -1,3 +1,7 @@ # ChangeLog file for MonetDB5 # This file is updated with Maddlog +* Wed Sep 13 2023 Sjoerd Mullender <sjo...@acm.org> +- Removed the MAL tokenizer module. It was never usable from SQL and + in this form never would be. + diff --git a/monetdb5/modules/kernel/bat5.c b/monetdb5/modules/kernel/bat5.c --- a/monetdb5/modules/kernel/bat5.c +++ b/monetdb5/modules/kernel/bat5.c @@ -675,17 +675,16 @@ HASHinfo(BAT *bk, BAT *bv, Hash *h, str return GDK_SUCCEED; } - static str -BATinfo(BAT **key, BAT **val, const bat bid) +BKCinfo(bat *ret1, bat *ret2, const bat *bid) { const char *mode, *accessmode; BAT *bk = NULL, *bv = NULL, *b; char bf[oidStrlen]; char buf[32]; - if ((b = BATdescriptor(bid)) == NULL) { - throw(MAL, "BATinfo", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); + if ((b = BATdescriptor(*bid)) == NULL) { + throw(MAL, "bat.info", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); } bk = COLnew(0, TYPE_str, 128, TRANSIENT); @@ -694,7 +693,7 @@ BATinfo(BAT **key, BAT **val, const bat BBPreclaim(bk); BBPreclaim(bv); BBPunfix(b->batCacheid); - throw(MAL, "bat.getInfo", SQLSTATE(HY013) MAL_MALLOC_FAIL); + throw(MAL, "bat.info", SQLSTATE(HY013) MAL_MALLOC_FAIL); } BATiter bi = bat_iterator(b); @@ -718,11 +717,11 @@ BATinfo(BAT **key, BAT **val, const bat accessmode = "unknown"; } - if (BUNappend(bk, "batId", false) != GDK_SUCCEED || - BUNappend(bv, BATgetId(b), false) != GDK_SUCCEED || - BUNappend(bk, "batCacheid", false) != GDK_SUCCEED || - BUNappend(bv, local_itoa((ssize_t) b->batCacheid, buf), - false) != GDK_SUCCEED + if (BUNappend(bk, "batId", false) != GDK_SUCCEED + || BUNappend(bv, BATgetId(b), false) != GDK_SUCCEED + || BUNappend(bk, "batCacheid", false) != GDK_SUCCEED + || BUNappend(bv, local_itoa((ssize_t) b->batCacheid, buf), + false) != GDK_SUCCEED || BUNappend(bk, "tparentid", false) != GDK_SUCCEED || BUNappend(bv, local_itoa((ssize_t) bi.h->parentid, buf), false) != GDK_SUCCEED @@ -803,7 +802,7 @@ BATinfo(BAT **key, BAT **val, const bat BBPreclaim(bk); BBPreclaim(bv); BBPunfix(b->batCacheid); - throw(MAL, "bat.getInfo", SQLSTATE(HY013) MAL_MALLOC_FAIL); + throw(MAL, "bat.info", SQLSTATE(HY013) MAL_MALLOC_FAIL); } /* dump index information */ MT_rwlock_rdlock(&b->thashlock); @@ -813,25 +812,12 @@ BATinfo(BAT **key, BAT **val, const bat BBPreclaim(bk); BBPreclaim(bv); BBPunfix(b->batCacheid); - throw(MAL, "bat.getInfo", SQLSTATE(HY013) MAL_MALLOC_FAIL); + throw(MAL, "bat.info", SQLSTATE(HY013) MAL_MALLOC_FAIL); } MT_rwlock_rdunlock(&b->thashlock); bat_iterator_end(&bi); - *key = bk; - *val = bv; assert(BATcount(bk) == BATcount(bv)); - BBPunfix(bid); - return MAL_SUCCEED; -} - -static str -BKCinfo(bat *ret1, bat *ret2, const bat *bid) -{ - BAT *bv, *bk; - str msg; - - if ((msg = BATinfo(&bk, &bv, *bid)) != NULL) - return msg; + BBPunfix(b->batCacheid); *ret1 = bk->batCacheid; BBPkeepref(bk); *ret2 = bv->batCacheid; diff --git a/monetdb5/modules/kernel/batmmath.c b/monetdb5/modules/kernel/batmmath.c --- a/monetdb5/modules/kernel/batmmath.c +++ b/monetdb5/modules/kernel/batmmath.c @@ -95,9 +95,10 @@ CMDscienceUNARY(MalStkPtr stk, InstrPtr _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org