Changeset: 372a90d58baa for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/372a90d58baa Modified Files: clients/mapiclient/mclient.c gdk/gdk_bbp.c gdk/gdk_logger.c gdk/gdk_logger_internals.h gdk/gdk_posix.c sql/backends/monet5/sql.c sql/common/sql_list.c sql/include/sql_catalog.h sql/include/sql_list.h sql/storage/bat/bat_storage.c sql/storage/objectset.c sql/storage/sql_storage.h sql/storage/store.c sql/test/emptydb/Tests/check.stable.out.32bit Branch: default Log Message:
Merge with Sep2022 branch. diffs (truncated from 1687 to 300 lines): diff --git a/cmake/monetdb-findpackages.cmake b/cmake/monetdb-findpackages.cmake --- a/cmake/monetdb-findpackages.cmake +++ b/cmake/monetdb-findpackages.cmake @@ -30,6 +30,9 @@ endif() if(WITH_CMOCKA) find_package(CMocka) + if (TARGET cmocka::cmocka) + set(CMOCKA_LIBRARY cmocka::cmocka) + endif() endif() if(WITH_PCRE) diff --git a/gdk/ChangeLog.Sep2022 b/gdk/ChangeLog.Sep2022 --- a/gdk/ChangeLog.Sep2022 +++ b/gdk/ChangeLog.Sep2022 @@ -1,6 +1,17 @@ # ChangeLog file for GDK # This file is updated with Maddlog +* Mon Feb 20 2023 Sjoerd Mullender <sjo...@acm.org> +- A race condition was fixed where certain write-ahead log messages + could get intermingled, resulting in a corrupted WAL file. +- If opening of a file failed when it was supposed to get memory mapped, + an incorrect value was returned to indicate the failure, causing + crashes later on. This has been fixed. + +* Thu Feb 16 2023 Sjoerd Mullender <sjo...@acm.org> +- A race condition was fixed that could result in a missing tail file + for a string bat (i.e. a file with .tail1, .tail2, or .tail4 extension). + * Mon Feb 13 2023 Sjoerd Mullender <sjo...@acm.org> - When saving a bat failed for some reason during a low-level commit, this was logged in the log file, but the error was then subsequently diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c --- a/gdk/gdk_bbp.c +++ b/gdk/gdk_bbp.c @@ -648,7 +648,7 @@ BBPreadBBPline(FILE *fp, unsigned bbpver { char buf[4096]; uint64_t batid; - uint16_t status; + unsigned int status; unsigned int properties; int nread, n; char *s; @@ -676,14 +676,14 @@ BBPreadBBPline(FILE *fp, unsigned bbpver if (bbpversion <= GDKLIBRARY_HSIZE ? sscanf(buf, - "%" SCNu64 " %" SCNu16 " %128s %19s %u %" SCNu64 + "%" SCNu64 " %u %128s %19s %u %" SCNu64 " %" SCNu64 " %" SCNu64 "%n", &batid, &status, batname, filename, &properties, &count, &capacity, &base, &nread) < 8 : sscanf(buf, - "%" SCNu64 " %" SCNu16 " %128s %19s %u %" SCNu64 + "%" SCNu64 " %u %128s %19s %u %" SCNu64 " %" SCNu64 "%n", &batid, &status, batname, filename, @@ -3795,7 +3795,21 @@ BBPsync(int cnt, bat *restrict subcommit if (size > bi.count) /* includes sizes==NULL */ size = bi.count; bi.b->batInserted = size; - if (b && size != 0) { + if (bi.b->ttype >= 0 && ATOMvarsized(bi.b->ttype)) { + /* see epilogue() for other part of this */ + MT_lock_set(&bi.b->theaplock); + /* remember the tail we're saving */ + if (BATsetprop_nolock(bi.b, (enum prop_t) 20, TYPE_ptr, &bi.h) == NULL) { + GDKerror("setprop failed\n"); + ret = GDK_FAIL; + } else { + if (bi.b->oldtail == NULL) + bi.b->oldtail = (Heap *) 1; + HEAPincref(bi.h); + } + MT_lock_unset(&bi.b->theaplock); + } + if (ret == GDK_SUCCEED && b && size != 0) { /* wait for BBPSAVING so that we * can set it, wait for * BBPUNLOADING before @@ -3873,6 +3887,23 @@ BBPsync(int cnt, bat *restrict subcommit ret == GDK_SUCCEED ? "" : " failed", (t0 = GDKusec()) - t1); + if (ret != GDK_SUCCEED) { + /* clean up extra refs we created */ + for (int idx = 1; idx < cnt; idx++) { + bat i = subcommit ? subcommit[idx] : idx; + BAT *b = BBP_desc(i); + if (b && ATOMvarsized(b->ttype)) { + MT_lock_set(&b->theaplock); + ValPtr p = BATgetprop_nolock(b, (enum prop_t) 20); + if (p != NULL) { + HEAPdecref(p->val.pval, false); + BATrmprop_nolock(b, (enum prop_t) 20); + } + MT_lock_unset(&b->theaplock); + } + } + } + /* turn off the BBPSYNCING bits for all bats, even when things * didn't go according to plan (i.e., don't check for ret == * GDK_SUCCEED) */ diff --git a/gdk/gdk_logger.c b/gdk/gdk_logger.c --- a/gdk/gdk_logger.c +++ b/gdk/gdk_logger.c @@ -656,8 +656,10 @@ la_bat_updates(logger *lg, logaction *la if (bid < 0) return GDK_FAIL; - if (bid == 0) - return GDK_SUCCEED; /* ignore bats no longer in the catalog */ + if (!bid) { + GDKerror("la_bat_updates failed to find bid for object %d\n", la->cid); + return GDK_FAIL; + } if (!lg->flushing) { b = BATdescriptor(bid); @@ -750,6 +752,10 @@ la_bat_destroy(logger *lg, logaction *la if (bid < 0) return GDK_FAIL; + if (!bid) { + GDKerror("la_bat_destroy failed to find bid for object %d\n", la->cid); + return GDK_FAIL; + } if (bid && log_del_bat(lg, bid) != GDK_SUCCEED) return GDK_FAIL; return GDK_SUCCEED; @@ -1031,6 +1037,7 @@ log_open_output(logger *lg) } lg->end = 0; + lg->drops = 0; if (!LOG_DISABLED(lg)) { char id[32]; char *filename; @@ -1661,8 +1668,6 @@ bm_subcommit(logger *lg) sizes[i] = BATcount(dcatalog); n[i++] = dcatalog->batCacheid; - if (cleanup < (lg->cnt/2)) - cleanup = 0; if (cleanup && (rcnt=cleanup_and_swap(lg, r, bids, lids, cnts, catalog_bid, catalog_id, dcatalog, cleanup)) < 0) { GDKfree(n); GDKfree(r); @@ -1681,7 +1686,7 @@ bm_subcommit(logger *lg) sizes[i] = BATcount(lg->seqs_id); n[i++] = lg->seqs_val->batCacheid; } - if (!cleanup && lg->seqs_id && BATcount(lg->dseqs) > (BATcount(lg->seqs_id)/2)) { + if (!cleanup && lg->seqs_id && BATcount(lg->dseqs) > (BATcount(lg->seqs_id)/2) && BATcount(lg->dseqs) > 10 ) { BAT *tids, *ids, *vals; tids = bm_tids(lg->seqs_id, lg->dseqs); @@ -2160,6 +2165,8 @@ log_new(int debug, const char *fn, const .funcdata = funcdata, .id = 0, + .drops = 0, + .end = 0, .saved_id = getBBPlogno(), /* get saved log numer from bbp */ .saved_tid = (int)getBBPtransid(), /* get saved transaction id from bbp */ }; @@ -2284,7 +2291,7 @@ log_create(int debug, const char *fn, co static ulng log_next_logfile(logger *lg, ulng ts) { - int m = (GDKdebug & FORCEMITOMASK)?1000:10; + int m = (GDKdebug & FORCEMITOMASK)?1000:100; if (!lg->pending || !lg->pending->next) return 0; if (lg->pending != lg->current && lg->pending->last_ts <= ts) { @@ -2313,21 +2320,18 @@ log_cleanup_range(logger *lg, ulng id) gdk_return log_activate(logger *lg) { + gdk_return res = GDK_SUCCEED; MT_lock_set(&lg->rotation_lock); log_lock(lg); - if (lg->end > 0 && lg->saved_id+1 == lg->id) { + if (lg->drops > 100000 && lg->end > 0 && lg->saved_id+1 == lg->id) { lg->id++; log_close_output(lg); /* start new file */ - if (log_open_output(lg) != GDK_SUCCEED) { - log_unlock(lg); - MT_lock_unset(&lg->rotation_lock); - return GDK_FAIL; - } + res = log_open_output(lg); } log_unlock(lg); MT_lock_unset(&lg->rotation_lock); - return GDK_SUCCEED; + return res; } gdk_return @@ -2462,16 +2466,19 @@ log_constant(logger *lg, int type, ptr v gdk_return (*wt) (const void *, stream *, size_t) = BATatoms[type].atomWrite; + log_lock(lg); if (log_write_format(lg, &l) != GDK_SUCCEED || !mnstr_writeLng(lg->output_log, nr) || mnstr_write(lg->output_log, &tpe, 1, 1) != 1 || !mnstr_writeLng(lg->output_log, offset)) { (void) ATOMIC_DEC(&lg->refcount); + log_unlock(lg); ok = GDK_FAIL; goto bailout; } ok = wt(val, lg->output_log, 1); + log_unlock(lg); if (lg->debug & 1) fprintf(stderr, "#Logged %d " LLFMT " inserts\n", id, nr); @@ -2668,6 +2675,11 @@ log_bat_transient(logger *lg, log_id id) log_unlock(lg); return GDK_FAIL; } + if (!bid) { + GDKerror("log_bat_transient failed to find bid for object %d\n", id); + log_unlock(lg); + return GDK_FAIL; + } l.flag = LOG_DESTROY; l.id = id; @@ -2685,7 +2697,9 @@ log_bat_transient(logger *lg, log_id id) bid); BAT *b = BBPquickdesc(bid); assert(b); - lg->end += BATcount(b); + BUN cnt = BATcount(b); + lg->end += cnt; + lg->drops += cnt; gdk_return r = log_del_bat(lg, bid); log_unlock(lg); if (r != GDK_SUCCEED) @@ -2837,7 +2851,7 @@ new_logfile(logger *lg, stream* output_l const lng p = (lng) getfilepos(getFile(lg->output_log)); if (p == -1) return GDK_FAIL; - if (( p > log_large || (lg->end*1024) > log_large )) { + if (((!lg->pending || !lg->pending->next) && lg->drops > 100000) || p > log_large || (lg->end*1024) > log_large) { log_lock(lg); if (ATOMIC_GET(&lg->refcount) == 1) { lg->id++; @@ -2934,6 +2948,10 @@ log_tdone(logger *lg, ulng commit_ts) if (lg->current) { lg->current->last_ts = commit_ts; } + stream* output_log = lg->output_log; + ulng id = lg->id; + if (lg->flushing_output_log && new_logfile(lg, output_log, id) != GDK_SUCCEED) + GDKfatal("Could not create new log file\n"); } gdk_return @@ -2969,8 +2987,7 @@ log_tflush(logger* lg, ulng log_file_id, id = lg->id; MT_lock_unset(&lg->rotation_lock); if (mnstr_flush(output_log, MNSTR_FLUSH_DATA) || - (!(GDKdebug & NOSYNCMASK) && mnstr_fsync(output_log)) || - new_logfile(lg, output_log, id) != GDK_SUCCEED) { + (!(GDKdebug & NOSYNCMASK) && mnstr_fsync(output_log))) { /* flush failed */ MT_lock_set(&lg->rotation_lock); lg->flushing_output_log = false; @@ -3156,6 +3173,10 @@ log_find_bat(logger *lg, log_id id) log_lock(lg); log_bid bid = internal_find_bat(lg, id, -1); log_unlock(lg); + if (!bid) { + GDKerror("logger_find_bat failed to find bid for object %d\n", id); + return GDK_FAIL; + } return bid; } diff --git a/gdk/gdk_logger_internals.h b/gdk/gdk_logger_internals.h --- a/gdk/gdk_logger_internals.h +++ b/gdk/gdk_logger_internals.h @@ -30,6 +30,7 @@ struct logger { int saved_tid; /* id of transaction which was flushed out (into BBP storage) */ bool flushing; bool flushnow; + ulng drops; bool request_rotation; _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org