Changeset: 7a26d14b0fd2 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/7a26d14b0fd2
Modified Files:
	MonetDB.spec
	clients/Tests/exports.stable.out
	gdk/gdk.h
	gdk/gdk_private.h
	sql/storage/bat/bat_storage.c
Branch: default
Log Message:
Merge with Jan2022 branch. diffs (truncated from 1761 to 300 lines): diff --git a/ChangeLog.Jan2022 b/ChangeLog.Jan2022 --- a/ChangeLog.Jan2022 +++ b/ChangeLog.Jan2022 @@ -1,3 +1,6 @@ # ChangeLog file for devel # This file is updated with Maddlog +* Wed Jan 12 2022 Sjoerd Mullender <sjo...@acm.org> +- A couple of concurrency issues have been fixed. + diff --git a/MonetDB.spec b/MonetDB.spec --- a/MonetDB.spec +++ b/MonetDB.spec @@ -878,18 +878,6 @@ fi result set - GH#7202: DISTINCT does not work when sorting by additional columns -* Mon Jan 3 2022 Panagiotis Koutsourakis <kutsu...@monetdbsolutions.com> - 11.43.1-20220103 -- gdk: Implement string imprints (strimps for short) a pre-filter structure - for strings in order to accelerate LIKE queries. If a strimp exists - for a specific string column the strings are pre-filtered, rejecting - strings that cannot possibly match, before the more expensive and - accurate matching algorithms run. Strimps are created automatically - or using 'sys.strimp_create' with arguments the names of the schema, - table and column. Automatic strimp creation is controlled by two - user settable gdk options: 'gdk_use_strimps' (default value "no") and - 'gdk_strimps_threshold' (default value 1.000.000). See the manual for - more details. - * Wed Dec 15 2021 Pedro Ferreira <pedro.ferre...@monetdbsolutions.com> - 11.43.1-20220103 - monetdb5: The storage cleanup in the 11.41.5 (Jul2021) release made the OLTP optimizer pipeline obsolete, thus it was removed. 
diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out --- a/clients/Tests/MAL-signatures.stable.out +++ b/clients/Tests/MAL-signatures.stable.out @@ -9600,10 +9600,6 @@ [ "streams", "readStr", "unsafe command streams.readStr(X_0:streams):str ", "mnstr_read_stringwrap;", "read string data from the stream" ] [ "streams", "writeInt", "unsafe command streams.writeInt(X_0:streams, X_1:int):void ", "mnstr_writeIntwrap;", "write data on the stream" ] [ "streams", "writeStr", "unsafe command streams.writeStr(X_0:streams, X_1:str):void ", "mnstr_write_stringwrap;", "write data on the stream" ] -[ "strimps", "mkstrimp", "unsafe pattern strimps.mkstrimp(X_0:bat[:str], X_1:bat[:oid]):void ", "PATstrimpCreate;", "construct the strimp a BAT" ] -[ "strimps", "strimpfilter", "pattern strimps.strimpfilter(X_0:str, X_1:str):bit ", "PATstrimpFilter;", "" ] -[ "strimps", "strimpfilterjoin", "pattern strimps.strimpfilterjoin(X_0:str, X_1:any, X_2:any, X_3:any, X_4:any, X_5:any) (X_6:bat[:oid], X_7:bat[:str]) ", "PATstrimpFilter;", "" ] -[ "strimps", "strimpfilterselect", "pattern strimps.strimpfilterselect(X_0:bat[:str], X_1:bat[:oid], X_2:str, X_3:bit):bat[:oid] ", "PATstrimpFilterSelect;", "" ] [ "sysmon", "pause", "unsafe pattern sysmon.pause(X_0:int):void ", "SYSMONpause;", "Suspend a running query" ] [ "sysmon", "pause", "unsafe pattern sysmon.pause(X_0:lng):void ", "SYSMONpause;", "Suspend a running query" ] [ "sysmon", "pause", "unsafe pattern sysmon.pause(X_0:sht):void ", "SYSMONpause;", "Suspend a running query" ] diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128 --- a/clients/Tests/MAL-signatures.stable.out.int128 +++ b/clients/Tests/MAL-signatures.stable.out.int128 @@ -12921,10 +12921,6 @@ [ "streams", "readStr", "unsafe command streams.readStr(X_0:streams):str ", "mnstr_read_stringwrap;", "read string data from the stream" ] [ "streams", "writeInt", "unsafe command 
streams.writeInt(X_0:streams, X_1:int):void ", "mnstr_writeIntwrap;", "write data on the stream" ] [ "streams", "writeStr", "unsafe command streams.writeStr(X_0:streams, X_1:str):void ", "mnstr_write_stringwrap;", "write data on the stream" ] -[ "strimps", "mkstrimp", "unsafe pattern strimps.mkstrimp(X_0:bat[:str], X_1:bat[:oid]):void ", "PATstrimpCreate;", "construct the strimp a BAT" ] -[ "strimps", "strimpfilter", "pattern strimps.strimpfilter(X_0:str, X_1:str):bit ", "PATstrimpFilter;", "" ] -[ "strimps", "strimpfilterjoin", "pattern strimps.strimpfilterjoin(X_0:str, X_1:any, X_2:any, X_3:any, X_4:any, X_5:any) (X_6:bat[:oid], X_7:bat[:str]) ", "PATstrimpFilter;", "" ] -[ "strimps", "strimpfilterselect", "pattern strimps.strimpfilterselect(X_0:bat[:str], X_1:bat[:oid], X_2:str, X_3:bit):bat[:oid] ", "PATstrimpFilterSelect;", "" ] [ "sysmon", "pause", "unsafe pattern sysmon.pause(X_0:int):void ", "SYSMONpause;", "Suspend a running query" ] [ "sysmon", "pause", "unsafe pattern sysmon.pause(X_0:lng):void ", "SYSMONpause;", "Suspend a running query" ] [ "sysmon", "pause", "unsafe pattern sysmon.pause(X_0:sht):void ", "SYSMONpause;", "Suspend a running query" ] diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -149,6 +149,7 @@ BAT *BATgroupvariance_sample(BAT *b, BAT BUN BATgrows(BAT *b); BUN BATguess_uniques(BAT *b, struct canditer *ci); gdk_return BAThash(BAT *b); +bool BAThasstrimps(BAT *b); void BAThseqbase(BAT *b, oid o); gdk_return BATimprints(BAT *b); BAT *BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool max_one, BUN estimate); @@ -393,8 +394,8 @@ BUN ORDERfndlast(BAT *b, Heap *oidxh, co BUN SORTfnd(BAT *b, const void *v); BUN SORTfndfirst(BAT *b, const void *v); BUN SORTfndlast(BAT *b, const void *v); -gdk_return STRMPappendBitstring(BAT *b, const char *s); gdk_return STRMPcreate(BAT *b, BAT *s); +void STRMPdestroy(BAT *b); BAT 
*STRMPfilter(BAT *b, BAT *s, const char *q); MT_Id THRcreate(void (*f)(void *), void *arg, enum MT_thr_detach d, const char *name); void *THRdata[THREADDATA]; diff --git a/debian/changelog b/debian/changelog --- a/debian/changelog +++ b/debian/changelog @@ -40,21 +40,6 @@ monetdb (11.43.1) unstable; urgency=low monetdb (11.43.1) unstable; urgency=low - * gdk: Implement string imprints (strimps for short) a pre-filter structure - for strings in order to accelerate LIKE queries. If a strimp exists - for a specific string column the strings are pre-filtered, rejecting - strings that cannot possibly match, before the more expensive and - accurate matching algorithms run. Strimps are created automatically - or using 'sys.strimp_create' with arguments the names of the schema, - table and column. Automatic strimp creation is controlled by two - user settable gdk options: 'gdk_use_strimps' (default value "no") and - 'gdk_strimps_threshold' (default value 1.000.000). See the manual for - more details. - - -- Panagiotis Koutsourakis <kutsu...@monetdbsolutions.com> Mon, 3 Jan 2022 14:02:13 +0100 - -monetdb (11.43.1) unstable; urgency=low - * monetdb5: The storage cleanup in the 11.41.5 (Jul2021) release made the OLTP optimizer pipeline obsolete, thus it was removed. diff --git a/gdk/ChangeLog-Archive b/gdk/ChangeLog-Archive --- a/gdk/ChangeLog-Archive +++ b/gdk/ChangeLog-Archive @@ -6,18 +6,6 @@ FILE_ATTIBUTE_NOT_CONTENT_INDEXED, meaning that they should not be indexed by indexing or search services. -* Mon Jan 3 2022 Panagiotis Koutsourakis <kutsu...@monetdbsolutions.com> - 11.43.1-20220103 -- Implement string imprints (strimps for short) a pre-filter structure - for strings in order to accelerate LIKE queries. If a strimp exists - for a specific string column the strings are pre-filtered, rejecting - strings that cannot possibly match, before the more expensive and - accurate matching algorithms run. 
Strimps are created automatically - or using 'sys.strimp_create' with arguments the names of the schema, - table and column. Automatic strimp creation is controlled by two - user settable gdk options: 'gdk_use_strimps' (default value "no") and - 'gdk_strimps_threshold' (default value 1.000.000). See the manual for - more details. - * Wed Aug 11 2021 Sjoerd Mullender <sjo...@acm.org> - 11.43.1-20220103 - Many (most) low level functions that could take a long time (such as BATjoin) can now be aborted with a timeout. When the function takes too diff --git a/gdk/ChangeLog.Jan2022 b/gdk/ChangeLog.Jan2022 --- a/gdk/ChangeLog.Jan2022 +++ b/gdk/ChangeLog.Jan2022 @@ -1,3 +1,10 @@ # ChangeLog file for GDK # This file is updated with Maddlog +* Wed Jan 12 2022 Panagiotis Koutsourakis <kutsu...@monetdbsolutions.com> +- Implement string imprints (strimps for short) a pre-filter structure + for strings in order to accelerate LIKE queries. If a strimp exists + for a specific string column the strings are pre-filtered, rejecting + strings that cannot possibly match, before the more expensive and + accurate matching algorithms run. + diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -1790,6 +1790,12 @@ gdk_export gdk_return BATimprints(BAT *b gdk_export void IMPSdestroy(BAT *b); gdk_export lng IMPSimprintsize(BAT *b); +/* Strimps exported functions */ +gdk_export gdk_return STRMPcreate(BAT *b, BAT *s); +gdk_export BAT *STRMPfilter(BAT *b, BAT *s, const char *q); +gdk_export void STRMPdestroy(BAT *b); +gdk_export bool BAThasstrimps(BAT *b); + /* The ordered index structure */ gdk_export gdk_return BATorderidx(BAT *b, bool stable); @@ -2401,11 +2407,6 @@ gdk_export BAT *BATsample_with_seed(BAT CALLBACK; \ } while (0) -/* - * String Imprints Development/Testing. TODO: remove the following. 
- */ - -#include "gdk_strimps.h" typedef struct gdk_callback { char *name; int argc; diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h --- a/gdk/gdk_private.h +++ b/gdk/gdk_private.h @@ -233,8 +233,6 @@ void STRMPincref(Strimps *strimps) __attribute__((__visibility__("hidden"))); void STRMPdecref(Strimps *strimps, bool remove) __attribute__((__visibility__("hidden"))); -void STRMPdestroy(BAT *b) - __attribute__((__visibility__("hidden"))); void STRMPfree(BAT *b) __attribute__((__visibility__("hidden"))); void MT_init_posix(void) diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c --- a/gdk/gdk_select.c +++ b/gdk/gdk_select.c @@ -143,9 +143,7 @@ hashselect(BAT *b, BATiter *bi, struct c MT_rwlock_rdlock(&b->thashlock); if (b->thash == NULL) { GDKerror("Hash destroyed before we could use it\n"); - BBPreclaim(bn); - MT_rwlock_rdunlock(&b->thashlock); - return NULL; + goto bailout; } } switch (ATOMbasetype(b->ttype)) { @@ -168,11 +166,8 @@ hashselect(BAT *b, BATiter *bi, struct c dst = buninsfix(bn, dst, cnt, o, maximum - BATcapacity(bn), maximum); - if (dst == NULL) { - MT_rwlock_rdunlock(&b->thashlock); - BBPreclaim(bn); - return NULL; - } + if (dst == NULL) + goto bailout; cnt++; } } @@ -184,11 +179,8 @@ hashselect(BAT *b, BATiter *bi, struct c dst = buninsfix(bn, dst, cnt, o, maximum - BATcapacity(bn), maximum); - if (dst == NULL) { - MT_rwlock_rdunlock(&b->thashlock); - BBPreclaim(bn); - return NULL; - } + if (dst == NULL) + goto bailout; cnt++; } } @@ -210,6 +202,7 @@ hashselect(BAT *b, BATiter *bi, struct c return bn; bailout: + MT_rwlock_rdunlock(&b->thashlock); BBPreclaim(bn); return NULL; } diff --git a/gdk/gdk_strimps.c b/gdk/gdk_strimps.c --- a/gdk/gdk_strimps.c +++ b/gdk/gdk_strimps.c @@ -80,6 +80,8 @@ #include "gdk.h" #include "gdk_private.h" +#include "gdk_strimps.h" + /* Macros for accessing metadada of a strimp. These are recorded in the * first 8 bytes of the heap. 
@@ -698,6 +700,21 @@ STRMPcreateStrimpHeap(BAT *b, BAT *s) return r; } +bool +BAThasstrimps(BAT *b) +{ + BAT *pb; + if (VIEWtparent(b)) { + pb = BBP_cache(VIEWtparent(b)); + assert(pb); + } else { + pb = b; + } + + return BATcheckstrimps(pb); + +} + gdk_return STRMPcreate(BAT *b, BAT *s) { @@ -762,58 +779,6 @@ STRMPcreate(BAT *b, BAT *s) return GDK_SUCCEED; } -gdk_return -STRMPappendBitstring(BAT *b, const char *s) -{ - lng t0 = 0; - BAT *pb; - uint64_t *dh; - Strimps *strmp; - const float extend_factor = 1.5; - - TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec(); - if (ATOMstorage(b->ttype) != TYPE_str) { - GDKerror("Cannot manipulate strimps index for non string bats\n"); - return GDK_FAIL; - } - - if (VIEWtparent(b)) { - pb = BBP_cache(VIEWtparent(b)); - assert(pb); - } else { - pb = b; - } - - if (!BATcheckstrimps(pb)) { - GDKerror("Strimp missing, cannot append value\n"); - return GDK_FAIL; - } - MT_lock_set(&pb->batIdxLock); - strmp = pb->tstrimps; - /* Extend heap if there is not enough space */ - if (strmp->strimps.free >= strmp->strimps.size + sizeof(uint64_t)) { _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list