Changeset: db8144a929c8 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/db8144a929c8
Branch: sqloptimizer
Log Message:

Merged with default diffs (truncated from 2926 to 300 lines): diff --git a/clients/Tests/MAL-signatures.test b/clients/Tests/MAL-signatures.test --- a/clients/Tests/MAL-signatures.test +++ b/clients/Tests/MAL-signatures.test @@ -33008,6 +33008,11 @@ similarity command battxtsim.similarity(X_0:bat[:str], X_1:bat[:str]):bat[:dbl] fstrcmp0_impl_bulk; Normalized edit distance between two strings +baturl +extractURLHost +command baturl.extractURLHost(X_0:bat[:str], X_1:bit):bat[:str] +BATextractURLHost; +Extract host from BAT of URLs batuuid isaUUID command batuuid.isaUUID(X_0:bat[:str]):bat[:bit] @@ -47999,6 +48004,11 @@ command txtsim.stringdiff(X_0:str, X_1:s stringdiff_impl; calculate the soundexed editdistance url +extractURLHost +command url.extractURLHost(X_0:str, X_1:bit):str +extractURLHost; +Extract host from a URL relaxed version +url getAnchor command url.getAnchor(X_0:url):str URLgetAnchor; @@ -48032,7 +48042,7 @@ url getHost command url.getHost(X_0:url):str URLgetHost; -Extract the server name from the URL +Extract the server name from the URL strict version url getPort command url.getPort(X_0:url):str diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -758,8 +758,9 @@ typedef struct { #define GDKLIBRARY_MINMAX_POS 061042U /* first in Nov2019: no min/max position; no BBPinfo value */ #define GDKLIBRARY_TAILN 061043U /* first in Jul2021: str offset heaps names don't take width into account */ #define GDKLIBRARY_HASHASH 061044U /* first in Jul2021: hashash bit in string heaps */ +#define GDKLIBRARY_HSIZE 061045U /* first in Jan2022: heap "size" values */ /* if the version number is updated, also fix snapshot_bats() in bat_logger.c */ -#define GDKLIBRARY 061045U /* first after Jul2021 */ +#define GDKLIBRARY 061046U /* first after Jan2022 */ typedef struct BAT { /* static bat properties */ diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c --- a/gdk/gdk_bat.c +++ b/gdk/gdk_bat.c @@ -180,28 +180,6 @@ BATsetdims(BAT *b, uint16_t width) } const char * 
-gettailnamebi(const BATiter *bi) -{ - if (bi->type == TYPE_str) { - switch (bi->width) { - case 1: - return "tail1"; - case 2: - return "tail2"; - case 4: -#if SIZEOF_VAR_T == 8 - return "tail4"; - case 8: -#endif - break; - default: - MT_UNREACHABLE(); - } - } - return "tail"; -} - -const char * gettailname(const BAT *b) { if (b->ttype == TYPE_str) { diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c --- a/gdk/gdk_bbp.c +++ b/gdk/gdk_bbp.c @@ -443,6 +443,8 @@ heapinit(BAT *b, const char *buf, (void) bbpversion; /* could be used to implement compatibility */ minpos = maxpos = (uint64_t) oid_nil; /* for GDKLIBRARY_MINMAX_POS case */ + size = 0; /* for GDKLIBRARY_HSIZE case */ + storage = STORE_INVALID; /* for GDKLIBRARY_HSIZE case */ if (bbpversion <= GDKLIBRARY_MINMAX_POS ? sscanf(buf, " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64 @@ -453,6 +455,7 @@ heapinit(BAT *b, const char *buf, &nokey1, &nosorted, &norevsorted, &base, &free, &size, &storage, &n) < 12 : + bbpversion <= GDKLIBRARY_HSIZE ? sscanf(buf, " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64 @@ -461,7 +464,16 @@ heapinit(BAT *b, const char *buf, type, &width, &var, &properties, &nokey0, &nokey1, &nosorted, &norevsorted, &base, &free, &size, &storage, &minpos, &maxpos, - &n) < 14) { + &n) < 14 : + sscanf(buf, + " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64 + " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64 + " %" SCNu64 " %" SCNu64 " %" SCNu64 + "%n", + type, &width, &var, &properties, &nokey0, + &nokey1, &nosorted, &norevsorted, &base, + &free, &minpos, &maxpos, + &n) < 12) { TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", lineno); return -1; } @@ -515,7 +527,14 @@ heapinit(BAT *b, const char *buf, /* (properties & 0x0200) is the old tdense flag */ b->tseqbase = (properties & 0x0200) == 0 || base >= (uint64_t) oid_nil ? 
oid_nil : (oid) base; b->theap->free = (size_t) free; - b->theap->size = (size_t) size; + /* set heap size to match capacity */ + if (b->ttype == TYPE_msk) { + /* round up capacity to multiple of 32 */ + b->batCapacity = (b->batCapacity + 31) & ~((BUN) 31); + b->theap->size = b->batCapacity / 8; + } else { + b->theap->size = (size_t) b->batCapacity << b->tshift; + } b->theap->base = NULL; settailname(b->theap, filename, t, width); b->theap->storage = STORE_INVALID; @@ -531,25 +550,30 @@ heapinit(BAT *b, const char *buf, b->tmaxpos = (BUN) maxpos; else b->tmaxpos = BUN_NONE; - if (b->theap->free > b->theap->size) { - TRC_CRITICAL(GDK, "\"free\" value larger than \"size\" in heap of bat %d on line %d\n", (int) bid, lineno); - return -1; - } return n; } static int -vheapinit(BAT *b, const char *buf, bat bid, const char *filename, int lineno) +vheapinit(BAT *b, const char *buf, bat bid, unsigned bbpversion, const char *filename, int lineno) { int n = 0; uint64_t free, size; uint16_t storage; + (void) bbpversion; /* could be used to implement compatibility */ + + size = 0; /* for GDKLIBRARY_HSIZE case */ + storage = STORE_INVALID; /* for GDKLIBRARY_HSIZE case */ if (b->tvarsized && b->ttype != TYPE_void) { - if (sscanf(buf, + if (bbpversion <= GDKLIBRARY_HSIZE ? 
+ sscanf(buf, " %" SCNu64 " %" SCNu64 " %" SCNu16 "%n", - &free, &size, &storage, &n) < 3) { + &free, &size, &storage, &n) < 3 : + sscanf(buf, + " %" SCNu64 + "%n", + &free, &n) < 1) { TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", lineno); return -1; } @@ -558,6 +582,13 @@ vheapinit(BAT *b, const char *buf, bat b TRC_CRITICAL(GDK, "cannot allocate memory for heap."); return -1; } + if (ATOMstorage(b->ttype) == TYPE_str && + free < GDK_STRHASHTABLE * sizeof(stridx_t) + BATTINY * GDK_VARALIGN) + size = GDK_STRHASHTABLE * sizeof(stridx_t) + BATTINY * GDK_VARALIGN; + else if (free < 512) + size = 512; + else + size = free; *b->tvheap = (Heap) { .free = (size_t) free, .size = (size_t) size, @@ -572,10 +603,6 @@ vheapinit(BAT *b, const char *buf, bat b strconcat_len(b->tvheap->filename, sizeof(b->tvheap->filename), filename, ".theap", NULL); ATOMIC_INIT(&b->tvheap->refs, 1); - if (b->tvheap->free > b->tvheap->size) { - TRC_CRITICAL(GDK, "\"free\" value larger than \"size\" in var heap of bat %d on line %d\n", (int) bid, lineno); - return -1; - } } return n; } @@ -605,7 +632,7 @@ BBPreadEntries(FILE *fp, unsigned bbpver int nread, n; char *s, *options = NULL; char logical[1024]; - uint64_t count, capacity, base = 0; + uint64_t count, capacity = 0, base = 0; #ifdef GDKLIBRARY_HASHASH int Thashash; #endif @@ -621,14 +648,21 @@ BBPreadEntries(FILE *fp, unsigned bbpver *s = 0; } - if (sscanf(buf, + if (bbpversion <= GDKLIBRARY_HSIZE ? 
+ sscanf(buf, "%" SCNu64 " %" SCNu16 " %128s %19s %u %" SCNu64 " %" SCNu64 " %" SCNu64 "%n", &batid, &status, headname, filename, - &properties, - &count, &capacity, &base, - &nread) < 8) { + &properties, &count, &capacity, &base, + &nread) < 8 : + sscanf(buf, + "%" SCNu64 " %" SCNu16 " %128s %19s %u %" SCNu64 + " %" SCNu64 + "%n", + &batid, &status, headname, filename, + &properties, &count, &base, + &nread) < 7) { TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", lineno); goto bailout; } @@ -680,7 +714,8 @@ BBPreadEntries(FILE *fp, unsigned bbpver bn->batRestricted = (properties & 0x06) >> 1; bn->batCount = (BUN) count; bn->batInserted = bn->batCount; - bn->batCapacity = (BUN) capacity; + /* set capacity to at least count */ + bn->batCapacity = (BUN) count <= BATTINY ? BATTINY : (BUN) count; char name[MT_NAME_LEN]; snprintf(name, sizeof(name), "heaplock%d", bn->batCacheid); /* fits */ MT_lock_init(&bn->theaplock, name); @@ -706,7 +741,7 @@ BBPreadEntries(FILE *fp, unsigned bbpver goto bailout; } nread += n; - n = vheapinit(bn, buf + nread, bid, filename, lineno); + n = vheapinit(bn, buf + nread, bid, bbpversion, filename, lineno); if (n < 0) { BATdestroy(bn); goto bailout; @@ -925,6 +960,7 @@ BBPheader(FILE *fp, int *lineno, bat *bb return 0; } if (bbpversion != GDKLIBRARY && + bbpversion != GDKLIBRARY_HSIZE && bbpversion != GDKLIBRARY_HASHASH && bbpversion != GDKLIBRARY_TAILN && bbpversion != GDKLIBRARY_MINMAX_POS) { @@ -1805,8 +1841,9 @@ BBPexit(void) * reclaimed as well. 
*/ static inline int -heap_entry(FILE *fp, BAT *b, BUN size, BATiter *bi) +heap_entry(FILE *fp, BATiter *bi, BUN size) { + BAT *b = bi->b; size_t free = bi->hfree; if (size < BUN_NONE) { if ((bi->type >= 0 && ATOMstorage(bi->type) == TYPE_msk)) @@ -1818,7 +1855,7 @@ heap_entry(FILE *fp, BAT *b, BUN size, B } if ((GDKdebug & TAILCHKMASK) && free > 0) { - char *fname = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), gettailname(b)); + char *fname = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), gettailnamebi(bi)); if (fname != NULL) { struct stat stb; if (stat(fname, &stb) == -1) { @@ -1834,7 +1871,7 @@ heap_entry(FILE *fp, BAT *b, BUN size, B } return fprintf(fp, " %s %d %d %d " BUNFMT " " BUNFMT " " BUNFMT " " - BUNFMT " " OIDFMT " %zu %zu %d %" PRIu64" %" PRIu64, + BUNFMT " " OIDFMT " %zu %" PRIu64" %" PRIu64, bi->type >= 0 ? BATatoms[bi->type].name : ATOMunknown_name(bi->type), bi->width, b->tvarsized, @@ -1850,25 +1887,23 @@ heap_entry(FILE *fp, BAT *b, BUN size, B b->tnorevsorted >= size ? 0 : b->tnorevsorted, b->tseqbase, free, - bi->h->size, - 0, - bi->minpos < b->hseqbase + size ? (uint64_t) bi->minpos : (uint64_t) oid_nil, - bi->maxpos < b->hseqbase + size ? (uint64_t) bi->maxpos : (uint64_t) oid_nil); + bi->minpos < size ? (uint64_t) bi->minpos : (uint64_t) oid_nil, + bi->maxpos < size ? (uint64_t) bi->maxpos : (uint64_t) oid_nil); _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org