Changeset: db8144a929c8 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/db8144a929c8
Branch: sqloptimizer
Log Message:

Merged with default


diffs (truncated from 2926 to 300 lines):

diff --git a/clients/Tests/MAL-signatures.test b/clients/Tests/MAL-signatures.test
--- a/clients/Tests/MAL-signatures.test
+++ b/clients/Tests/MAL-signatures.test
@@ -33008,6 +33008,11 @@ similarity
 command battxtsim.similarity(X_0:bat[:str], X_1:bat[:str]):bat[:dbl] 
 fstrcmp0_impl_bulk;
 Normalized edit distance between two strings
+baturl
+extractURLHost
+command baturl.extractURLHost(X_0:bat[:str], X_1:bit):bat[:str] 
+BATextractURLHost;
+Extract host from BAT of URLs
 batuuid
 isaUUID
 command batuuid.isaUUID(X_0:bat[:str]):bat[:bit] 
@@ -47999,6 +48004,11 @@ command txtsim.stringdiff(X_0:str, X_1:s
 stringdiff_impl;
 calculate the soundexed editdistance
 url
+extractURLHost
+command url.extractURLHost(X_0:str, X_1:bit):str 
+extractURLHost;
+Extract host from a URL relaxed version
+url
 getAnchor
 command url.getAnchor(X_0:url):str 
 URLgetAnchor;
@@ -48032,7 +48042,7 @@ url
 getHost
 command url.getHost(X_0:url):str 
 URLgetHost;
-Extract the server name from the URL
+Extract the server name from the URL strict version
 url
 getPort
 command url.getPort(X_0:url):str 
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -758,8 +758,9 @@ typedef struct {
 #define GDKLIBRARY_MINMAX_POS  061042U /* first in Nov2019: no min/max position; no BBPinfo value */
 #define GDKLIBRARY_TAILN       061043U /* first in Jul2021: str offset heaps names don't take width into account */
 #define GDKLIBRARY_HASHASH     061044U /* first in Jul2021: hashash bit in string heaps */
+#define GDKLIBRARY_HSIZE       061045U /* first in Jan2022: heap "size" values */
 /* if the version number is updated, also fix snapshot_bats() in bat_logger.c */
-#define GDKLIBRARY             061045U /* first after Jul2021 */
+#define GDKLIBRARY             061046U /* first after Jan2022 */
 
 typedef struct BAT {
        /* static bat properties */
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -180,28 +180,6 @@ BATsetdims(BAT *b, uint16_t width)
 }
 
 const char *
-gettailnamebi(const BATiter *bi)
-{
-       if (bi->type == TYPE_str) {
-               switch (bi->width) {
-               case 1:
-                       return "tail1";
-               case 2:
-                       return "tail2";
-               case 4:
-#if SIZEOF_VAR_T == 8
-                       return "tail4";
-               case 8:
-#endif
-                       break;
-               default:
-                       MT_UNREACHABLE();
-               }
-       }
-       return "tail";
-}
-
-const char *
 gettailname(const BAT *b)
 {
        if (b->ttype == TYPE_str) {
diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -443,6 +443,8 @@ heapinit(BAT *b, const char *buf,
        (void) bbpversion;      /* could be used to implement compatibility */
 
        minpos = maxpos = (uint64_t) oid_nil; /* for GDKLIBRARY_MINMAX_POS case */
+       size = 0;                             /* for GDKLIBRARY_HSIZE case */
+       storage = STORE_INVALID;              /* for GDKLIBRARY_HSIZE case */
        if (bbpversion <= GDKLIBRARY_MINMAX_POS ?
            sscanf(buf,
                   " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
@@ -453,6 +455,7 @@ heapinit(BAT *b, const char *buf,
                   &nokey1, &nosorted, &norevsorted, &base,
                   &free, &size, &storage,
                   &n) < 12 :
+           bbpversion <= GDKLIBRARY_HSIZE ?
            sscanf(buf,
                   " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
                   " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
@@ -461,7 +464,16 @@ heapinit(BAT *b, const char *buf,
                   type, &width, &var, &properties, &nokey0,
                   &nokey1, &nosorted, &norevsorted, &base,
                   &free, &size, &storage, &minpos, &maxpos,
-                  &n) < 14) {
+                  &n) < 14 :
+           sscanf(buf,
+                  " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
+                  " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
+                  " %" SCNu64 " %" SCNu64 " %" SCNu64
+                  "%n",
+                  type, &width, &var, &properties, &nokey0,
+                  &nokey1, &nosorted, &norevsorted, &base,
+                  &free, &minpos, &maxpos,
+                  &n) < 12) {
                TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", lineno);
                return -1;
        }
@@ -515,7 +527,14 @@ heapinit(BAT *b, const char *buf,
        /* (properties & 0x0200) is the old tdense flag */
        b->tseqbase = (properties & 0x0200) == 0 || base >= (uint64_t) oid_nil ? oid_nil : (oid) base;
        b->theap->free = (size_t) free;
-       b->theap->size = (size_t) size;
+       /* set heap size to match capacity */
+       if (b->ttype == TYPE_msk) {
+               /* round up capacity to multiple of 32 */
+               b->batCapacity = (b->batCapacity + 31) & ~((BUN) 31);
+               b->theap->size = b->batCapacity / 8;
+       } else {
+               b->theap->size = (size_t) b->batCapacity << b->tshift;
+       }
        b->theap->base = NULL;
        settailname(b->theap, filename, t, width);
        b->theap->storage = STORE_INVALID;
@@ -531,25 +550,30 @@ heapinit(BAT *b, const char *buf,
                b->tmaxpos = (BUN) maxpos;
        else
                b->tmaxpos = BUN_NONE;
-       if (b->theap->free > b->theap->size) {
-               TRC_CRITICAL(GDK, "\"free\" value larger than \"size\" in heap of bat %d on line %d\n", (int) bid, lineno);
-               return -1;
-       }
        return n;
 }
 
 static int
-vheapinit(BAT *b, const char *buf, bat bid, const char *filename, int lineno)
+vheapinit(BAT *b, const char *buf, bat bid, unsigned bbpversion, const char *filename, int lineno)
 {
        int n = 0;
        uint64_t free, size;
        uint16_t storage;
 
+       (void) bbpversion;      /* could be used to implement compatibility */
+
+       size = 0;                             /* for GDKLIBRARY_HSIZE case */
+       storage = STORE_INVALID;              /* for GDKLIBRARY_HSIZE case */
        if (b->tvarsized && b->ttype != TYPE_void) {
-               if (sscanf(buf,
+               if (bbpversion <= GDKLIBRARY_HSIZE ?
+                   sscanf(buf,
                           " %" SCNu64 " %" SCNu64 " %" SCNu16
                           "%n",
-                          &free, &size, &storage, &n) < 3) {
+                          &free, &size, &storage, &n) < 3 :
+                   sscanf(buf,
+                          " %" SCNu64
+                          "%n",
+                          &free, &n) < 1) {
                        TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", lineno);
                        return -1;
                }
@@ -558,6 +582,13 @@ vheapinit(BAT *b, const char *buf, bat b
                        TRC_CRITICAL(GDK, "cannot allocate memory for heap.");
                        return -1;
                }
+               if (ATOMstorage(b->ttype) == TYPE_str &&
+                   free < GDK_STRHASHTABLE * sizeof(stridx_t) + BATTINY * GDK_VARALIGN)
+                       size = GDK_STRHASHTABLE * sizeof(stridx_t) + BATTINY * GDK_VARALIGN;
+               else if (free < 512)
+                       size = 512;
+               else
+                       size = free;
                *b->tvheap = (Heap) {
                        .free = (size_t) free,
                        .size = (size_t) size,
@@ -572,10 +603,6 @@ vheapinit(BAT *b, const char *buf, bat b
                strconcat_len(b->tvheap->filename, sizeof(b->tvheap->filename),
                              filename, ".theap", NULL);
                ATOMIC_INIT(&b->tvheap->refs, 1);
-               if (b->tvheap->free > b->tvheap->size) {
-                       TRC_CRITICAL(GDK, "\"free\" value larger than \"size\" in var heap of bat %d on line %d\n", (int) bid, lineno);
-                       return -1;
-               }
        }
        return n;
 }
@@ -605,7 +632,7 @@ BBPreadEntries(FILE *fp, unsigned bbpver
                int nread, n;
                char *s, *options = NULL;
                char logical[1024];
-               uint64_t count, capacity, base = 0;
+               uint64_t count, capacity = 0, base = 0;
 #ifdef GDKLIBRARY_HASHASH
                int Thashash;
 #endif
@@ -621,14 +648,21 @@ BBPreadEntries(FILE *fp, unsigned bbpver
                        *s = 0;
                }
 
-               if (sscanf(buf,
+               if (bbpversion <= GDKLIBRARY_HSIZE ?
+                   sscanf(buf,
                           "%" SCNu64 " %" SCNu16 " %128s %19s %u %" SCNu64
                           " %" SCNu64 " %" SCNu64
                           "%n",
                           &batid, &status, headname, filename,
-                          &properties,
-                          &count, &capacity, &base,
-                          &nread) < 8) {
+                          &properties, &count, &capacity, &base,
+                          &nread) < 8 :
+                   sscanf(buf,
+                          "%" SCNu64 " %" SCNu16 " %128s %19s %u %" SCNu64
+                          " %" SCNu64
+                          "%n",
+                          &batid, &status, headname, filename,
+                          &properties, &count, &base,
+                          &nread) < 7) {
                        TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", lineno);
                        goto bailout;
                }
@@ -680,7 +714,8 @@ BBPreadEntries(FILE *fp, unsigned bbpver
                bn->batRestricted = (properties & 0x06) >> 1;
                bn->batCount = (BUN) count;
                bn->batInserted = bn->batCount;
-               bn->batCapacity = (BUN) capacity;
+               /* set capacity to at least count */
+               bn->batCapacity = (BUN) count <= BATTINY ? BATTINY : (BUN) count;
                char name[MT_NAME_LEN];
                snprintf(name, sizeof(name), "heaplock%d", bn->batCacheid); /* fits */
                MT_lock_init(&bn->theaplock, name);
@@ -706,7 +741,7 @@ BBPreadEntries(FILE *fp, unsigned bbpver
                        goto bailout;
                }
                nread += n;
-               n = vheapinit(bn, buf + nread, bid, filename, lineno);
+               n = vheapinit(bn, buf + nread, bid, bbpversion, filename, lineno);
                if (n < 0) {
                        BATdestroy(bn);
                        goto bailout;
@@ -925,6 +960,7 @@ BBPheader(FILE *fp, int *lineno, bat *bb
                return 0;
        }
        if (bbpversion != GDKLIBRARY &&
+           bbpversion != GDKLIBRARY_HSIZE &&
            bbpversion != GDKLIBRARY_HASHASH &&
            bbpversion != GDKLIBRARY_TAILN &&
            bbpversion != GDKLIBRARY_MINMAX_POS) {
@@ -1805,8 +1841,9 @@ BBPexit(void)
  * reclaimed as well.
  */
 static inline int
-heap_entry(FILE *fp, BAT *b, BUN size, BATiter *bi)
+heap_entry(FILE *fp, BATiter *bi, BUN size)
 {
+       BAT *b = bi->b;
        size_t free = bi->hfree;
        if (size < BUN_NONE) {
                if ((bi->type >= 0 && ATOMstorage(bi->type) == TYPE_msk))
@@ -1818,7 +1855,7 @@ heap_entry(FILE *fp, BAT *b, BUN size, B
        }
 
        if ((GDKdebug & TAILCHKMASK) && free > 0) {
-               char *fname = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), gettailname(b));
+               char *fname = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), gettailnamebi(bi));
                if (fname != NULL) {
                        struct stat stb;
                        if (stat(fname, &stb) == -1) {
@@ -1834,7 +1871,7 @@ heap_entry(FILE *fp, BAT *b, BUN size, B
        }
 
        return fprintf(fp, " %s %d %d %d " BUNFMT " " BUNFMT " " BUNFMT " "
-                      BUNFMT " " OIDFMT " %zu %zu %d %" PRIu64" %" PRIu64,
+                      BUNFMT " " OIDFMT " %zu %" PRIu64" %" PRIu64,
                       bi->type >= 0 ? BATatoms[bi->type].name : ATOMunknown_name(bi->type),
                       bi->width,
                       b->tvarsized,
@@ -1850,25 +1887,23 @@ heap_entry(FILE *fp, BAT *b, BUN size, B
                       b->tnorevsorted >= size ? 0 : b->tnorevsorted,
                       b->tseqbase,
                       free,
-                      bi->h->size,
-                      0,
-                      bi->minpos < b->hseqbase + size ? (uint64_t) bi->minpos : (uint64_t) oid_nil,
-                      bi->maxpos < b->hseqbase + size ? (uint64_t) bi->maxpos : (uint64_t) oid_nil);
+                      bi->minpos < size ? (uint64_t) bi->minpos : (uint64_t) oid_nil,
+                      bi->maxpos < size ? (uint64_t) bi->maxpos : (uint64_t) oid_nil);
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to