Changeset: 4cd6061fb4c2 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4cd6061fb4c2
Added Files:
        gdk/gdk_hash.c
        gdk/gdk_hash.h
        sql/jdbc/tests/Tests/Test_Dobjects.SQL.bat
Removed Files:
        gdk/gdk_search.h
        sql/test/Users/Tests/util.py
Modified Files:
        configure.ag
        gdk/Makefile.ag
        gdk/gdk.h
        gdk/gdk_bat.c
        gdk/gdk_delta.c
        gdk/gdk_search.c
        java/ChangeLog
        java/src/main/java/nl/cwi/monetdb/jdbc/MonetDatabaseMetaData.java
        java/tests/Test_Dobjects.java
        monetdb5/extras/mal_optimizer_template/Tests/opt_sql_append.stable.err
        monetdb5/extras/mal_optimizer_template/Tests/opt_sql_append.stable.out
        
monetdb5/extras/mal_optimizer_template/Tests/opt_sql_append.stable.out.single
        monetdb5/mal/Tests/dynamicload.malC
        monetdb5/mal/Tests/dynamicload.stable.err
        monetdb5/mal/mal_listing.c
        monetdb5/optimizer/Tests/JPexample.malC
        monetdb5/optimizer/Tests/JPexample.stable.out
        monetdb5/optimizer/Tests/projectionchain.malC
        monetdb5/optimizer/Tests/projectionchain.stable.out
        monetdb5/optimizer/opt_pipes.c
        monetdb5/optimizer/opt_projectionpath.c
        monetdb5/optimizer/opt_volcano.c
        sql/backends/monet5/sql_optimizer.c
        sql/backends/monet5/sql_user.c
        sql/benchmarks/ssbm/Tests/04-explain.stable.out
        sql/benchmarks/ssbm/Tests/04-explain.stable.out.int128
        sql/benchmarks/ssbm/Tests/05-explain.stable.out
        sql/benchmarks/ssbm/Tests/05-explain.stable.out.int128
        sql/benchmarks/ssbm/Tests/06-explain.stable.out
        sql/benchmarks/ssbm/Tests/06-explain.stable.out.int128
        sql/benchmarks/ssbm/Tests/07-explain.stable.out
        sql/benchmarks/ssbm/Tests/07-explain.stable.out.int128
        sql/benchmarks/ssbm/Tests/08-explain.stable.out
        sql/benchmarks/ssbm/Tests/08-explain.stable.out.int128
        sql/benchmarks/ssbm/Tests/09-explain.stable.out
        sql/benchmarks/ssbm/Tests/09-explain.stable.out.int128
        sql/benchmarks/ssbm/Tests/11-explain.stable.out
        sql/benchmarks/ssbm/Tests/11-explain.stable.out.int128
        sql/benchmarks/ssbm/Tests/12-explain.stable.out
        sql/benchmarks/ssbm/Tests/12-explain.stable.out.int128
        sql/benchmarks/ssbm/Tests/13-explain.stable.out
        sql/benchmarks/ssbm/Tests/13-explain.stable.out.int128
        sql/benchmarks/tpch/Tests/02-explain.stable.out
        sql/benchmarks/tpch/Tests/03-explain.stable.out
        sql/benchmarks/tpch/Tests/03-explain.stable.out.int128
        sql/benchmarks/tpch/Tests/05-explain.stable.out
        sql/benchmarks/tpch/Tests/05-explain.stable.out.32bit
        sql/benchmarks/tpch/Tests/05-explain.stable.out.int128
        sql/benchmarks/tpch/Tests/07-explain.stable.out
        sql/benchmarks/tpch/Tests/07-explain.stable.out.int128
        sql/benchmarks/tpch/Tests/08-explain.stable.out
        sql/benchmarks/tpch/Tests/08-explain.stable.out.int128
        sql/benchmarks/tpch/Tests/09-explain.stable.out
        sql/benchmarks/tpch/Tests/09-explain.stable.out.int128
        sql/benchmarks/tpch/Tests/10-explain.stable.out
        sql/benchmarks/tpch/Tests/10-explain.stable.out.int128
        sql/benchmarks/tpch/Tests/15-explain.stable.out
        sql/benchmarks/tpch/Tests/15-explain.stable.out.int128
        sql/benchmarks/tpch/Tests/16-explain.stable.out
        sql/benchmarks/tpch/Tests/16-explain.stable.out.32bit
        sql/benchmarks/tpch/Tests/17-explain.stable.out
        sql/benchmarks/tpch/Tests/17-explain.stable.out.int128
        sql/benchmarks/tpch/Tests/18-explain.stable.out
        sql/benchmarks/tpch/Tests/18-explain.stable.out.int128
        sql/benchmarks/tpch/Tests/19-explain.stable.out
        sql/benchmarks/tpch/Tests/19-explain.stable.out.int128
        sql/benchmarks/tpch/Tests/20-explain.stable.out
        sql/benchmarks/tpch/Tests/20-explain.stable.out.32bit
        sql/benchmarks/tpch/Tests/20-explain.stable.out.int128
        sql/benchmarks/tpch/Tests/21-explain.stable.out
        sql/benchmarks/tpch/Tests/21-explain.stable.out.32bit
        sql/benchmarks/tpch/Tests/22-explain.stable.out
        sql/benchmarks/tpch/Tests/22-explain.stable.out.32bit
        sql/benchmarks/tpch/Tests/22-explain.stable.out.int128
        sql/jdbc/tests/Tests/All
        sql/jdbc/tests/Tests/Test_Dobjects.stable.out
        sql/jdbc/tests/Tests/Test_PSmetadata.stable.out
        sql/jdbc/tests/Tests/Test_Rmetadata.stable.out
        sql/jdbc/tests/Tests/Test_Rsqldata.stable.out
        sql/server/rel_optimizer.c
        sql/server/rel_optimizer.h
        sql/server/rel_rel.c
        sql/server/rel_rel.h
        sql/test/Tests/setoptimizer.stable.err
        sql/test/Tests/setoptimizer.stable.out
        sql/test/Tests/setoptimizer.stable.out.Windows
        sql/test/Users/Tests/changePasswordUser.SQL.py
        sql/test/Users/Tests/changeSchemaUser.SQL.py
        sql/test/Users/Tests/columnRights.SQL.py
        sql/test/Users/Tests/dropManyUsers.Bug-3764.SQL.py
        sql/test/Users/Tests/grantMonetdb.SQL.py
        sql/test/Users/Tests/grantMonetdbToRegularUser.Bug-3771.SQL.py
        sql/test/Users/Tests/grantMonetdbToSchemaOwner.Bug-3771.SQL.py
        sql/test/Users/Tests/grantPrivilegesNonDefaultRole.Bug-3365.SQL.py
        sql/test/Users/Tests/grantRevokeAndGrantAgain.Bug-3765.SQL.py
        sql/test/Users/Tests/grantRole.Bug-3772.SQL.py
        sql/test/Users/Tests/renameUser.SQL.py
        sql/test/Users/Tests/role.SQL.py
        sql/test/Users/Tests/schemaRights.SQL.py
        sql/test/Users/Tests/withGrantOption.SQL.py
        sql/test/pg_regress/Tests/int8.sql
        sql/test/pg_regress/Tests/int8.stable.err
        sql/test/pg_regress/Tests/int8.stable.out
        sql/test/pg_regress/Tests/int8.stable.out.int128
        testing/Mtest.py.in
Branch: leftmart
Log Message:

Merge with default branch.


diffs (truncated from 4961 to 300 lines):

diff --git a/configure.ag b/configure.ag
--- a/configure.ag
+++ b/configure.ag
@@ -3456,6 +3456,7 @@ for comp in \
        'java_control ' \
        'java_jdbc    ' \
        'liblas       ' \
+       'liblzma      ' \
        'libxml2      ' \
        'lidar        ' \
        'netcdf       ' \
diff --git a/gdk/Makefile.ag b/gdk/Makefile.ag
--- a/gdk/Makefile.ag
+++ b/gdk/Makefile.ag
@@ -14,7 +14,7 @@ lib_gdk = {
        SOURCES = \
                gdk.h gdk_cand.h gdk_atomic.h gdk_batop.c \
                gdk_select.c \
-               gdk_search.c gdk_search.h gdk_tm.c \
+               gdk_search.c gdk_hash.c gdk_hash.h gdk_tm.c \
                gdk_align.c gdk_bbp.c gdk_bbp.h \
                gdk_heap.c gdk_utils.c gdk_utils.h \
                gdk_atoms.c gdk_atoms.h \
@@ -50,7 +50,7 @@ headers_h = {
                gdk_calc.h \
                gdk_delta.h \
                gdk_posix.h \
-               gdk_search.h \
+               gdk_hash.h \
                gdk_system.h \
                gdk_utils.h
 }
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -872,7 +872,7 @@ typedef struct {
         varsized:1,            /* varsized (1) or fixedsized (0) */
         key:2,                 /* duplicates allowed? */
         dense:1,               /* OID only: only consecutive values */
-        nonil:1,               /* nonil isn't propchecked yet */
+        nonil:1,               /* there are no nils in the column */
         nil:1,                 /* there is a nil in the column */
         sorted:1,              /* column is sorted in ascending order */
         revsorted:1;           /* column is sorted in descending order */
@@ -1265,6 +1265,16 @@ gdk_export gdk_return BATdel(BAT *b, BAT
 gdk_export gdk_return BUNinplace(BAT *b, BUN p, const void *right, bit force);
 gdk_export gdk_return BATreplace(BAT *b, BAT *p, BAT *n, bit force);
 
+/* Functions to perform a binary search on a sorted BAT.
+ * See gdk_search.c for details. */
+gdk_export BUN SORTfnd(BAT *b, const void *v);
+gdk_export BUN SORTfndfirst(BAT *b, const void *v);
+gdk_export BUN SORTfndlast(BAT *b, const void *v);
+
+gdk_export BUN ORDERfnd(BAT *b, const void *v);
+gdk_export BUN ORDERfndfirst(BAT *b, const void *v);
+gdk_export BUN ORDERfndlast(BAT *b, const void *v);
+
 gdk_export BUN BUNfnd(BAT *b, const void *right);
 
 #define BUNfndVOID(b, v)                                               \
@@ -2404,7 +2414,7 @@ gdk_export void GDKsyserror(_In_z_ _Prin
  * @
  */
 #include "gdk_delta.h"
-#include "gdk_search.h"
+#include "gdk_hash.h"
 #include "gdk_atoms.h"
 #include "gdk_bbp.h"
 #include "gdk_utils.h"
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1325,7 +1325,7 @@ BUNfnd(BAT *b, const void *v)
                        return SORTfnd(b, v);
        }
        bi = bat_iterator(b);
-       switch (ATOMstorage(b->ttype)) {
+       switch (ATOMbasetype(b->ttype)) {
        case TYPE_bte:
                HASHfnd_bte(r, bi, v);
                break;
@@ -1333,10 +1333,14 @@ BUNfnd(BAT *b, const void *v)
                HASHfnd_sht(r, bi, v);
                break;
        case TYPE_int:
-       case TYPE_flt:
                HASHfnd_int(r, bi, v);
                break;
+       case TYPE_flt:
+               HASHfnd_flt(r, bi, v);
+               break;
        case TYPE_dbl:
+               HASHfnd_dbl(r, bi, v);
+               break;
        case TYPE_lng:
                HASHfnd_lng(r, bi, v);
                break;
diff --git a/gdk/gdk_delta.c b/gdk/gdk_delta.c
--- a/gdk/gdk_delta.c
+++ b/gdk/gdk_delta.c
@@ -171,9 +171,6 @@ BATundo(BAT *b)
        bunfirst = b->batDeleted;
        bunlast = b->batFirst;
        if (bunlast > b->batDeleted) {
-               BUN i = bunfirst;
-               BAT *bm = BBP_cache(-b->batCacheid);
-
                /* elements are 'inserted' => zap properties */
                b->hsorted = 0;
                b->hrevsorted = 0;
@@ -183,18 +180,7 @@ BATundo(BAT *b)
                        BATkey(b, FALSE);
                if (b->tkey)
                        BATkey(BATmirror(b), FALSE);
-
-               for (p = bunfirst; p < bunlast; p++, i++) {
-                       ptr h = BUNhead(bi, p);
-                       ptr t = BUNtail(bi, p);
-
-                       if (b->H->hash) {
-                               HASHins(bm, i, h);
-                       }
-                       if (b->T->hash) {
-                               HASHins(b, i, t);
-                       }
-               }
+               HASHremove(b);
        }
        b->batFirst = b->batDeleted;
        BATsetcount(b, b->batInserted);
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
new file mode 100644
--- /dev/null
+++ b/gdk/gdk_hash.c
@@ -0,0 +1,639 @@
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0.  If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * Copyright 1997 - July 2008 CWI, August 2008 - 2016 MonetDB B.V.
+ */
+
+/*
+ * - Hash Table Creation
+ * The hash indexing scheme for BATs reserves a block of memory to
+ * maintain the hash table and a collision list. A one-to-one mapping
+ * exists between the BAT and the collision list using the BUN
+ * index. NOTE: we alloc the link list as a parallel array to the BUN
+ * array; hence the hash link array has the same size as
+ * BATcapacity(b) (not BATcount(b)). This allows us in the BUN insert
+ * and delete to assume that there is hash space iff there is BUN
+ * space. If there is no BUN space, the BATextend now destroys the
+ * hash table.
+ *
+ * The hash mask size is a power of two, so we can do bitwise AND on
+ * the hash (integer) number to quickly find the head of the bucket
+ * chain.  Clearly, the hash mask size is a crucial parameter. If we
+ * know that the column is unique (hkey), we use direct hashing (mask
+ * size ~= BATcount). Otherwise we dynamically determine the mask size
+ * by starting out with mask size = BATcount/64 (just 1.5% of memory
+ * storage overhead). Then we start building the hash table on the
+ * first 25% of the BAT. As we aim for max-collisions list length of
+ * 4, the list on 25% should not exceed length 1. So, if a small
+ * number of collisssions occurs (mask/2) then we abandon the attempt
+ * and restart with a mask that is 4 times larger. This converges
+ * after three cycles to direct hashing.
+ */
+
+#include "monetdb_config.h"
+#include "gdk.h"
+#include "gdk_private.h"
+
+static int
+HASHwidth(BUN hashsize)
+{
+       if (hashsize <= (BUN) BUN2_NONE)
+               return BUN2;
+#if SIZEOF_BUN <= 4
+       return BUN4;
+#else
+       if (hashsize <= (BUN) BUN4_NONE)
+               return BUN4;
+       return BUN8;
+#endif
+}
+
+BUN
+HASHmask(BUN cnt)
+{
+       BUN m = cnt;
+
+       /* find largest power of 2 smaller than or equal to cnt */
+       m |= m >> 1;
+       m |= m >> 2;
+       m |= m >> 4;
+       m |= m >> 8;
+       m |= m >> 16;
+#if SIZEOF_BUN == 8
+       m |= m >> 32;
+#endif
+       m -= m >> 1;
+
+       /* if cnt is more than 1/3 into the gap between m and 2*m,
+          double m */
+       if (m + m - cnt < 2 * (cnt - m))
+               m += m;
+       if (m < BATTINY)
+               m = BATTINY;
+       return m;
+}
+
+static void
+HASHclear(Hash *h)
+{
+       /* since BUN2_NONE, BUN4_NONE, BUN8_NONE
+        * are all equal to -1 (~0), i.e., have all bits set,
+        * we can use a simple memset() to clear the Hash,
+        * rather than iteratively assigning individual
+        * BUNi_NONE values in a for-loop
+        */
+       memset(h->Hash, 0xFF, (h->mask + 1) * h->width);
+}
+
+#define HASH_VERSION           1
+#define HASH_HEADER_SIZE       5 /* nr of size_t fields in header */
+
+Hash *
+HASHnew(Heap *hp, int tpe, BUN size, BUN mask, BUN count)
+{
+       Hash *h = NULL;
+       int width = HASHwidth(size);
+
+       if (HEAPalloc(hp, mask + size + HASH_HEADER_SIZE * SIZEOF_SIZE_T / 
width, width) != GDK_SUCCEED)
+               return NULL;
+       hp->free = (mask + size) * width + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
+       h = GDKmalloc(sizeof(Hash));
+       if (h == NULL)
+               return NULL;
+       h->lim = size;
+       h->mask = mask - 1;
+       h->width = width;
+       switch (width) {
+       case BUN2:
+               h->nil = (BUN) BUN2_NONE;
+               break;
+       case BUN4:
+               h->nil = (BUN) BUN4_NONE;
+               break;
+#ifdef BUN8
+       case BUN8:
+               h->nil = (BUN) BUN8_NONE;
+               break;
+#endif
+       default:
+               assert(0);
+       }
+       h->Link = hp->base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
+       h->Hash = (void *) ((char *) h->Link + h->lim * width);
+       h->type = tpe;
+       h->heap = hp;
+       HASHclear(h);           /* zero the mask */
+       ((size_t *) hp->base)[0] = HASH_VERSION;
+       ((size_t *) hp->base)[1] = size;
+       ((size_t *) hp->base)[2] = mask;
+       ((size_t *) hp->base)[3] = width;
+       ((size_t *) hp->base)[4] = count;
+       ALGODEBUG fprintf(stderr, "#HASHnew: create hash(size " BUNFMT ", mask 
" BUNFMT ",width %d, nil " BUNFMT ", total " BUNFMT " bytes);\n", size, mask, 
width, h->nil, (size + mask) * width);
+       return h;
+}
+
+#define starthash(TYPE)                                                        
\
+       do {                                                            \
+               TYPE *v = (TYPE *) BUNtloc(bi, 0);                      \
+               for (; r < p; r++) {                                    \
+                       BUN c = (BUN) hash_##TYPE(h, v+r);              \
+                                                                       \
+                       if (HASHget(h, c) == HASHnil(h) && nslots-- == 0) \
+                               break; /* mask too full */              \
+                       HASHputlink(h, r, HASHget(h, c));               \
+                       HASHput(h, c, r);                               \
+               }                                                       \
+       } while (0)
+#define finishhash(TYPE)                                       \
+       do {                                                    \
+               TYPE *v = (TYPE *) BUNtloc(bi, 0);              \
+               for (; p < q; p++) {                            \
+                       BUN c = (BUN) hash_##TYPE(h, v + p);    \
+                                                               \
+                       HASHputlink(h, p, HASHget(h, c));       \
+                       HASHput(h, c, p);                       \
+               }                                               \
+       } while (0)
+
+/* collect HASH statistics for analysis */
+static void
+HASHcollisions(BAT *b, Hash *h)
+{
+       lng cnt, entries = 0, max = 0;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to