Changeset: 4cd6061fb4c2 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4cd6061fb4c2 Added Files: gdk/gdk_hash.c gdk/gdk_hash.h sql/jdbc/tests/Tests/Test_Dobjects.SQL.bat Removed Files: gdk/gdk_search.h sql/test/Users/Tests/util.py Modified Files: configure.ag gdk/Makefile.ag gdk/gdk.h gdk/gdk_bat.c gdk/gdk_delta.c gdk/gdk_search.c java/ChangeLog java/src/main/java/nl/cwi/monetdb/jdbc/MonetDatabaseMetaData.java java/tests/Test_Dobjects.java monetdb5/extras/mal_optimizer_template/Tests/opt_sql_append.stable.err monetdb5/extras/mal_optimizer_template/Tests/opt_sql_append.stable.out monetdb5/extras/mal_optimizer_template/Tests/opt_sql_append.stable.out.single monetdb5/mal/Tests/dynamicload.malC monetdb5/mal/Tests/dynamicload.stable.err monetdb5/mal/mal_listing.c monetdb5/optimizer/Tests/JPexample.malC monetdb5/optimizer/Tests/JPexample.stable.out monetdb5/optimizer/Tests/projectionchain.malC monetdb5/optimizer/Tests/projectionchain.stable.out monetdb5/optimizer/opt_pipes.c monetdb5/optimizer/opt_projectionpath.c monetdb5/optimizer/opt_volcano.c sql/backends/monet5/sql_optimizer.c sql/backends/monet5/sql_user.c sql/benchmarks/ssbm/Tests/04-explain.stable.out sql/benchmarks/ssbm/Tests/04-explain.stable.out.int128 sql/benchmarks/ssbm/Tests/05-explain.stable.out sql/benchmarks/ssbm/Tests/05-explain.stable.out.int128 sql/benchmarks/ssbm/Tests/06-explain.stable.out sql/benchmarks/ssbm/Tests/06-explain.stable.out.int128 sql/benchmarks/ssbm/Tests/07-explain.stable.out sql/benchmarks/ssbm/Tests/07-explain.stable.out.int128 sql/benchmarks/ssbm/Tests/08-explain.stable.out sql/benchmarks/ssbm/Tests/08-explain.stable.out.int128 sql/benchmarks/ssbm/Tests/09-explain.stable.out sql/benchmarks/ssbm/Tests/09-explain.stable.out.int128 sql/benchmarks/ssbm/Tests/11-explain.stable.out sql/benchmarks/ssbm/Tests/11-explain.stable.out.int128 sql/benchmarks/ssbm/Tests/12-explain.stable.out sql/benchmarks/ssbm/Tests/12-explain.stable.out.int128 sql/benchmarks/ssbm/Tests/13-explain.stable.out sql/benchmarks/ssbm/Tests/13-explain.stable.out.int128 sql/benchmarks/tpch/Tests/02-explain.stable.out sql/benchmarks/tpch/Tests/03-explain.stable.out sql/benchmarks/tpch/Tests/03-explain.stable.out.int128 sql/benchmarks/tpch/Tests/05-explain.stable.out sql/benchmarks/tpch/Tests/05-explain.stable.out.32bit sql/benchmarks/tpch/Tests/05-explain.stable.out.int128 sql/benchmarks/tpch/Tests/07-explain.stable.out sql/benchmarks/tpch/Tests/07-explain.stable.out.int128 sql/benchmarks/tpch/Tests/08-explain.stable.out sql/benchmarks/tpch/Tests/08-explain.stable.out.int128 sql/benchmarks/tpch/Tests/09-explain.stable.out sql/benchmarks/tpch/Tests/09-explain.stable.out.int128 sql/benchmarks/tpch/Tests/10-explain.stable.out sql/benchmarks/tpch/Tests/10-explain.stable.out.int128 sql/benchmarks/tpch/Tests/15-explain.stable.out sql/benchmarks/tpch/Tests/15-explain.stable.out.int128 sql/benchmarks/tpch/Tests/16-explain.stable.out sql/benchmarks/tpch/Tests/16-explain.stable.out.32bit sql/benchmarks/tpch/Tests/17-explain.stable.out sql/benchmarks/tpch/Tests/17-explain.stable.out.int128 sql/benchmarks/tpch/Tests/18-explain.stable.out sql/benchmarks/tpch/Tests/18-explain.stable.out.int128 sql/benchmarks/tpch/Tests/19-explain.stable.out sql/benchmarks/tpch/Tests/19-explain.stable.out.int128 sql/benchmarks/tpch/Tests/20-explain.stable.out sql/benchmarks/tpch/Tests/20-explain.stable.out.32bit sql/benchmarks/tpch/Tests/20-explain.stable.out.int128 sql/benchmarks/tpch/Tests/21-explain.stable.out sql/benchmarks/tpch/Tests/21-explain.stable.out.32bit sql/benchmarks/tpch/Tests/22-explain.stable.out sql/benchmarks/tpch/Tests/22-explain.stable.out.32bit sql/benchmarks/tpch/Tests/22-explain.stable.out.int128 sql/jdbc/tests/Tests/All sql/jdbc/tests/Tests/Test_Dobjects.stable.out sql/jdbc/tests/Tests/Test_PSmetadata.stable.out sql/jdbc/tests/Tests/Test_Rmetadata.stable.out sql/jdbc/tests/Tests/Test_Rsqldata.stable.out sql/server/rel_optimizer.c sql/server/rel_optimizer.h sql/server/rel_rel.c sql/server/rel_rel.h sql/test/Tests/setoptimizer.stable.err sql/test/Tests/setoptimizer.stable.out sql/test/Tests/setoptimizer.stable.out.Windows sql/test/Users/Tests/changePasswordUser.SQL.py sql/test/Users/Tests/changeSchemaUser.SQL.py sql/test/Users/Tests/columnRights.SQL.py sql/test/Users/Tests/dropManyUsers.Bug-3764.SQL.py sql/test/Users/Tests/grantMonetdb.SQL.py sql/test/Users/Tests/grantMonetdbToRegularUser.Bug-3771.SQL.py sql/test/Users/Tests/grantMonetdbToSchemaOwner.Bug-3771.SQL.py sql/test/Users/Tests/grantPrivilegesNonDefaultRole.Bug-3365.SQL.py sql/test/Users/Tests/grantRevokeAndGrantAgain.Bug-3765.SQL.py sql/test/Users/Tests/grantRole.Bug-3772.SQL.py sql/test/Users/Tests/renameUser.SQL.py sql/test/Users/Tests/role.SQL.py sql/test/Users/Tests/schemaRights.SQL.py sql/test/Users/Tests/withGrantOption.SQL.py sql/test/pg_regress/Tests/int8.sql sql/test/pg_regress/Tests/int8.stable.err sql/test/pg_regress/Tests/int8.stable.out sql/test/pg_regress/Tests/int8.stable.out.int128 testing/Mtest.py.in Branch: leftmart Log Message:
Merge with default branch. diffs (truncated from 4961 to 300 lines): diff --git a/configure.ag b/configure.ag --- a/configure.ag +++ b/configure.ag @@ -3456,6 +3456,7 @@ for comp in \ 'java_control ' \ 'java_jdbc ' \ 'liblas ' \ + 'liblzma ' \ 'libxml2 ' \ 'lidar ' \ 'netcdf ' \ diff --git a/gdk/Makefile.ag b/gdk/Makefile.ag --- a/gdk/Makefile.ag +++ b/gdk/Makefile.ag @@ -14,7 +14,7 @@ lib_gdk = { SOURCES = \ gdk.h gdk_cand.h gdk_atomic.h gdk_batop.c \ gdk_select.c \ - gdk_search.c gdk_search.h gdk_tm.c \ + gdk_search.c gdk_hash.c gdk_hash.h gdk_tm.c \ gdk_align.c gdk_bbp.c gdk_bbp.h \ gdk_heap.c gdk_utils.c gdk_utils.h \ gdk_atoms.c gdk_atoms.h \ @@ -50,7 +50,7 @@ headers_h = { gdk_calc.h \ gdk_delta.h \ gdk_posix.h \ - gdk_search.h \ + gdk_hash.h \ gdk_system.h \ gdk_utils.h } diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -872,7 +872,7 @@ typedef struct { varsized:1, /* varsized (1) or fixedsized (0) */ key:2, /* duplicates allowed? */ dense:1, /* OID only: only consecutive values */ - nonil:1, /* nonil isn't propchecked yet */ + nonil:1, /* there are no nils in the column */ nil:1, /* there is a nil in the column */ sorted:1, /* column is sorted in ascending order */ revsorted:1; /* column is sorted in descending order */ @@ -1265,6 +1265,16 @@ gdk_export gdk_return BATdel(BAT *b, BAT gdk_export gdk_return BUNinplace(BAT *b, BUN p, const void *right, bit force); gdk_export gdk_return BATreplace(BAT *b, BAT *p, BAT *n, bit force); +/* Functions to perform a binary search on a sorted BAT. + * See gdk_search.c for details. */ +gdk_export BUN SORTfnd(BAT *b, const void *v); +gdk_export BUN SORTfndfirst(BAT *b, const void *v); +gdk_export BUN SORTfndlast(BAT *b, const void *v); + +gdk_export BUN ORDERfnd(BAT *b, const void *v); +gdk_export BUN ORDERfndfirst(BAT *b, const void *v); +gdk_export BUN ORDERfndlast(BAT *b, const void *v); + gdk_export BUN BUNfnd(BAT *b, const void *right); #define BUNfndVOID(b, v) \ @@ -2404,7 +2414,7 @@ gdk_export void GDKsyserror(_In_z_ _Prin * @ */ #include "gdk_delta.h" -#include "gdk_search.h" +#include "gdk_hash.h" #include "gdk_atoms.h" #include "gdk_bbp.h" #include "gdk_utils.h" diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c --- a/gdk/gdk_bat.c +++ b/gdk/gdk_bat.c @@ -1325,7 +1325,7 @@ BUNfnd(BAT *b, const void *v) return SORTfnd(b, v); } bi = bat_iterator(b); - switch (ATOMstorage(b->ttype)) { + switch (ATOMbasetype(b->ttype)) { case TYPE_bte: HASHfnd_bte(r, bi, v); break; @@ -1333,10 +1333,14 @@ BUNfnd(BAT *b, const void *v) HASHfnd_sht(r, bi, v); break; case TYPE_int: - case TYPE_flt: HASHfnd_int(r, bi, v); break; + case TYPE_flt: + HASHfnd_flt(r, bi, v); + break; case TYPE_dbl: + HASHfnd_dbl(r, bi, v); + break; case TYPE_lng: HASHfnd_lng(r, bi, v); break; diff --git a/gdk/gdk_delta.c b/gdk/gdk_delta.c --- a/gdk/gdk_delta.c +++ b/gdk/gdk_delta.c @@ -171,9 +171,6 @@ BATundo(BAT *b) bunfirst = b->batDeleted; bunlast = b->batFirst; if (bunlast > b->batDeleted) { - BUN i = bunfirst; - BAT *bm = BBP_cache(-b->batCacheid); - /* elements are 'inserted' => zap properties */ b->hsorted = 0; b->hrevsorted = 0; @@ -183,18 +180,7 @@ BATundo(BAT *b) BATkey(b, FALSE); if (b->tkey) BATkey(BATmirror(b), FALSE); - - for (p = bunfirst; p < bunlast; p++, i++) { - ptr h = BUNhead(bi, p); - ptr t = BUNtail(bi, p); - - if (b->H->hash) { - HASHins(bm, i, h); - } - if (b->T->hash) { - HASHins(b, i, t); - } - } + HASHremove(b); } b->batFirst = b->batDeleted; BATsetcount(b, b->batInserted); diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c new file mode 100644 --- /dev/null +++ b/gdk/gdk_hash.c @@ -0,0 +1,639 @@ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * Copyright 1997 - July 2008 CWI, August 2008 - 2016 MonetDB B.V. + */ + +/* + * - Hash Table Creation + * The hash indexing scheme for BATs reserves a block of memory to + * maintain the hash table and a collision list. A one-to-one mapping + * exists between the BAT and the collision list using the BUN + * index. NOTE: we alloc the link list as a parallel array to the BUN + * array; hence the hash link array has the same size as + * BATcapacity(b) (not BATcount(b)). This allows us in the BUN insert + * and delete to assume that there is hash space iff there is BUN + * space. If there is no BUN space, the BATextend now destroys the + * hash table. + * + * The hash mask size is a power of two, so we can do bitwise AND on + * the hash (integer) number to quickly find the head of the bucket + * chain. Clearly, the hash mask size is a crucial parameter. If we + * know that the column is unique (hkey), we use direct hashing (mask + * size ~= BATcount). Otherwise we dynamically determine the mask size + * by starting out with mask size = BATcount/64 (just 1.5% of memory + * storage overhead). Then we start building the hash table on the + * first 25% of the BAT. As we aim for max-collisions list length of + * 4, the list on 25% should not exceed length 1. So, if a small + * number of collisssions occurs (mask/2) then we abandon the attempt + * and restart with a mask that is 4 times larger. This converges + * after three cycles to direct hashing. + */ + +#include "monetdb_config.h" +#include "gdk.h" +#include "gdk_private.h" + +static int +HASHwidth(BUN hashsize) +{ + if (hashsize <= (BUN) BUN2_NONE) + return BUN2; +#if SIZEOF_BUN <= 4 + return BUN4; +#else + if (hashsize <= (BUN) BUN4_NONE) + return BUN4; + return BUN8; +#endif +} + +BUN +HASHmask(BUN cnt) +{ + BUN m = cnt; + + /* find largest power of 2 smaller than or equal to cnt */ + m |= m >> 1; + m |= m >> 2; + m |= m >> 4; + m |= m >> 8; + m |= m >> 16; +#if SIZEOF_BUN == 8 + m |= m >> 32; +#endif + m -= m >> 1; + + /* if cnt is more than 1/3 into the gap between m and 2*m, + double m */ + if (m + m - cnt < 2 * (cnt - m)) + m += m; + if (m < BATTINY) + m = BATTINY; + return m; +} + +static void +HASHclear(Hash *h) +{ + /* since BUN2_NONE, BUN4_NONE, BUN8_NONE + * are all equal to -1 (~0), i.e., have all bits set, + * we can use a simple memset() to clear the Hash, + * rather than iteratively assigning individual + * BUNi_NONE values in a for-loop + */ + memset(h->Hash, 0xFF, (h->mask + 1) * h->width); +} + +#define HASH_VERSION 1 +#define HASH_HEADER_SIZE 5 /* nr of size_t fields in header */ + +Hash * +HASHnew(Heap *hp, int tpe, BUN size, BUN mask, BUN count) +{ + Hash *h = NULL; + int width = HASHwidth(size); + + if (HEAPalloc(hp, mask + size + HASH_HEADER_SIZE * SIZEOF_SIZE_T / width, width) != GDK_SUCCEED) + return NULL; + hp->free = (mask + size) * width + HASH_HEADER_SIZE * SIZEOF_SIZE_T; + h = GDKmalloc(sizeof(Hash)); + if (h == NULL) + return NULL; + h->lim = size; + h->mask = mask - 1; + h->width = width; + switch (width) { + case BUN2: + h->nil = (BUN) BUN2_NONE; + break; + case BUN4: + h->nil = (BUN) BUN4_NONE; + break; +#ifdef BUN8 + case BUN8: + h->nil = (BUN) BUN8_NONE; + break; +#endif + default: + assert(0); + } + h->Link = hp->base + HASH_HEADER_SIZE * SIZEOF_SIZE_T; + h->Hash = (void *) ((char *) h->Link + h->lim * width); + h->type = tpe; + h->heap = hp; + HASHclear(h); /* zero the mask */ + ((size_t *) hp->base)[0] = HASH_VERSION; + ((size_t *) hp->base)[1] = size; + ((size_t *) hp->base)[2] = mask; + ((size_t *) hp->base)[3] = width; + ((size_t *) hp->base)[4] = count; + ALGODEBUG fprintf(stderr, "#HASHnew: create hash(size " BUNFMT ", mask " BUNFMT ",width %d, nil " BUNFMT ", total " BUNFMT " bytes);\n", size, mask, width, h->nil, (size + mask) * width); + return h; +} + +#define starthash(TYPE) \ + do { \ + TYPE *v = (TYPE *) BUNtloc(bi, 0); \ + for (; r < p; r++) { \ + BUN c = (BUN) hash_##TYPE(h, v+r); \ + \ + if (HASHget(h, c) == HASHnil(h) && nslots-- == 0) \ + break; /* mask too full */ \ + HASHputlink(h, r, HASHget(h, c)); \ + HASHput(h, c, r); \ + } \ + } while (0) +#define finishhash(TYPE) \ + do { \ + TYPE *v = (TYPE *) BUNtloc(bi, 0); \ + for (; p < q; p++) { \ + BUN c = (BUN) hash_##TYPE(h, v + p); \ + \ + HASHputlink(h, p, HASHget(h, c)); \ + HASHput(h, c, p); \ + } \ + } while (0) + +/* collect HASH statistics for analysis */ +static void +HASHcollisions(BAT *b, Hash *h) +{ + lng cnt, entries = 0, max = 0; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list