Changeset: 56cb5c4722cf for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=56cb5c4722cf Removed Files: monetdb5/modules/mal/mkey.h monetdb5/modules/mal/projectionpath.h monetdb5/modules/mal/sample.h Modified Files: clients/Tests/exports.stable.out monetdb5/modules/mal/CMakeLists.txt monetdb5/modules/mal/mkey.c monetdb5/modules/mal/projectionpath.c monetdb5/modules/mal/sample.c sql/backends/monet5/sql.c sql/backends/monet5/sql.h Branch: Oct2020 Log Message:
Export less diffs (truncated from 432 to 300 lines): diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -687,7 +687,6 @@ int mo_system_config(opt **Set, int setl const char *wsaerror(int); # monetdb5 -str ALGprojectionpath(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); str AUTHaddRemoteTableCredentials(const char *local_table, const char *localuser, const char *uri, const char *remoteuser, const char *pass, bool pw_encrypted); str AUTHaddUser(oid *ret, Client c, const char *user, const char *pass); str AUTHchangePassword(Client c, const char *oldpass, const char *passwd); @@ -921,13 +920,6 @@ int MCpushClientInput(Client c, bstream void MCstopClients(Client c); str MCsuspendClient(int id); int MCvalid(Client c); -str MKEYbathash(bat *res, const bat *bid); -str MKEYbulk_rotate_xor_hash(bat *ret, const bat *hid, const int *nbits, const bat *bid); -str MKEYbulkconst_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p); -str MKEYconstbulk_rotate_xor_hash(bat *ret, const lng *h, const int *nbits, const bat *bid); -str MKEYhash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p); -str MKEYrotate(lng *ret, const lng *v, const int *nbits); -str MKEYrotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p); void MPresetProfiler(stream *fdout); char *MSP_locate_sqlscript(const char *mod_name, bit recurse); str MSinitClientPrg(Client cntxt, str mod, str nme); @@ -1020,7 +1012,6 @@ str RMTput(Client cntxt, MalBlkPtr mb, M str RMTregister(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); str RMTregisterSupervisor(int *ret, str *sup_uuid, str *query_uuid); str RMTresolve(bat *ret, str *pat); -str SAMPLEuniform(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); str SERVERbindBAT(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); str SERVERclient(void *res, const Stream *In, const Stream *Out); str SERVERconnect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pc); diff --git a/monetdb5/modules/mal/CMakeLists.txt b/monetdb5/modules/mal/CMakeLists.txt --- a/monetdb5/modules/mal/CMakeLists.txt +++ b/monetdb5/modules/mal/CMakeLists.txt @@ -18,7 +18,7 @@ target_sources(malmodules inspect.c inspect.h manual.c manual.h mal_io.c mal_io.h - mkey.c mkey.h + mkey.c manifold.c manifold.h iterator.c iterator.h clients.c clients.h @@ -34,12 +34,12 @@ target_sources(malmodules remote.c remote.h txtsim.c txtsim.h tokenizer.c tokenizer.h - sample.c sample.h + sample.c json_util.c querylog.c querylog.h sysmon.c sysmon.h tracer.c tracer.h - projectionpath.c projectionpath.h + projectionpath.c tablet.c tablet.h batcalc.c calc.c PUBLIC diff --git a/monetdb5/modules/mal/mkey.c b/monetdb5/modules/mal/mkey.c --- a/monetdb5/modules/mal/mkey.c +++ b/monetdb5/modules/mal/mkey.c @@ -15,7 +15,9 @@ * values together. We create a hash and rotate command to do this. */ #include "monetdb_config.h" -#include "mkey.h" +#include "mal.h" +#include "mal_interpreter.h" +#include "mal_exception.h" #define MKEYHASH_bte(valp) ((ulng) (lng) *(const bte*)(valp)) #define MKEYHASH_sht(valp) ((ulng) (lng) *(const sht*)(valp)) @@ -33,14 +35,14 @@ GDK_ROTATE(ulng x, int y, int z) } /* TODO: nil handling. however; we do not want to lose time in bulk_rotate_xor_hash with that */ -str +static str MKEYrotate(lng *res, const lng *val, const int *n) { *res = (lng) GDK_ROTATE((ulng) *val, *n, (sizeof(lng)*8) - *n); return MAL_SUCCEED; } -str +static str MKEYhash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) { lng *res; @@ -87,7 +89,7 @@ MKEYhash(Client cntxt, MalBlkPtr mb, Mal return MAL_SUCCEED; } -str +static str MKEYbathash(bat *res, const bat *bid) { BAT *b, *dst; @@ -184,7 +186,7 @@ MKEYbathash(bat *res, const bat *bid) return MAL_SUCCEED; } -str +static str MKEYrotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) { lng *dst = getArgReference_lng(stk, p, 0); @@ -227,7 +229,7 @@ MKEYrotate_xor_hash(Client cntxt, MalBlk return MAL_SUCCEED; } -str +static str MKEYbulk_rotate_xor_hash(bat *res, const bat *hid, const int *nbits, const bat *bid) { BAT *hb, *b, *bn; @@ -340,7 +342,7 @@ MKEYbulk_rotate_xor_hash(bat *res, const return MAL_SUCCEED; } -str +static str MKEYbulkconst_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) { bat *res = getArgReference_bat(stk, p, 0); @@ -420,7 +422,7 @@ MKEYbulkconst_rotate_xor_hash(Client cnt return MAL_SUCCEED; } -str +static str MKEYconstbulk_rotate_xor_hash(bat *res, const lng *h, const int *nbits, const bat *bid) { BAT *b, *bn; @@ -529,8 +531,30 @@ mel_func mkey_init_funcs[] = { pattern("mkey", "bulk_rotate_xor_hash", MKEYbulkconst_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),batarg("h",lng),arg("nbits",int),argany("v",0))), command("mkey", "bulk_rotate_xor_hash", MKEYbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),batarg("h",lng),arg("nbits",int),batargany("b",1))), command("batmkey", "hash", MKEYbathash, false, "calculate a hash value", args(1,2, batarg("",lng),batargany("b",1))), + pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",bte))), + command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",bte))), + pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",sht))), + command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",sht))), + pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",int))), + command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",int))), + pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",lng))), + command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",lng))), + pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",oid))), + command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",oid))), + pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",lng))), + command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",lng))), + pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",flt))), + command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",flt))), + pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",dbl))), + command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",dbl))), + pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),argany("v",0))), + command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batargany("b",1))), + pattern("calc", "rotate_xor_hash", MKEYrotate_xor_hash, false, "", args(1,4, arg("",lng),arg("h",lng),arg("nbits",int),argany("v",1))), + command("batcalc", "rotate_xor_hash", MKEYbulk_rotate_xor_hash, false, "", args(1,4, batarg("",int),batarg("h",lng),arg("nbits",int),batargany("b",1))), #ifdef HAVE_HGE pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",hge))), + pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",hge))), + command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",hge))), #endif { .imp=NULL } }; diff --git a/monetdb5/modules/mal/mkey.h b/monetdb5/modules/mal/mkey.h deleted file mode 100644 --- a/monetdb5/modules/mal/mkey.h +++ /dev/null @@ -1,118 +0,0 @@ -/* - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Copyright 1997 - July 2008 CWI, August 2008 - 2020 MonetDB B.V. - */ - -/* - * @- The Problem - * When creating a join, we want to make a unique key of the attributes on both - * sides and then join these keys. Consider the following BATs. - * - * @verbatim - * orders customer link - * ==================== ===================== =========== - * zipcode h_nr zipcode hnr oid cid - * o1 13 9 c1 11 10 o1 c5 - * o2 11 10 c2 11 11 o2 c1 - * o3 11 11 c3 12 2 o3 c2 - * o4 12 5 c4 12 1 o4 nil - * o5 11 10 c5 13 9 o5 c1 - * o6 12 2 c6 14 7 o6 c3 - * o7 13 9 o7 c5 - * o8 12 1 o8 c4 - * o9 13 9 o9 c5 - * @end verbatim - * - * The current approach is designed to take minimal memory, as our previous - * solutions to the problem did not scale well. In case of singular keys, - * the link is executed by a simple join. Before going into the join, we - * make sure the end result size is not too large, which is done by looking - * at relation sizes (if the other key is unique) or, if that is not possible, - * by computing the exact join size. - * - * The join algorithm was also improved to do dynamic sampling to determine - * with high accuracy the join size, so that we can alloc in one go a memory - * region of sufficient size. This also reduces the ds\_link memory requirements. - * - * For compound keys, those that consist of multiple attributes, we now compute - * a derived column that contains an integer hash value derived from all - * key columns. - * This is done by computing a hash value for each individual key column - * and combining those by bitwise XOR and left-rotation. That is, for each - * column,we rotate the working hash value by N bits and XOR the hash value - * of the column over it. The working hash value is initialized with zero, - * and after all columns are processed, this working value is used as output. - * Computing the hash value for all columns in the key for one table is done - * by the command hash(). Hence, we do hash on both sides, and join - * that together with a simple join: - * - * @code{join(hash(keys), hash(keys.reverse);} - * - * One complication of this procedure are nil values: - * @table - * @itemize - * @item - * it may happen that the final hash-value (an int formed by a - * random bit pattern) accidentally has the value of int(nil). - * Notice that join never matches nil values. - * Hence these accidental nils must be replaced by a begin value (currently: 0). - * @item - * in case any of the compound key values is nil, our nil semantics - * require us that those tuples may never match on a join. Consequently, - * during the hash() processing of all compound key columns for computing - * the hash value, we also maintain a bit-bat that records which tuples had - * a nil value. The bit-bat is initialized to false, and the results of the - * nil-check on each column is OR-ed to it. - * Afterwards, the hash-value of all tuples that have this nil-bit set to - * TRUE are forced to int(nil), which will exclude them from matching. - * @end itemize - * - * Joining on hash values produces a @emph{superset} of the join result: - * it may happen that two different key combinations hash on the same value, - * which will make them match on the join (false hits). The final part - * of the ds\_link therefore consists of filtering out the false hits. - * This is done incrementally by joining back the join result to the original - * columns, incrementally one by one for each pair of corresponding - * columns. These values are compared with each other and we AND the - * result of this comparison together for each pair of columns. - * The bat containing these bits is initialized to all TRUE and serves as - * final result after all column pairs have been compared. - * The initial join result is finally filtered with this bit-bat. - * - * Joining back from the initial join-result to the original columns on - * both sides takes quite a lot of memory. For this reason, the false - * hit-filtering is done in slices (not all tuples at one time). - * In this way the memory requirements of this phase are kept low. - * In fact, the most memory demanding part of the join is the int-join - * on hash number, which takes N*24 bytes (where N= |L| = |R|). - * In comparison, the previous CTmultigroup/CTmultiderive approach - * took N*48 bytes. Additionally, by making it possible to use merge-sort, - * it avoids severe performance degradation (memory thrashing) as produced - * by the old ds\_link when the inner join relation would be larger than memory. - * - * If ds\_link performance is still an issue, the sort-merge join used here - * could be replaced by partitioned hash-join with radix-cluster/decluster. - * - * @+ Implementation - */ -#ifndef _MKEY_H -#define _MKEY_H - -/*#define _DEBUG_MKEY_ */ - -#include "mal.h" -#include "mal_interpreter.h" -#include "mal_exception.h" - -mal_export str MKEYrotate(lng *ret, const lng *v, const int *nbits); -mal_export str MKEYhash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p); -mal_export str MKEYrotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p); -mal_export str MKEYbulk_rotate_xor_hash(bat *ret, const bat *hid, const int *nbits, const bat *bid); -mal_export str MKEYbulkconst_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p); -mal_export str MKEYconstbulk_rotate_xor_hash(bat *ret, const lng *h, const int *nbits, const bat *bid); -mal_export str MKEYbathash(bat *res, const bat *bid); - -#endif /* _MKEY_H */ diff --git a/monetdb5/modules/mal/projectionpath.c b/monetdb5/modules/mal/projectionpath.c --- a/monetdb5/modules/mal/projectionpath.c +++ b/monetdb5/modules/mal/projectionpath.c @@ -7,9 +7,10 @@ */ #include "monetdb_config.h" -#include "projectionpath.h" +#include "mal_client.h" +#include "mal_interpreter.h" _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list