Changeset: ed25fe6fc948 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=ed25fe6fc948 Added Files: monetdb5/modules/mal/cluster.c monetdb5/modules/mal/cluster.h monetdb5/modules/mal/cluster.mal Removed Files: monetdb5/modules/mal/cluster.mx Modified Files: monetdb5/modules/mal/Makefile.ag Branch: default Log Message:
De-Mx of cluster module Straightforward code expansion. diffs (truncated from 1994 to 300 lines): diff --git a/monetdb5/modules/mal/Makefile.ag b/monetdb5/modules/mal/Makefile.ag --- a/monetdb5/modules/mal/Makefile.ag +++ b/monetdb5/modules/mal/Makefile.ag @@ -34,7 +34,7 @@ lib_mal = { bbp.c bbp.h \ box.c box.h \ clients.c clients.h \ - cluster.mx \ + cluster.c cluster.h \ const.c const.h \ constraints.c constraints.h \ factories.c factories.h \ @@ -77,7 +77,7 @@ headers_mal = { urlbox.mal transaction.mal \ mal_mapi.mal sabaoth.mal remote.mal \ txtsim.mal recycle.mal \ - cluster.mx trader.mal \ + cluster.mal trader.mal \ tokenizer.mal zorder.mal sample.mal } diff --git a/monetdb5/modules/mal/cluster.mx b/monetdb5/modules/mal/cluster.c rename from monetdb5/modules/mal/cluster.mx rename to monetdb5/modules/mal/cluster.c --- a/monetdb5/modules/mal/cluster.mx +++ b/monetdb5/modules/mal/cluster.c @@ -1,25 +1,22 @@ -@/ -The contents of this file are subject to the MonetDB Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.monetdb.org/Legal/MonetDBLicense +/* + * The contents of this file are subject to the MonetDB Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.monetdb.org/Legal/MonetDBLicense + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the + * License for the specific language governing rights and limitations + * under the License. + * + * The Original Code is the MonetDB Database System. + * + * The Initial Developer of the Original Code is CWI. + * Portions created by CWI are Copyright (C) 1997-July 2008 CWI. + * Copyright August 2008-2012 MonetDB B.V. + * All Rights Reserved. +*/ -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is the MonetDB Database System. - -The Initial Developer of the Original Code is CWI. -Portions created by CWI are Copyright (C) 1997-July 2008 CWI. -Copyright August 2008-2012 MonetDB B.V. -All Rights Reserved. -@ - -@f cluster - -@c /* * @a Martin Kersten, Niels Nes * @v 1.0 @@ -62,172 +59,147 @@ All Rights Reserved. * The remaining step is to perform this in parallel * for all BATs comprising a relational table. */ -@mal -module cluster; - -command key(b:bat[:oid,:any_1]) :bat[:oid,:oid] -address CLUSTER_key -comment "Create the hash key list"; - -command map(b:bat[:oid,:oid]) :bat[:oid,:oid] -address CLUSTER_map -comment "Recluster a hash key table and produce a re-allocation map"; - -pattern column(m:bat[:oid,:oid], b:bat[:oid,:any_1]):bat[:oid,:any_1] -address CLUSTER_column -comment "Reorder tail of the BAT using the cluster map"; - -pattern table(b:bat[:oid,:any]...):bat[:oid,:oid] -address CLUSTER_table -comment "Cluster the BATs using the first one as reference. -Return the oid map used"; - -@= mal_cls_new -command new(b:bat[:oid,:@1], bits:int, offset:int) - (psum:bat[:oid,:wrd], map:bat[:oid,:wrd]) -address CLS_create_@1 -comment "Compute the cluster map for bat b of hash key values. A cluster map is a list of unique (new) BUN positions. The p(refix) sum is a by product which returns the prefix sum of the per masked key frequency."; - -command new(b:bat[:oid,:@1], bits:int, offset:int, order:bit) - (psum:bat[:oid,:wrd], map:bat[:oid,:bte]) -address CLS_create2_@1 -comment "Compute the cluster map for bat b. A cluster map is a list of partition ids. The p(refix) sum is a by product which returns the prefix sum of the per partition frequency. Prefix sum and map can be use to 'cluster' related columns based on b. Incase the offset is non negative it is used to shift the key values. Offset together with the mask bits should make sure that the result of the partitioning can be used in a order by sequence. If this holds and the input is sorted we return a 'compressed' representation of the map, ie [:oid,:bte], the repeated values are not inserted. The order flag indicates that the clustering is used for ordering, ie partition keys aren't fixed so we can internaly optimize partition sizes."; -@ -@mal - -@:mal_cls_new(bte)@ -@:mal_cls_new(sht)@ -@:mal_cls_new(int)@ -@:mal_cls_new(wrd)@ -@:mal_cls_new(lng)@ -@:mal_cls_new(flt)@ -@:mal_cls_new(dbl)@ - -command map(cluster:bat[:oid,:wrd], b:bat[:oid,:any_1]) :bat[:oid,:any_1] -address CLS_map -comment "Reorder tail of bat b, using a cluster map"; - -command map(psum:bat[:oid,:wrd], cluster:bat[:oid,:bte], b:bat[:oid,:any_1]) :bat[:oid,:any_1] -address CLS_map2 -comment "Reorder tail of bat b, using a cluster prefix sum and map. In case the -map is sorted with a non dense head we assume no remapping is needed"; - -pattern split(clustered:bat[:oid,:any_1], psum:bat[:oid,:wrd]) :bat[:oid,:any_1]... -address CLS_split -comment "split the clustered bat into parts"; - -@h -#ifndef _CLUSTER_H -#define _CLUSTER_H - -#include <mal.h> -#include "mal_interpreter.h" -#include "mal_client.h" - -/*#define _CLUSTER_DEBUG for local debugging */ - -#ifdef WIN32 -#if !defined(LIBMAL) && !defined(LIBATOMS) && !defined(LIBKERNEL) && !defined(LIBMAL) && !defined(LIBOPTIMIZER) && !defined(LIBSCHEDULER) && !defined(LIBMONETDB5) -#define cluster_export extern __declspec(dllimport) -#else -#define cluster_export extern __declspec(dllexport) -#endif -#else -#define cluster_export extern -#endif - -cluster_export str CLUSTER_key( bat *M, bat *B); -cluster_export str CLUSTER_map(bat *RB, bat *B); -cluster_export str CLUSTER_apply(bat *bid, BAT *nb, BAT *cmap); -cluster_export str CLUSTER_column( Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); -cluster_export str CLUSTER_table( Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); - -@= hdr_cls_new -cluster_export str CLS_create_@1(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits, unsigned int *offset); -cluster_export str CLS_create2_@1(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits, unsigned int *offset, bit *order); -@ -@h -@:hdr_cls_new(bte)@ -@:hdr_cls_new(sht)@ -@:hdr_cls_new(int)@ -@:hdr_cls_new(wrd)@ -@:hdr_cls_new(lng)@ -@:hdr_cls_new(flt)@ -@:hdr_cls_new(dbl)@ - -cluster_export str CLS_map(bat *rb, bat *cmap, bat *b); -cluster_export str CLS_map2(bat *rb, bat *psum, bat *cmap, bat *b); -cluster_export str CLS_split( Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); - -#endif /* _CLUSTER_H */ -@c - #include "monetdb_config.h" #include "cluster.h" #include <mal_exception.h> #include "algebra.h" -@= map_fixed static void -CLUSTER_key_@1( BAT *map, BAT *b) +CLUSTER_key_bte( BAT *map, BAT *b) { - @1 *bt, *be; + bte *bt, *be; oid *o; assert(BUNfirst(map) == 0); assert(BUNfirst(b) == 0); o = (oid*)Tloc(map, 0); - bt = (@1*)Tloc(b, 0); + bt = (bte*)Tloc(b, 0); be = bt + BATcount(b); for ( ; bt < be; bt++){ - BUN h = hash_@1(b->T->hash,bt); + BUN h = hash_bte(b->T->hash,bt); *o++= h; } } -#if 0 -static str -CLUSTER_column_@1(BAT *nb, BAT *b, BAT *cmap) +static void +CLUSTER_key_sht( BAT *map, BAT *b) { - @1 *r,*qb; - oid *ct, *ce; - int cnt=0; - - assert(BUNfirst(nb) == 0); + sht *bt, *be; + oid *o; + + assert(BUNfirst(map) == 0); assert(BUNfirst(b) == 0); - assert(BUNfirst(cmap) == 0); - assert(cmap->ttype == TYPE_oid); - r = (@1*)Tloc(nb, 0); - qb = (@1*)Tloc(b, 0); - ct = (oid *)Tloc(cmap, 0); - ce = ct + BATcount(cmap); - nb->H->heap.dirty = nb->T->heap.dirty= TRUE; - for ( ; ct < ce; ct++){ - *r++ = qb[*ct]; - if ( ((++cnt) % 1000000) == 0){ - BATsave(nb); - nb->H->heap.dirty = nb->T->heap.dirty= TRUE; - } + o = (oid*)Tloc(map, 0); + bt = (sht*)Tloc(b, 0); + be = bt + BATcount(b); + for ( ; bt < be; bt++){ + BUN h = hash_sht(b->T->hash,bt); + *o++= h; } - BATsetcount(nb, BATcount(b)); - nb->tsorted= FALSE; - nb->trevsorted= FALSE; - nb->tdense= FALSE; - BATderiveProps(nb, 0); - return MAL_SUCCEED; } -#endif -@ -@c -@:map_fixed(bte)@ -@:map_fixed(sht)@ -@:map_fixed(oid)@ -@:map_fixed(wrd)@ -@:map_fixed(int)@ -@:map_fixed(lng)@ -@:map_fixed(flt)@ -@:map_fixed(dbl)@ +static void +CLUSTER_key_oid( BAT *map, BAT *b) +{ + oid *bt, *be; + oid *o; + + assert(BUNfirst(map) == 0); + assert(BUNfirst(b) == 0); + o = (oid*)Tloc(map, 0); + bt = (oid*)Tloc(b, 0); + be = bt + BATcount(b); + for ( ; bt < be; bt++){ + BUN h = hash_oid(b->T->hash,bt); + *o++= h; + } +} + +static void +CLUSTER_key_wrd( BAT *map, BAT *b) +{ + wrd *bt, *be; + oid *o; + + assert(BUNfirst(map) == 0); + assert(BUNfirst(b) == 0); + o = (oid*)Tloc(map, 0); + bt = (wrd*)Tloc(b, 0); + be = bt + BATcount(b); + for ( ; bt < be; bt++){ + BUN h = hash_wrd(b->T->hash,bt); + *o++= h; + } +} + +static void +CLUSTER_key_int( BAT *map, BAT *b) +{ + int *bt, *be; + oid *o; + + assert(BUNfirst(map) == 0); + assert(BUNfirst(b) == 0); + o = (oid*)Tloc(map, 0); + bt = (int*)Tloc(b, 0); _______________________________________________ Checkin-list mailing list Checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list