Changeset: 4bfb9ec728be for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4bfb9ec728be Added Files: gdk/gdk_bitvector.c gdk/gdk_bitvector.h Modified Files: clients/Tests/exports.stable.out gdk/Makefile.ag Branch: bitvector Log Message:
Create bitvector branch. diffs (286 lines): diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -434,6 +434,7 @@ void canditer_setidx(struct canditer *ci BAT *canditer_slice(struct canditer *ci, BUN lo, BUN hi); BAT *canditer_slice2(struct canditer *ci, BUN lo1, BUN hi1, BUN lo2, BUN hi2); int closedir(DIR *dir); +void clrBitVector(BitVector vector, BUN i, const bte bits); char *ctime_r(const time_t *restrict, char *restrict); date date_add_day(date dt, int days) __attribute__((__const__)); date date_add_month(date dt, int months) __attribute__((__const__)); @@ -482,12 +483,15 @@ void geomsqlfix_set(geomsqlfix_fptr); bool geomversion_get(void); void geomversion_set(void); bat getBBPsize(void); +int getBitVector(BitVector vector, BUN i, const bte bits); +size_t getBitVectorSize(const BUN cnt, const int width); char *get_bin_path(void); int gettimeofday(struct timeval *tv, int *ignore_zone); struct tm *gmtime_r(const time_t *restrict, struct tm *restrict); ssize_t hgeFromStr(const char *src, size_t *len, hge **dst, bool external); ssize_t hgeToStr(str *dst, size_t *len, const hge *src, bool external); const hge hge_nil; +void initBitMasks(void); ssize_t intFromStr(const char *src, size_t *len, int **dst, bool external); ssize_t intToStr(str *dst, size_t *len, const int *src, bool external); const int int_nil; @@ -525,6 +529,7 @@ char *mo_find_option(opt *set, int setle void mo_free_options(opt *set, int setlen); void mo_print_options(opt *set, int setlen); int mo_system_config(opt **Set, int setlen); +BitVector newBitVector(BUN cnt, int width); const oid oid_nil; DIR *opendir(const char *dirname); void print_trace(void); @@ -533,6 +538,7 @@ ssize_t ptrToStr(str *dst, size_t *len, const ptr ptr_nil; struct dirent *readdir(DIR *dir); void rewinddir(DIR *dir); +void setBitVector(BitVector vector, const BUN i, const bte bits, const BitVectorChunk value); ssize_t shtFromStr(const char *src, size_t *len, sht **dst, bool external); ssize_t shtToStr(str *dst, size_t *len, const sht *src, bool external); const sht sht_nil; @@ -553,6 +559,7 @@ timestamp timestamp_fromusec(lng usec) _ ssize_t timestamp_precision_tostr(str *buf, size_t *len, timestamp val, int precision, bool external); ssize_t timestamp_tostr(str *buf, size_t *len, const timestamp *val, bool external); ssize_t timestamp_tz_fromstr(const char *buf, size_t *len, timestamp **ret, bool external); +int tstBitVector(BitVector vector, BUN i, const bte bits); const timestamp unixepoch; gdk_return void_inplace(BAT *b, oid id, const void *val, bool force) __attribute__((__warn_unused_result__)); int win_mkdir(const char *, const int mode); diff --git a/gdk/Makefile.ag b/gdk/Makefile.ag --- a/gdk/Makefile.ag +++ b/gdk/Makefile.ag @@ -12,6 +12,7 @@ lib_gdk = { VERSION = $(GDK_VERSION) NAME = bat SOURCES = \ + gdk_bitvector.c gdk_bitvector.h \ gdk_calc.c gdk_calc.h gdk_calc_compare.h gdk_calc_private.h \ gdk_select.c \ gdk_analytic_bounds.c gdk_analytic.h \ diff --git a/gdk/gdk_bitvector.c b/gdk/gdk_bitvector.c new file mode 100644 --- /dev/null +++ b/gdk/gdk_bitvector.c @@ -0,0 +1,180 @@ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * Copyright 2008-2015 MonetDB B.V. + */ + +/* author: M Kersten + * This collection of routines manages put/get of bit patterns into a vector. + * The width of the individual elements is limited to sizeof(int) + * The meta-information is not stored within the vector. + */ +/* Unit testing +#include "stdio.h" +#include "stdlib.h" +#include "ctype.h" +#include "math.h" +#include "malloc.h" + +typedef unsigned int *BitVector; +typedef unsigned long BUN; +#define BUNFMT "%u" + */ + +#include "monetdb_config.h" +#include "gdk.h" +#include "gdk_bitvector.h" + +//#define _DEBUG_BITVECTOR_ + +#define BITS (sizeof( unsigned int) * 8) +static unsigned int masks[BITS+1]; + +void initBitMasks(void) +{ + unsigned int i,v=1; + for( i=0; i<BITS; i++){ + masks[i+1] = v; + v = (v << 1) | 1; + } +} + +size_t +getBitVectorSize(const BUN cnt, const int width) +{ + size_t size; + size = ((cnt * width) / BITS + (((cnt * width) % BITS ) > 0) ) * sizeof(unsigned int); + return size; +} + +BitVector +newBitVector(BUN cnt, int width) +{ + if( (unsigned) width > BITS) + return 0; + + initBitMasks(); + return (BitVector) GDKzalloc( getBitVectorSize(cnt,width)); +} + +// get the bits of cell i +int +getBitVector(BitVector vector, BUN i, const bte bits) +{ + BUN cid; + unsigned int value = 0, shift, m1; + + cid = (i * bits) / BITS; + shift = ( i * bits) % BITS; + + if( bits == 1){ + value = (vector[cid] & (1 << shift)) >>shift; + return value; + } + + if ( (shift + bits) <= BITS){ + // fits in a single cell + value = (vector[cid] >> shift) & masks[bits]; +#ifdef _DEBUG_BITVECTOR_ + printf("#getBitVector %ld i "BUNFMT" bits %d value %3d cell %10d cid "BUNFMT" shift %d\n",(long)vector,i,bits, value, vector[cid],cid,shift); +#endif + }else{ + // spread over two cells + m1 = BITS - shift; + value = ((vector[cid] & (masks[m1]<<shift)) >> shift) | ((vector[cid+1] & masks[bits - m1]) << m1); +#ifdef _DEBUG_BITVECTOR_ + printf("#getBitVector %ld i "BUNFMT" bits %d value %3d cell %10d %10d cid "BUNFMT" shift %d m1 %d\n",(long)vector,i,bits, value, vector[cid], vector[cid+1],cid,shift,m1); +#endif + } + return value; +} + +// set the bits of cell idx to the lower number of bits of the value +void +setBitVector(BitVector vector, const BUN i, const bte bits, const BitVectorChunk value) +{ + BUN cid; + unsigned int m1, shift; + + cid = (i * bits) / BITS; + shift = ( i * bits) % BITS; + + if( bits == 1){ + vector[cid] = (vector[cid] & ~(1 << shift)) | ((value > 0) <<shift); + return; + } + + if ( (shift + bits) <= BITS){ + // fits in a single cell + vector[cid]= (vector[cid] & ~( masks[bits] << shift)) | ((value & masks[bits]) << shift); +#ifdef _DEBUG_BITVECTOR_ + printf("#setBitVector %ld i "BUNFMT" bits %d value %3d cell %10d cid "BUNFMT" shift %d\n",(long)vector,i,bits, value, vector[cid],cid,shift); +#endif + } else{ + // spread over two cells + m1 = BITS - shift; + vector[cid]= (vector[cid] & ~( masks[m1] << shift)) | ( (value & masks[m1]) << shift); + vector[cid+1]= 0 | ( ((value>>m1) & masks[bits-m1])); +#ifdef _DEBUG_BITVECTOR_ + printf("#setBitVector %ld i "BUNFMT" bits %d value %3d cell %10d %10d cid "BUNFMT" shift %d m1 %d\n",(long)vector,i,bits, value, vector[cid], vector[cid+1],cid,shift,m1); +#endif + } +#ifdef _DEBUG_BITVECTOR_ + m1 = getBitVector(vector,i,bits); + printf("#get it back %s %d %d\n", (value == m1? "":"MISMATCH"),value,m1); +#endif +} + +// clear a cell +void +clrBitVector(BitVector vector, BUN i, const bte bits) +{ + setBitVector(vector,i,bits, 0); +} + + +int +tstBitVector(BitVector m, BUN idx, const bte width) +{ + return getBitVector(m,idx,width) > 0; +} + + +/* Unit testing +static void +printVector(BitVector v, BUN cnt, int width) +{ + int i; + for ( i = 0; i< cnt; i++) + printf("[%d] %d\n",i, getBitVector(v,i,width)); +} + +int main(int argc, char **argv) +{ + int cnt, width,i,j,k; + BitVector vector; + + if( argc != 3){ + printf("use:%s <cnt> <width>\n",argv[0]); + exit(-1); + } + cnt = atoi(argv[1]); + width= atoi(argv[2]); + printf("testing bitvectors %d %d %d\n",cnt,width, BITS); + initBitMasks(); + vector = newBitVector(cnt,width); + + printVector(vector,cnt,width); + for(i = 0; i< cnt; i++) + setBitVector(vector,i,width, i); + printVector(vector,cnt,width); + for(i = 0; i < cnt; i++){ + j = rand() % width; + setBitVector(vector,i,width, j ); + if( j != (k = getBitVector(vector,i,width)) ) + printf("mismatch[%d] %d %d\n",i,j,k); + } +} +*/ diff --git a/gdk/gdk_bitvector.h b/gdk/gdk_bitvector.h new file mode 100644 --- /dev/null +++ b/gdk/gdk_bitvector.h @@ -0,0 +1,34 @@ + +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * Copyright 2008-2015 MonetDB B.V. + */ + +/* + * The primitives to create, maintain, and use a bit mask. + * Each bitvector is represented by Vector,Cnt,Width + * where Vector is a bit sequence with CNT number of chunks of size Width + * The Vector is aligned at sizeof(lng) = 64 bits + * Cnt is a BUN, values are represented as lng + */ +#ifndef _GDK_MASK_H_ +#define _GDK_MASK_H_ + +typedef unsigned int BitVectorChunk; + +typedef BitVectorChunk *BitVector; + +gdk_export void initBitMasks(void); +gdk_export size_t getBitVectorSize(const BUN cnt, const int width); +gdk_export BitVector newBitVector(BUN cnt, int width); +gdk_export void setBitVector(BitVector vector, const BUN i, const bte bits, const BitVectorChunk value); +gdk_export void clrBitVector(BitVector vector, BUN i, const bte bits); +gdk_export int tstBitVector(BitVector vector, BUN i, const bte bits); +gdk_export int getBitVector(BitVector vector, BUN i, const bte bits); + +#define BitVectorSize(CNT, BITS) wordaligned(((CNT) * (BITS) / CHAR_BIT) + ( ((CNT) * (BITS)) % CHAR_BIT != 0 ), BitVectorChunk) + +#endif /* _GDK_MASK_H_ */ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list