I wrote: > Jeff Janes <jeff.ja...@gmail.com> writes: >> My biggest gripe with it at the moment is that the signature size should be >> expressed in bits, and then internally rounded up to a multiple of 16, >> rather than having it be expressed in 'uint16'. >> If that were done it would be easier to fix the documentation to be more >> understandable.
> +1 ... that sort of definition seems much more future-proof, too. > IMO it's not too late to change this. (We probably don't want to change > the on-disk representation of the reloptions, but we could convert from > bits to words in bloptions().) There were no objections to this, but also no action. Attached is a draft patch ... any complaints? regards, tom lane
diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h index c21eebf..a9daf63 100644 *** a/contrib/bloom/bloom.h --- b/contrib/bloom/bloom.h *************** typedef BloomPageOpaqueData *BloomPageOp *** 79,96 **** #define BLOOM_HEAD_BLKNO (1) /* first data page */ /* ! * Maximum of bloom signature length in uint16. Actual value ! * is 512 bytes */ ! #define MAX_BLOOM_LENGTH (256) /* Bloom index options */ typedef struct BloomOptions { int32 vl_len_; /* varlena header (do not touch directly!) */ ! int bloomLength; /* length of signature in uint16 */ ! int bitSize[INDEX_MAX_KEYS]; /* signature bits per index ! * key */ } BloomOptions; /* --- 79,108 ---- #define BLOOM_HEAD_BLKNO (1) /* first data page */ /* ! * We store Bloom signatures as arrays of uint16 words. */ ! typedef uint16 BloomSignatureWord; ! ! #define SIGNWORDBITS ((int) (BITS_PER_BYTE * sizeof(BloomSignatureWord))) ! ! /* ! * Default and maximum Bloom signature length in bits. ! */ ! #define DEFAULT_BLOOM_LENGTH (5 * SIGNWORDBITS) ! #define MAX_BLOOM_LENGTH (256 * SIGNWORDBITS) ! ! /* ! * Default and maximum signature bits generated per index key. ! */ ! #define DEFAULT_BLOOM_BITS 2 ! #define MAX_BLOOM_BITS (MAX_BLOOM_LENGTH - 1) /* Bloom index options */ typedef struct BloomOptions { int32 vl_len_; /* varlena header (do not touch directly!) */ ! int bloomLength; /* length of signature in words (not bits!) */ ! int bitSize[INDEX_MAX_KEYS]; /* signature bits per index key */ } BloomOptions; /* *************** typedef struct BloomState *** 143,154 **** /* * Tuples are very different from all other relations */ - typedef uint16 SignType; - typedef struct BloomTuple { ItemPointerData heapPtr; ! SignType sign[FLEXIBLE_ARRAY_MEMBER]; } BloomTuple; #define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign) --- 155,164 ---- /* * Tuples are very different from all other relations */ typedef struct BloomTuple { ItemPointerData heapPtr; ! BloomSignatureWord sign[FLEXIBLE_ARRAY_MEMBER]; } BloomTuple; #define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign) *************** typedef struct BloomTuple *** 156,162 **** /* Opaque data structure for bloom index scan */ typedef struct BloomScanOpaqueData { ! SignType *sign; /* Scan signature */ BloomState state; } BloomScanOpaqueData; --- 166,172 ---- /* Opaque data structure for bloom index scan */ typedef struct BloomScanOpaqueData { ! BloomSignatureWord *sign; /* Scan signature */ BloomState state; } BloomScanOpaqueData; *************** extern void BloomFillMetapage(Relation i *** 170,176 **** extern void BloomInitMetapage(Relation index); extern void BloomInitPage(Page page, uint16 flags); extern Buffer BloomNewBuffer(Relation index); ! extern void signValue(BloomState * state, SignType * sign, Datum value, int attno); extern BloomTuple *BloomFormTuple(BloomState * state, ItemPointer iptr, Datum *values, bool *isnull); extern bool BloomPageAddItem(BloomState * state, Page page, BloomTuple * tuple); --- 180,186 ---- extern void BloomInitMetapage(Relation index); extern void BloomInitPage(Page page, uint16 flags); extern Buffer BloomNewBuffer(Relation index); ! extern void signValue(BloomState * state, BloomSignatureWord * sign, Datum value, int attno); extern BloomTuple *BloomFormTuple(BloomState * state, ItemPointer iptr, Datum *values, bool *isnull); extern bool BloomPageAddItem(BloomState * state, Page page, BloomTuple * tuple); diff --git a/contrib/bloom/blscan.c b/contrib/bloom/blscan.c index aebf32a..0c954dc 100644 *** a/contrib/bloom/blscan.c --- b/contrib/bloom/blscan.c *************** blgetbitmap(IndexScanDesc scan, TIDBitma *** 93,99 **** /* New search: have to calculate search signature */ ScanKey skey = scan->keyData; ! so->sign = palloc0(sizeof(SignType) * so->state.opts.bloomLength); for (i = 0; i < scan->numberOfKeys; i++) { --- 93,99 ---- /* New search: have to calculate search signature */ ScanKey skey = scan->keyData; ! so->sign = palloc0(sizeof(BloomSignatureWord) * so->state.opts.bloomLength); for (i = 0; i < scan->numberOfKeys; i++) { diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c index 4a5b343..8f13476 100644 *** a/contrib/bloom/blutils.c --- b/contrib/bloom/blutils.c *************** *** 27,49 **** #include "bloom.h" ! /* Signature dealing macros */ ! #define BITSIGNTYPE (BITS_PER_BYTE * sizeof(SignType)) ! #define GETWORD(x,i) ( *( (SignType*)(x) + (int)( (i) / BITSIGNTYPE ) ) ) ! #define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % BITSIGNTYPE ) ) ! #define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % BITSIGNTYPE ) ) ! #define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % BITSIGNTYPE )) & 0x01 ) PG_FUNCTION_INFO_V1(blhandler); ! /* Kind of relation optioms for bloom index */ static relopt_kind bl_relopt_kind; static int32 myRand(void); static void mySrand(uint32 seed); /* ! * Module initialize function: initilized relation options. */ void _PG_init(void) --- 27,52 ---- #include "bloom.h" ! /* Signature dealing macros - note i is assumed to be of type int */ ! #define GETWORD(x,i) ( *( (BloomSignatureWord *)(x) + ( (i) / SIGNWORDBITS ) ) ) ! #define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % SIGNWORDBITS ) ) ! #define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % SIGNWORDBITS ) ) ! #define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % SIGNWORDBITS )) & 0x01 ) PG_FUNCTION_INFO_V1(blhandler); ! /* Kind of relation options for bloom index */ static relopt_kind bl_relopt_kind; + /* parse table for fillRelOptions */ + static relopt_parse_elt bl_relopt_tab[INDEX_MAX_KEYS + 1]; static int32 myRand(void); static void mySrand(uint32 seed); /* ! * Module initialize function: initialize info about Bloom relation options. ! * ! * Note: keep this in sync with makeDefaultBloomOptions(). */ void _PG_init(void) *************** _PG_init(void) *** 53,70 **** bl_relopt_kind = add_reloption_kind(); add_int_reloption(bl_relopt_kind, "length", ! "Length of signature in uint16 type", 5, 1, 256); for (i = 0; i < INDEX_MAX_KEYS; i++) { ! snprintf(buf, 16, "col%d", i + 1); add_int_reloption(bl_relopt_kind, buf, ! "Number of bits for corresponding column", 2, 1, 2048); } } /* * Bloom handler function: return IndexAmRoutine with access method parameters * and callbacks. */ --- 56,101 ---- bl_relopt_kind = add_reloption_kind(); + /* Option for length of signature */ add_int_reloption(bl_relopt_kind, "length", ! "Length of signature in bits", ! DEFAULT_BLOOM_LENGTH, 1, MAX_BLOOM_LENGTH); ! bl_relopt_tab[0].optname = "length"; ! bl_relopt_tab[0].opttype = RELOPT_TYPE_INT; ! bl_relopt_tab[0].offset = offsetof(BloomOptions, bloomLength); + /* Number of bits for each possible index column: col1, col2, ... */ for (i = 0; i < INDEX_MAX_KEYS; i++) { ! snprintf(buf, sizeof(buf), "col%d", i + 1); add_int_reloption(bl_relopt_kind, buf, ! "Number of bits for corresponding column", ! DEFAULT_BLOOM_BITS, 1, MAX_BLOOM_BITS); ! bl_relopt_tab[i + 1].optname = MemoryContextStrdup(TopMemoryContext, ! buf); ! bl_relopt_tab[i + 1].opttype = RELOPT_TYPE_INT; ! bl_relopt_tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]); } } /* + * Construct a default set of Bloom options. + */ + static BloomOptions * + makeDefaultBloomOptions(void) + { + BloomOptions *opts; + int i; + + opts = (BloomOptions *) palloc0(sizeof(BloomOptions)); + opts->bloomLength = (DEFAULT_BLOOM_LENGTH + SIGNWORDBITS - 1) / SIGNWORDBITS; + for (i = 0; i < INDEX_MAX_KEYS; i++) + opts->bitSize[i] = DEFAULT_BLOOM_BITS; + SET_VARSIZE(opts, sizeof(BloomOptions)); + return opts; + } + + /* * Bloom handler function: return IndexAmRoutine with access method parameters * and callbacks. */ *************** initBloomState(BloomState *state, Relati *** 157,163 **** memcpy(&state->opts, index->rd_amcache, sizeof(state->opts)); state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ + ! sizeof(SignType) * state->opts.bloomLength; } /* --- 188,194 ---- memcpy(&state->opts, index->rd_amcache, sizeof(state->opts)); state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ + ! sizeof(BloomSignatureWord) * state->opts.bloomLength; } /* *************** mySrand(uint32 seed) *** 208,214 **** * Add bits of given value to the signature. */ void ! signValue(BloomState *state, SignType *sign, Datum value, int attno) { uint32 hashVal; int nBit, --- 239,245 ---- * Add bits of given value to the signature. */ void ! signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno) { uint32 hashVal; int nBit, *************** signValue(BloomState *state, SignType *s *** 231,238 **** for (j = 0; j < state->opts.bitSize[attno]; j++) { ! /* prevent mutiple evaluation */ ! nBit = myRand() % (state->opts.bloomLength * BITSIGNTYPE); SETBIT(sign, nBit); } } --- 262,269 ---- for (j = 0; j < state->opts.bitSize[attno]; j++) { ! /* prevent multiple evaluation in SETBIT macro */ ! nBit = myRand() % (state->opts.bloomLength * SIGNWORDBITS); SETBIT(sign, nBit); } } *************** BloomInitPage(Page page, uint16 flags) *** 362,400 **** } /* - * Adjust options of bloom index. - * - * This must produce default options when *opts is initially all-zero. - */ - static void - adjustBloomOptions(BloomOptions *opts) - { - int i; - - /* Default length of bloom filter is 5 of 16-bit integers */ - if (opts->bloomLength <= 0) - opts->bloomLength = 5; - else if (opts->bloomLength > MAX_BLOOM_LENGTH) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("length of bloom signature (%d) is greater than maximum %d", - opts->bloomLength, MAX_BLOOM_LENGTH))); - - /* Check signature length */ - for (i = 0; i < INDEX_MAX_KEYS; i++) - { - /* - * Zero and negative number of bits is meaningless. Also setting - * more bits than signature have seems useless. Replace both cases - * with 2 bits default. - */ - if (opts->bitSize[i] <= 0 - || opts->bitSize[i] >= opts->bloomLength * sizeof(SignType) * BITS_PER_BYTE) - opts->bitSize[i] = 2; - } - } - - /* * Fill in metapage for bloom index. */ void --- 393,398 ---- *************** BloomFillMetapage(Relation index, Page m *** 405,418 **** /* * Choose the index's options. If reloptions have been assigned, use ! * those, otherwise create default options by applying adjustBloomOptions ! * to a zeroed chunk of memory. We apply adjustBloomOptions to existing ! * reloptions too, just out of paranoia; they should be valid already. */ opts = (BloomOptions *) index->rd_options; if (!opts) ! opts = (BloomOptions *) palloc0(sizeof(BloomOptions)); ! adjustBloomOptions(opts); /* * Initialize contents of meta page, including a copy of the options, --- 403,413 ---- /* * Choose the index's options. If reloptions have been assigned, use ! * those, otherwise create default options. */ opts = (BloomOptions *) index->rd_options; if (!opts) ! opts = makeDefaultBloomOptions(); /* * Initialize contents of meta page, including a copy of the options, *************** bloptions(Datum reloptions, bool validat *** 462,491 **** relopt_value *options; int numoptions; BloomOptions *rdopts; - relopt_parse_elt tab[INDEX_MAX_KEYS + 1]; - int i; - char buf[16]; - - /* Option for length of signature */ - tab[0].optname = "length"; - tab[0].opttype = RELOPT_TYPE_INT; - tab[0].offset = offsetof(BloomOptions, bloomLength); - - /* Number of bits for each of possible columns: col1, col2, ... */ - for (i = 0; i < INDEX_MAX_KEYS; i++) - { - snprintf(buf, sizeof(buf), "col%d", i + 1); - tab[i + 1].optname = pstrdup(buf); - tab[i + 1].opttype = RELOPT_TYPE_INT; - tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]); - } options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions); rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions); fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions, ! validate, tab, INDEX_MAX_KEYS + 1); ! adjustBloomOptions(rdopts); return (bytea *) rdopts; } --- 457,471 ---- relopt_value *options; int numoptions; BloomOptions *rdopts; + /* Parse the user-given reloptions */ options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions); rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions); fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions, ! validate, bl_relopt_tab, lengthof(bl_relopt_tab)); ! /* Convert signature length to words, rounding up */ ! rdopts->bloomLength = (rdopts->bloomLength + SIGNWORDBITS - 1) / SIGNWORDBITS; return (bytea *) rdopts; } diff --git a/doc/src/sgml/bloom.sgml b/doc/src/sgml/bloom.sgml index 49cb066..abf30e7 100644 *** a/doc/src/sgml/bloom.sgml --- b/doc/src/sgml/bloom.sgml *************** *** 8,15 **** </indexterm> <para> ! <literal>bloom</> is a module which implements an index access method. It comes ! as an example of custom access methods and generic WAL records usage. But it is also useful in itself. </para> --- 8,15 ---- </indexterm> <para> ! <literal>bloom</> is a module that implements an index access method. It comes ! as an example of custom access methods and generic WAL record usage. But it is also useful in itself. </para> *************** *** 22,29 **** allows fast exclusion of non-candidate tuples via signatures. Since a signature is a lossy representation of all indexed attributes, search results must be rechecked using heap information. ! The user can specify signature length (in uint16, default is 5) and the ! number of bits, which can be set per attribute (1 < colN < 2048). </para> <para> --- 22,30 ---- allows fast exclusion of non-candidate tuples via signatures. Since a signature is a lossy representation of all indexed attributes, search results must be rechecked using heap information. ! The user can specify signature length in bits (default 80, maximum 4096) ! and the number of bits generated for each index column (default 2, ! maximum 4095). </para> <para> *************** *** 51,64 **** <term><literal>length</></term> <listitem> <para> ! Length of signature in uint16 type values </para> </listitem> </varlistentry> </variablelist> <variablelist> <varlistentry> ! <term><literal>col1 — col16</></term> <listitem> <para> Number of bits for corresponding column --- 52,65 ---- <term><literal>length</></term> <listitem> <para> ! Length of signature in bits </para> </listitem> </varlistentry> </variablelist> <variablelist> <varlistentry> ! <term><literal>col1 — col32</></term> <listitem> <para> Number of bits for corresponding column *************** *** 77,88 **** <programlisting> CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3) ! WITH (length=5, col1=2, col2=2, col3=4); </programlisting> <para> Here, we created a bloom index with a signature length of 80 bits, ! and attributes i1 and i2 mapped to 2 bits, and attribute i3 to 4 bits. </para> <para> --- 78,89 ---- <programlisting> CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3) ! WITH (length=80, col1=2, col2=2, col3=4); </programlisting> <para> Here, we created a bloom index with a signature length of 80 bits, ! and attributes i1 and i2 mapped to 2 bits, and attribute i3 mapped to 4 bits. </para> <para>
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers