On 13 November 2017 at 22:46, Amit Langote <langote_amit...@lab.ntt.co.jp> wrote: > On 2017/11/10 12:30, Kyotaro HORIGUCHI wrote: >> In 0002, bms_add_range has a bit naive-looking loop >> >> + while (wordnum <= uwordnum) >> + { >> + bitmapword mask = (bitmapword) ~0; >> + >> + /* If working on the lower word, zero out bits below 'lower'. >> */ >> + if (wordnum == lwordnum) >> + { >> + int lbitnum = BITNUM(lower); >> + mask >>= lbitnum; >> + mask <<= lbitnum; >> + } >> + >> + /* Likewise, if working on the upper word, zero bits above >> 'upper' */ >> + if (wordnum == uwordnum) >> + { >> + int ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(upper) >> + 1); >> + mask <<= ushiftbits; >> + mask >>= ushiftbits; >> + } >> + >> + a->words[wordnum++] |= mask; >> + } >> >> Without some aggressive optimization, the loop takes most of the >> time to check-and-jump for nothing especially with many >> partitions and somewhat unintuitive. >> >> The following uses a bit tricky bitmap operation but >> is straightforward as a whole. >> >> ===== >> /* fill the bits upper from BITNUM(lower) (0-based) of the first word */ >> a->workds[wordnum++] += ~(bitmapword)((1 << BITNUM(lower)) - 1); >> >> /* fill up intermediate words */ >> while (wordnum < uwordnum) >> a->words[wordnum++] = ~(bitmapword) 0; >> >> /* fill up to BITNUM(upper) bit (0-based) of the last word */ >> a->workds[wordnum++] |= >> (~(bitmapword) 0) >> (BITS_PER_BITMAPWORD - (BITNUM(upper) - 1)); >> ===== > > Considering also the David's comment downthread, I will try to incorporate > this into bms_add_range().
I've attached an implementation of the patch using this method. I've also attached bitset.c which runs each through their paces. I'd have expected Kyotaro's method to be faster, but gcc 7.2 with -O2 generates very slightly slower code. I didn't really check why. clang seems to do a better job with it. $ gcc -O2 bitset.c -o bitset && ./bitset bms_add_range in 0.694254 (6.94254 ns per loop) bms_add_range2 in 0.726643 (7.26643 ns per loop) 11111111111111111111111111111110 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 ------------- 11111111111111111111111111111110 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 $ gcc --version gcc (Ubuntu 7.2.0-8ubuntu3) 7.2.0 Copyright (C) 2017 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. $ clang -O2 bitset.c -o bitset && ./bitset bms_add_range in 0.866554 (8.66554 ns per loop) bms_add_range2 in 0.467138 (4.67138 ns per loop) 11111111111111111111111111111110 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 ------------- 11111111111111111111111111111110 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 11111111111111111111111111111111 $ clang --version clang version 4.0.1-6 (tags/RELEASE_401/final) Target: x86_64-pc-linux-gnu Thread model: posix InstalledDir: /usr/bin Probably just go with Kyotaro's idea (v2). I don't think this is worth debating, I just wanted to show it's not that clear-cut. -- David Rowley http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
bms_add_range_v2.patch
Description: Binary data
#include <stdio.h> #include <string.h> #include <stdlib.h> #include <time.h> #define MAX_WORDS 10 #define likely(x) __builtin_expect((x),1) #define unlikely(x) __builtin_expect((x),0) typedef unsigned int bitmapword; /* must be an unsigned type */ typedef struct Bitmapset { int nwords; /* number of words in array */ bitmapword words[MAX_WORDS]; /* really [nwords] */ } Bitmapset; #define BITS_PER_BITMAPWORD 32 #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) #define BITNUM(x) ((x) % BITS_PER_BITMAPWORD) void printbits(bitmapword b) { int x = BITS_PER_BITMAPWORD - 1; while (x >= 0) { putchar((b >> x) & 1 ? '1' : '0'); x--; } putchar('\n'); } void printbitmapset(Bitmapset *a) { int x; for (x = 0; x < MAX_WORDS; x++) { printbits(a->words[x]); } } Bitmapset * bms_add_range(Bitmapset *a, int lower, int upper) { int lwordnum, lbitnum, uwordnum, ubitnum, wordnum; uwordnum = WORDNUM(upper); wordnum = lwordnum = WORDNUM(lower); /* * Starting at lower's wordnum, loop over each word up to upper's wordnum. * Along the way set all bits inclusively between lower and upper to 1. We * only need to handle the lwordnum and uwordnum specially so we don't set * any bits outside of the range. */ while (wordnum <= uwordnum) { bitmapword mask = ~(bitmapword) 0; /* If working on the lower word, zero out bits below 'lower'. */ if (wordnum == lwordnum) { //mask &= ~(bitmapword)((1 << BITNUM(lower)) - 1); int lbitnum = BITNUM(lower); mask >>= lbitnum; mask <<= lbitnum; } /* Likewise, if working on the upper word, zero bits above 'upper' */ if (wordnum == uwordnum) { int ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(upper) + 1); mask <<= ushiftbits; mask >>= ushiftbits; //mask &= (~(bitmapword) 0) >> (BITS_PER_BITMAPWORD - (BITNUM(upper) + 1)); } a->words[wordnum++] |= mask; } return a; } Bitmapset * bms_add_range2(Bitmapset *a, int lower, int upper) { int lwordnum, lbitnum, uwordnum, ushiftbits, wordnum; uwordnum = WORDNUM(upper); wordnum = lwordnum = WORDNUM(lower); lbitnum = BITNUM(lower); ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(upper) + 1); /* * Special case when lwordnum is the same as uwordnum we must perform the * upper and lower masking on the word. */ if (lwordnum == uwordnum) { a->words[lwordnum] |= ~(bitmapword) (((bitmapword) 1 << lbitnum) - 1) & (~(bitmapword) 0) >> ushiftbits; } else { /* turn on lbitnum and all bits left of it */ a->words[wordnum++] |= ~(bitmapword)(((bitmapword) 1 << lbitnum) - 1); /* turn on all bits for any intermediate words */ while (wordnum < uwordnum) a->words[wordnum++] = ~(bitmapword) 0; /* turn on upper's bit and all bits right of it. */ a->words[uwordnum] |= (~(bitmapword) 0) >> ushiftbits; } return a; } #define LOOPS 100000000 int main(void) { Bitmapset a, b; clock_t start, end; int loop; int lower = 1; //int upper = 65; #define upper loop % (MAX_WORDS * BITS_PER_BITMAPWORD) memset(&a, 0, sizeof(a)); memset(&b, 0, sizeof(b)); start = clock(); for (loop = LOOPS; loop >= 0; loop--) bms_add_range(&a,lower, upper); end = clock(); printf("bms_add_range in %g (%g ns per loop)\n", (double) (end - start) / CLOCKS_PER_SEC, (double) (end - start) / CLOCKS_PER_SEC / LOOPS * 1000000000); start = clock(); for (loop = LOOPS; loop >= 0; loop--) bms_add_range2(&b,lower, upper); end = clock(); printf("bms_add_range2 in %g (%g ns per loop)\n", (double) (end - start) / CLOCKS_PER_SEC, (double) (end - start) / CLOCKS_PER_SEC / LOOPS * 1000000000); printbitmapset(&a); printf("-------------\n"); printbitmapset(&b); return 0; }