On 02.02.2015 13:43, Rasmus Villemoes wrote: > On Sat, Jan 31 2015, yury.no...@gmail.com wrote: > >> From: Yury Norov <y.no...@samsung.com> >> >> New implementations takes less space in source file (see diffstat) >> and in object. For me it's 710 vs 453 bytes of text. >> > New version generally looks good. Please include a summary of the > changes between the versions either below the --- line or in a 0/n cover > letter, especially since you've now expanded the scope of the series. > > Comments below. > >> Patch was boot-tested on x86_64 and MIPS (big-endian) machines. >> Performance tests were ran on userspace with code like this: >> >> /* addr[] is filled from /dev/urandom */ >> start = clock(); >> while (ret < nbits) >> ret = find_next_bit(addr, nbits, ret + 1); >> >> end = clock(); >> printf("%ld\t", (unsigned long) end - start); >> >> On Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz rezults are next: >> (for find_next_bit, nbits is 8M, for find_first_bit - 80K) >> >> find_next_bit: find_first_bit: >> new current new current >> 26932 43151 14777 14925 >> 26947 43182 14521 15423 >> 26507 43824 15053 14705 >> 27329 43759 14473 14777 >> 26895 43367 14847 15023 >> 26990 43693 15103 15163 >> 26775 43299 15067 15232 >> 27282 42752 14544 15121 >> 27504 43088 14644 14858 >> 26761 43856 14699 15193 >> 26692 43075 14781 14681 >> 27137 42969 14451 15061 >> ... ... >> >> find_next_bit performance gain is 35-40%; >> find_first_bit - no measurable difference. 
>> >> Signed-off-by: Yury Norov <yury.no...@gmail.com> >> --- >> lib/find_last_bit.c | 31 ++----- >> lib/find_next_bit.c | 254 >> +++++++++++++++------------------------------------- >> 2 files changed, 79 insertions(+), 206 deletions(-) >> >> diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c >> index 91ca09f..e67e970 100644 >> --- a/lib/find_last_bit.c >> +++ b/lib/find_last_bit.c >> @@ -4,44 +4,29 @@ >> * Written by Rusty Russell <ru...@rustcorp.com.au> >> * (Inspired by David Howell's find_next_bit implementation) >> * >> + * Rewritten by Yury Norov <yury.no...@gmail.com> to decrease >> + * size and improve performance, 2015. >> + * >> * This program is free software; you can redistribute it and/or >> * modify it under the terms of the GNU General Public License >> * as published by the Free Software Foundation; either version >> * 2 of the License, or (at your option) any later version. >> */ >> >> -#include <linux/bitops.h> > Why do you remove that #include? It is rather important that the header > and implementation don't get out of sync. I know that kernel.h includes > bitops.h, but please don't rely on such things. Quoting SubmitChecklist: > > 1: If you use a facility then #include the file that defines/declares > that facility. Don't depend on other header files pulling in ones > that you use. > > >> #include <linux/export.h> >> -#include <asm/types.h> >> -#include <asm/byteorder.h> > However, getting rid of includes that are no longer needed is certainly > a good thing. Yes, linux/bitops.h are to get back. >> +#include <linux/kernel.h> >> >> #ifndef find_last_bit >> >> unsigned long find_last_bit(const unsigned long *addr, unsigned long size) >> { >> - unsigned long words; >> - unsigned long tmp; >> - >> - /* Start at final word. */ >> - words = size / BITS_PER_LONG; >> - >> - /* Partial final word? 
*/ >> - if (size & (BITS_PER_LONG-1)) { >> - tmp = (addr[words] & (~0UL >> (BITS_PER_LONG >> - - (size & (BITS_PER_LONG-1))))); >> - if (tmp) >> - goto found; >> - } >> + unsigned long idx = DIV_ROUND_UP(size, BITS_PER_LONG); >> >> - while (words) { >> - tmp = addr[--words]; >> - if (tmp) { >> -found: >> - return words * BITS_PER_LONG + __fls(tmp); >> - } >> + while (idx--) { >> + if (addr[idx]) >> + return min(idx * BITS_PER_LONG + __fls(addr[idx]), >> size); >> } >> >> - /* Not found */ >> return size; >> } >> EXPORT_SYMBOL(find_last_bit); >> diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c >> index 0cbfc0b..ebfb3dc 100644 >> --- a/lib/find_next_bit.c >> +++ b/lib/find_next_bit.c >> @@ -3,18 +3,45 @@ >> * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. >> * Written by David Howells (dhowe...@redhat.com) >> * >> + * Rewritten by Yury Norov <yury.no...@gmail.com> to decrease >> + * size and improve performance, 2015. >> + * >> * This program is free software; you can redistribute it and/or >> * modify it under the terms of the GNU General Public License >> * as published by the Free Software Foundation; either version >> * 2 of the License, or (at your option) any later version. >> */ >> >> -#include <linux/bitops.h> >> #include <linux/export.h> >> -#include <asm/types.h> >> -#include <asm/byteorder.h> >> +#include <linux/kernel.h> > Same as above. > >> +#define HIGH_BITS_MASK(nr) (ULONG_MAX << (nr)) >> + >> +#if !defined(find_next_bit) || !defined(find_next_zero_bit) >> +static unsigned long _find_next_bit(const unsigned long *addr, >> + unsigned long nbits, unsigned long start, bool set) >> +{ >> + unsigned long tmp = set ? addr[start / BITS_PER_LONG] >> + : ~addr[start / BITS_PER_LONG]; >> + >> + /* Handle 1st word. 
*/ >> + if (!IS_ALIGNED(start, BITS_PER_LONG)) { >> + tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG); >> + start = round_down(start, BITS_PER_LONG); >> + } >> >> -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) >> + while (!tmp) { >> + start += BITS_PER_LONG; >> + if (start >= nbits) >> + return nbits; >> + >> + tmp = set ? addr[start / BITS_PER_LONG] >> + : ~addr[start / BITS_PER_LONG]; >> + } >> + >> + return start + __ffs(tmp); >> +} >> +#endif >> >> #ifndef find_next_bit >> /* >> @@ -23,86 +50,22 @@ >> unsigned long find_next_bit(const unsigned long *addr, unsigned long size, >> unsigned long offset) >> { >> - const unsigned long *p = addr + BITOP_WORD(offset); >> - unsigned long result = offset & ~(BITS_PER_LONG-1); >> - unsigned long tmp; >> - >> if (offset >= size) >> return size; > Why can't this ... > > >> - size -= result; >> - offset %= BITS_PER_LONG; >> - if (offset) { >> - tmp = *(p++); >> - tmp &= (~0UL << offset); >> - if (size < BITS_PER_LONG) >> - goto found_first; >> - if (tmp) >> - goto found_middle; >> - size -= BITS_PER_LONG; >> - result += BITS_PER_LONG; >> - } >> - while (size & ~(BITS_PER_LONG-1)) { >> - if ((tmp = *(p++))) >> - goto found_middle; >> - result += BITS_PER_LONG; >> - size -= BITS_PER_LONG; >> - } >> - if (!size) >> - return result; >> - tmp = *p; >> >> -found_first: >> - tmp &= (~0UL >> (BITS_PER_LONG - size)); >> - if (tmp == 0UL) /* Are any bits set? */ >> - return result + size; /* Nope. */ >> -found_middle: >> - return result + __ffs(tmp); >> + return min(_find_next_bit(addr, size, offset, 1), size); > ... and this be part of _find_next_bit? Can find_next_bit not be simply > 'return _find_next_bit(addr, size, offset, 1);', and similarly for > find_next_zero_bit? Btw., passing true and false for the boolean > parameter may be a little clearer. I moved size checkers out of '_find_next_bit' to let user call it from his code if he knows for sure that size/offset pair is valid. This may help save a couple of clocks. 
I think I'll walk through the code to find out how many such call sites we have. If there are not too many of them, or they are not in critical paths, the checks may be moved into the function.
Or maybe leave it as is and add a comment for future reference? > >> } >> EXPORT_SYMBOL(find_next_bit); >> #endif >> >> #ifndef find_next_zero_bit >> -/* >> - * This implementation of find_{first,next}_zero_bit was stolen from >> - * Linus' asm-alpha/bitops.h. >> - */ >> unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long >> size, >> unsigned long offset) >> { >> - const unsigned long *p = addr + BITOP_WORD(offset); >> - unsigned long result = offset & ~(BITS_PER_LONG-1); >> - unsigned long tmp; >> - >> if (offset >= size) >> return size; > See above. > >> - size -= result; >> - offset %= BITS_PER_LONG; >> - if (offset) { >> - tmp = *(p++); >> - tmp |= ~0UL >> (BITS_PER_LONG - offset); >> - if (size < BITS_PER_LONG) >> - goto found_first; >> - if (~tmp) >> - goto found_middle; >> - size -= BITS_PER_LONG; >> - result += BITS_PER_LONG; >> - } >> - while (size & ~(BITS_PER_LONG-1)) { >> - if (~(tmp = *(p++))) >> - goto found_middle; >> - result += BITS_PER_LONG; >> - size -= BITS_PER_LONG; >> - } >> - if (!size) >> - return result; >> - tmp = *p; >> >> -found_first: >> - tmp |= ~0UL << size; >> - if (tmp == ~0UL) /* Are any bits zero? */ >> - return result + size; /* Nope. */ >> -found_middle: >> - return result + ffz(tmp); >> + return min(_find_next_bit(addr, size, offset, 0), size); > See above. 
> >> } >> EXPORT_SYMBOL(find_next_zero_bit); >> #endif >> @@ -113,24 +76,14 @@ EXPORT_SYMBOL(find_next_zero_bit); >> */ >> unsigned long find_first_bit(const unsigned long *addr, unsigned long size) >> { >> - const unsigned long *p = addr; >> - unsigned long result = 0; >> - unsigned long tmp; >> + unsigned long idx; >> >> - while (size & ~(BITS_PER_LONG-1)) { >> - if ((tmp = *(p++))) >> - goto found; >> - result += BITS_PER_LONG; >> - size -= BITS_PER_LONG; >> + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { >> + if (addr[idx]) >> + return min(idx * BITS_PER_LONG + __ffs(addr[idx]), >> size); >> } >> - if (!size) >> - return result; >> >> - tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); >> - if (tmp == 0UL) /* Are any bits set? */ >> - return result + size; /* Nope. */ >> -found: >> - return result + __ffs(tmp); >> + return size; >> } >> EXPORT_SYMBOL(find_first_bit); >> #endif >> @@ -141,24 +94,14 @@ EXPORT_SYMBOL(find_first_bit); >> */ >> unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long >> size) >> { >> - const unsigned long *p = addr; >> - unsigned long result = 0; >> - unsigned long tmp; >> + unsigned long idx; >> >> - while (size & ~(BITS_PER_LONG-1)) { >> - if (~(tmp = *(p++))) >> - goto found; >> - result += BITS_PER_LONG; >> - size -= BITS_PER_LONG; >> + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { >> + if (addr[idx] != ULONG_MAX) >> + return min(idx * BITS_PER_LONG + ffz(addr[idx]), size); >> } >> - if (!size) >> - return result; >> >> - tmp = (*p) | (~0UL << size); >> - if (tmp == ~0UL) /* Are any bits zero? */ >> - return result + size; /* Nope. 
*/ >> -found: >> - return result + ffz(tmp); >> + return size; >> } >> EXPORT_SYMBOL(find_first_zero_bit); >> #endif >> @@ -166,18 +109,6 @@ EXPORT_SYMBOL(find_first_zero_bit); >> #ifdef __BIG_ENDIAN >> >> /* include/linux/byteorder does not support "unsigned long" type */ >> -static inline unsigned long ext2_swabp(const unsigned long * x) >> -{ >> -#if BITS_PER_LONG == 64 >> - return (unsigned long) __swab64p((u64 *) x); >> -#elif BITS_PER_LONG == 32 >> - return (unsigned long) __swab32p((u32 *) x); >> -#else >> -#error BITS_PER_LONG not defined >> -#endif >> -} >> - >> -/* include/linux/byteorder doesn't support "unsigned long" type */ >> static inline unsigned long ext2_swab(const unsigned long y) >> { >> #if BITS_PER_LONG == 64 >> @@ -189,48 +120,40 @@ static inline unsigned long ext2_swab(const unsigned >> long y) >> #endif >> } >> >> +#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) >> +static unsigned long _find_next_bit_le(const unsigned long *addr, >> + unsigned long nbits, unsigned long start, bool set) >> +{ >> + unsigned long tmp = set ? addr[start / BITS_PER_LONG] >> + : ~addr[start / BITS_PER_LONG]; >> + >> + /* Handle 1st word. */ >> + if (!IS_ALIGNED(start, BITS_PER_LONG)) { >> + tmp &= ext2_swab(HIGH_BITS_MASK(start % BITS_PER_LONG)); >> + start = round_down(start, BITS_PER_LONG); >> + } >> + >> + while (!tmp) { >> + start += BITS_PER_LONG; >> + if (start >= nbits) >> + return nbits; >> + >> + tmp = set ? 
addr[start / BITS_PER_LONG] >> + : ~addr[start / BITS_PER_LONG]; >> + } >> + >> + return start + __ffs(ext2_swab(tmp)); >> +} >> +#endif >> + >> #ifndef find_next_zero_bit_le >> unsigned long find_next_zero_bit_le(const void *addr, unsigned >> long size, unsigned long offset) >> { >> - const unsigned long *p = addr; >> - unsigned long result = offset & ~(BITS_PER_LONG - 1); >> - unsigned long tmp; >> - >> if (offset >= size) >> return size; > Again, I think this should be moved to the common implementation in > _find_next_bit_le, and similarly for find_next_bit_le below. > >> - p += BITOP_WORD(offset); >> - size -= result; >> - offset &= (BITS_PER_LONG - 1UL); >> - if (offset) { >> - tmp = ext2_swabp(p++); >> - tmp |= (~0UL >> (BITS_PER_LONG - offset)); >> - if (size < BITS_PER_LONG) >> - goto found_first; >> - if (~tmp) >> - goto found_middle; >> - size -= BITS_PER_LONG; >> - result += BITS_PER_LONG; >> - } >> >> - while (size & ~(BITS_PER_LONG - 1)) { >> - if (~(tmp = *(p++))) >> - goto found_middle_swap; >> - result += BITS_PER_LONG; >> - size -= BITS_PER_LONG; >> - } >> - if (!size) >> - return result; >> - tmp = ext2_swabp(p); >> -found_first: >> - tmp |= ~0UL << size; >> - if (tmp == ~0UL) /* Are any bits zero? */ >> - return result + size; /* Nope. 
Skip ffz */ >> -found_middle: >> - return result + ffz(tmp); >> - >> -found_middle_swap: >> - return result + ffz(ext2_swab(tmp)); >> + return min(_find_next_bit_le(addr, size, offset, 0), size); >> } >> EXPORT_SYMBOL(find_next_zero_bit_le); >> #endif >> @@ -239,45 +162,10 @@ EXPORT_SYMBOL(find_next_zero_bit_le); >> unsigned long find_next_bit_le(const void *addr, unsigned >> long size, unsigned long offset) >> { >> - const unsigned long *p = addr; >> - unsigned long result = offset & ~(BITS_PER_LONG - 1); >> - unsigned long tmp; >> - >> if (offset >= size) >> return size; >> - p += BITOP_WORD(offset); >> - size -= result; >> - offset &= (BITS_PER_LONG - 1UL); >> - if (offset) { >> - tmp = ext2_swabp(p++); >> - tmp &= (~0UL << offset); >> - if (size < BITS_PER_LONG) >> - goto found_first; >> - if (tmp) >> - goto found_middle; >> - size -= BITS_PER_LONG; >> - result += BITS_PER_LONG; >> - } >> - >> - while (size & ~(BITS_PER_LONG - 1)) { >> - tmp = *(p++); >> - if (tmp) >> - goto found_middle_swap; >> - result += BITS_PER_LONG; >> - size -= BITS_PER_LONG; >> - } >> - if (!size) >> - return result; >> - tmp = ext2_swabp(p); >> -found_first: >> - tmp &= (~0UL >> (BITS_PER_LONG - size)); >> - if (tmp == 0UL) /* Are any bits set? */ >> - return result + size; /* Nope. */ >> -found_middle: >> - return result + __ffs(tmp); >> >> -found_middle_swap: >> - return result + __ffs(ext2_swab(tmp)); >> + return min(_find_next_bit_le(addr, size, offset, 1), size); >> } >> EXPORT_SYMBOL(find_next_bit_le); >> #endif -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/