On 02.02.2015 13:43, Rasmus Villemoes wrote:
> On Sat, Jan 31 2015, yury.no...@gmail.com wrote:
>
>> From: Yury Norov <y.no...@samsung.com>
>>
>> New implementations takes less space in source file (see diffstat)
>> and in object. For me it's 710 vs 453 bytes of text.
>>
> New version generally looks good. Please include a summary of the
> changes between the versions either below the --- line or in a 0/n cover
> letter, especially since you've now expanded the scope of the series.
>
> Comments below.
>
>> Patch was boot-tested on x86_64 and MIPS (big-endian) machines.
>> Performance tests were ran on userspace with code like this:
>>
>>      /* addr[] is filled from /dev/urandom */
>>      start = clock();
>>      while (ret < nbits)
>>              ret = find_next_bit(addr, nbits, ret + 1);
>>
>>      end = clock();
>>      printf("%ld\t", (unsigned long) end - start);
>>
>> On Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz rezults are next:
>> (for find_next_bit, nbits is 8M, for find_first_bit - 80K)
>>
>>      find_next_bit:          find_first_bit:
>>      new     current         new     current
>>      26932   43151           14777   14925
>>      26947   43182           14521   15423
>>      26507   43824           15053   14705
>>      27329   43759           14473   14777
>>      26895   43367           14847   15023
>>      26990   43693           15103   15163
>>      26775   43299           15067   15232
>>      27282   42752           14544   15121
>>      27504   43088           14644   14858
>>      26761   43856           14699   15193
>>      26692   43075           14781   14681
>>      27137   42969           14451   15061
>>      ...                     ...
>>
>> find_next_bit performance gain is 35-40%;
>> find_first_bit - no measurable difference.
>>
>> Signed-off-by: Yury Norov <yury.no...@gmail.com>
>> ---
>>  lib/find_last_bit.c |  31 ++-----
>>  lib/find_next_bit.c | 254 
>> +++++++++++++++-------------------------------------
>>  2 files changed, 79 insertions(+), 206 deletions(-)
>>
>> diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
>> index 91ca09f..e67e970 100644
>> --- a/lib/find_last_bit.c
>> +++ b/lib/find_last_bit.c
>> @@ -4,44 +4,29 @@
>>   * Written by Rusty Russell <ru...@rustcorp.com.au>
>>   * (Inspired by David Howell's find_next_bit implementation)
>>   *
>> + * Rewritten by Yury Norov <yury.no...@gmail.com> to decrease
>> + * size and improve performance, 2015.
>> + *
>>   * This program is free software; you can redistribute it and/or
>>   * modify it under the terms of the GNU General Public License
>>   * as published by the Free Software Foundation; either version
>>   * 2 of the License, or (at your option) any later version.
>>   */
>>  
>> -#include <linux/bitops.h>
> Why do you remove that #include? It is rather important that the header
> and implementation don't get out of sync. I know that kernel.h includes
> bitops.h, but please don't rely on such things. Quoting SubmitChecklist:
>
> 1: If you use a facility then #include the file that defines/declares
>    that facility.  Don't depend on other header files pulling in ones
>    that you use.
>
>
>>  #include <linux/export.h>
>> -#include <asm/types.h>
>> -#include <asm/byteorder.h>
> However, getting rid of includes that are no longer needed is certainly
> a good thing.
Yes, linux/bitops.h are to get back.
>> +#include <linux/kernel.h>
>>  
>>  #ifndef find_last_bit
>>  
>>  unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
>>  {
>> -    unsigned long words;
>> -    unsigned long tmp;
>> -
>> -    /* Start at final word. */
>> -    words = size / BITS_PER_LONG;
>> -
>> -    /* Partial final word? */
>> -    if (size & (BITS_PER_LONG-1)) {
>> -            tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
>> -                                     - (size & (BITS_PER_LONG-1)))));
>> -            if (tmp)
>> -                    goto found;
>> -    }
>> +    unsigned long idx = DIV_ROUND_UP(size, BITS_PER_LONG);
>>  
>> -    while (words) {
>> -            tmp = addr[--words];
>> -            if (tmp) {
>> -found:
>> -                    return words * BITS_PER_LONG + __fls(tmp);
>> -            }
>> +    while (idx--) {
>> +            if (addr[idx])
>> +                    return min(idx * BITS_PER_LONG + __fls(addr[idx]), 
>> size);
>>      }
>>  
>> -    /* Not found */
>>      return size;
>>  }
>>  EXPORT_SYMBOL(find_last_bit);
>> diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
>> index 0cbfc0b..ebfb3dc 100644
>> --- a/lib/find_next_bit.c
>> +++ b/lib/find_next_bit.c
>> @@ -3,18 +3,45 @@
>>   * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
>>   * Written by David Howells (dhowe...@redhat.com)
>>   *
>> + * Rewritten by Yury Norov <yury.no...@gmail.com> to decrease
>> + * size and improve performance, 2015.
>> + *
>>   * This program is free software; you can redistribute it and/or
>>   * modify it under the terms of the GNU General Public License
>>   * as published by the Free Software Foundation; either version
>>   * 2 of the License, or (at your option) any later version.
>>   */
>>  
>> -#include <linux/bitops.h>
>>  #include <linux/export.h>
>> -#include <asm/types.h>
>> -#include <asm/byteorder.h>
>> +#include <linux/kernel.h>
> Same as above.
>
>> +#define HIGH_BITS_MASK(nr)          (ULONG_MAX << (nr))
>> +
>> +#if !defined(find_next_bit) || !defined(find_next_zero_bit)
>> +static unsigned long _find_next_bit(const unsigned long *addr,
>> +            unsigned long nbits, unsigned long start, bool set)
>> +{
>> +    unsigned long tmp = set ? addr[start / BITS_PER_LONG]
>> +                    : ~addr[start / BITS_PER_LONG];
>> +
>> +    /* Handle 1st word. */
>> +    if (!IS_ALIGNED(start, BITS_PER_LONG)) {
>> +            tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
>> +            start = round_down(start, BITS_PER_LONG);
>> +    }
>>  
>> -#define BITOP_WORD(nr)              ((nr) / BITS_PER_LONG)
>> +    while (!tmp) {
>> +            start += BITS_PER_LONG;
>> +            if (start >= nbits)
>> +                    return nbits;
>> +
>> +            tmp = set ? addr[start / BITS_PER_LONG]
>> +                    : ~addr[start / BITS_PER_LONG];
>> +    }
>> +
>> +    return start + __ffs(tmp);
>> +}
>> +#endif
>>  
>>  #ifndef find_next_bit
>>  /*
>> @@ -23,86 +50,22 @@
>>  unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
>>                          unsigned long offset)
>>  {
>> -    const unsigned long *p = addr + BITOP_WORD(offset);
>> -    unsigned long result = offset & ~(BITS_PER_LONG-1);
>> -    unsigned long tmp;
>> -
>>      if (offset >= size)
>>              return size;
> Why can't this ...
>
>
>> -    size -= result;
>> -    offset %= BITS_PER_LONG;
>> -    if (offset) {
>> -            tmp = *(p++);
>> -            tmp &= (~0UL << offset);
>> -            if (size < BITS_PER_LONG)
>> -                    goto found_first;
>> -            if (tmp)
>> -                    goto found_middle;
>> -            size -= BITS_PER_LONG;
>> -            result += BITS_PER_LONG;
>> -    }
>> -    while (size & ~(BITS_PER_LONG-1)) {
>> -            if ((tmp = *(p++)))
>> -                    goto found_middle;
>> -            result += BITS_PER_LONG;
>> -            size -= BITS_PER_LONG;
>> -    }
>> -    if (!size)
>> -            return result;
>> -    tmp = *p;
>>  
>> -found_first:
>> -    tmp &= (~0UL >> (BITS_PER_LONG - size));
>> -    if (tmp == 0UL)         /* Are any bits set? */
>> -            return result + size;   /* Nope. */
>> -found_middle:
>> -    return result + __ffs(tmp);
>> +    return min(_find_next_bit(addr, size, offset, 1), size);
> ... and this be part of _find_next_bit? Can find_next_bit not be simply
> 'return _find_next_bit(addr, size, offset, 1);', and similarly for
> find_next_zero_bit? Btw., passing true and false for the boolean
> parameter may be a little clearer.
I moved size checkers out of '_find_next_bit' to let user call it from his code
if he knows for sure that size/offset pair is valid. This may help save a couple
of clocks. I think, I'll walk over the code to find how many such places we 
have.
If not too much / not in critical paths, checks may be moved into the function.

Or maybe leave as is and place some comment for future?...
>
>>  }
>>  EXPORT_SYMBOL(find_next_bit);
>>  #endif
>>  
>>  #ifndef find_next_zero_bit
>> -/*
>> - * This implementation of find_{first,next}_zero_bit was stolen from
>> - * Linus' asm-alpha/bitops.h.
>> - */
>>  unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long 
>> size,
>>                               unsigned long offset)
>>  {
>> -    const unsigned long *p = addr + BITOP_WORD(offset);
>> -    unsigned long result = offset & ~(BITS_PER_LONG-1);
>> -    unsigned long tmp;
>> -
>>      if (offset >= size)
>>              return size;
> See above.
>
>> -    size -= result;
>> -    offset %= BITS_PER_LONG;
>> -    if (offset) {
>> -            tmp = *(p++);
>> -            tmp |= ~0UL >> (BITS_PER_LONG - offset);
>> -            if (size < BITS_PER_LONG)
>> -                    goto found_first;
>> -            if (~tmp)
>> -                    goto found_middle;
>> -            size -= BITS_PER_LONG;
>> -            result += BITS_PER_LONG;
>> -    }
>> -    while (size & ~(BITS_PER_LONG-1)) {
>> -            if (~(tmp = *(p++)))
>> -                    goto found_middle;
>> -            result += BITS_PER_LONG;
>> -            size -= BITS_PER_LONG;
>> -    }
>> -    if (!size)
>> -            return result;
>> -    tmp = *p;
>>  
>> -found_first:
>> -    tmp |= ~0UL << size;
>> -    if (tmp == ~0UL)        /* Are any bits zero? */
>> -            return result + size;   /* Nope. */
>> -found_middle:
>> -    return result + ffz(tmp);
>> +    return min(_find_next_bit(addr, size, offset, 0), size);
> See above.
>
>>  }
>>  EXPORT_SYMBOL(find_next_zero_bit);
>>  #endif
>> @@ -113,24 +76,14 @@ EXPORT_SYMBOL(find_next_zero_bit);
>>   */
>>  unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
>>  {
>> -    const unsigned long *p = addr;
>> -    unsigned long result = 0;
>> -    unsigned long tmp;
>> +    unsigned long idx;
>>  
>> -    while (size & ~(BITS_PER_LONG-1)) {
>> -            if ((tmp = *(p++)))
>> -                    goto found;
>> -            result += BITS_PER_LONG;
>> -            size -= BITS_PER_LONG;
>> +    for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
>> +            if (addr[idx])
>> +                    return min(idx * BITS_PER_LONG + __ffs(addr[idx]), 
>> size);
>>      }
>> -    if (!size)
>> -            return result;
>>  
>> -    tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
>> -    if (tmp == 0UL)         /* Are any bits set? */
>> -            return result + size;   /* Nope. */
>> -found:
>> -    return result + __ffs(tmp);
>> +    return size;
>>  }
>>  EXPORT_SYMBOL(find_first_bit);
>>  #endif
>> @@ -141,24 +94,14 @@ EXPORT_SYMBOL(find_first_bit);
>>   */
>>  unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long 
>> size)
>>  {
>> -    const unsigned long *p = addr;
>> -    unsigned long result = 0;
>> -    unsigned long tmp;
>> +    unsigned long idx;
>>  
>> -    while (size & ~(BITS_PER_LONG-1)) {
>> -            if (~(tmp = *(p++)))
>> -                    goto found;
>> -            result += BITS_PER_LONG;
>> -            size -= BITS_PER_LONG;
>> +    for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
>> +            if (addr[idx] != ULONG_MAX)
>> +                    return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
>>      }
>> -    if (!size)
>> -            return result;
>>  
>> -    tmp = (*p) | (~0UL << size);
>> -    if (tmp == ~0UL)        /* Are any bits zero? */
>> -            return result + size;   /* Nope. */
>> -found:
>> -    return result + ffz(tmp);
>> +    return size;
>>  }
>>  EXPORT_SYMBOL(find_first_zero_bit);
>>  #endif
>> @@ -166,18 +109,6 @@ EXPORT_SYMBOL(find_first_zero_bit);
>>  #ifdef __BIG_ENDIAN
>>  
>>  /* include/linux/byteorder does not support "unsigned long" type */
>> -static inline unsigned long ext2_swabp(const unsigned long * x)
>> -{
>> -#if BITS_PER_LONG == 64
>> -    return (unsigned long) __swab64p((u64 *) x);
>> -#elif BITS_PER_LONG == 32
>> -    return (unsigned long) __swab32p((u32 *) x);
>> -#else
>> -#error BITS_PER_LONG not defined
>> -#endif
>> -}
>> -
>> -/* include/linux/byteorder doesn't support "unsigned long" type */
>>  static inline unsigned long ext2_swab(const unsigned long y)
>>  {
>>  #if BITS_PER_LONG == 64
>> @@ -189,48 +120,40 @@ static inline unsigned long ext2_swab(const unsigned 
>> long y)
>>  #endif
>>  }
>>  
>> +#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
>> +static unsigned long _find_next_bit_le(const unsigned long *addr,
>> +            unsigned long nbits, unsigned long start, bool set)
>> +{
>> +    unsigned long tmp = set ? addr[start / BITS_PER_LONG]
>> +                    : ~addr[start / BITS_PER_LONG];
>> +
>> +    /* Handle 1st word. */
>> +    if (!IS_ALIGNED(start, BITS_PER_LONG)) {
>> +            tmp &= ext2_swab(HIGH_BITS_MASK(start % BITS_PER_LONG));
>> +            start = round_down(start, BITS_PER_LONG);
>> +    }
>> +
>> +    while (!tmp) {
>> +            start += BITS_PER_LONG;
>> +            if (start >= nbits)
>> +                    return nbits;
>> +
>> +            tmp = set ? addr[start / BITS_PER_LONG]
>> +                    : ~addr[start / BITS_PER_LONG];
>> +    }
>> +
>> +    return start + __ffs(ext2_swab(tmp));
>> +}
>> +#endif
>> +
>>  #ifndef find_next_zero_bit_le
>>  unsigned long find_next_zero_bit_le(const void *addr, unsigned
>>              long size, unsigned long offset)
>>  {
>> -    const unsigned long *p = addr;
>> -    unsigned long result = offset & ~(BITS_PER_LONG - 1);
>> -    unsigned long tmp;
>> -
>>      if (offset >= size)
>>              return size;
> Again, I think this should be moved to the common implementation in
> _find_next_bit_le, and similarly for find_next_bit_le below.
>
>> -    p += BITOP_WORD(offset);
>> -    size -= result;
>> -    offset &= (BITS_PER_LONG - 1UL);
>> -    if (offset) {
>> -            tmp = ext2_swabp(p++);
>> -            tmp |= (~0UL >> (BITS_PER_LONG - offset));
>> -            if (size < BITS_PER_LONG)
>> -                    goto found_first;
>> -            if (~tmp)
>> -                    goto found_middle;
>> -            size -= BITS_PER_LONG;
>> -            result += BITS_PER_LONG;
>> -    }
>>  
>> -    while (size & ~(BITS_PER_LONG - 1)) {
>> -            if (~(tmp = *(p++)))
>> -                    goto found_middle_swap;
>> -            result += BITS_PER_LONG;
>> -            size -= BITS_PER_LONG;
>> -    }
>> -    if (!size)
>> -            return result;
>> -    tmp = ext2_swabp(p);
>> -found_first:
>> -    tmp |= ~0UL << size;
>> -    if (tmp == ~0UL)        /* Are any bits zero? */
>> -            return result + size; /* Nope. Skip ffz */
>> -found_middle:
>> -    return result + ffz(tmp);
>> -
>> -found_middle_swap:
>> -    return result + ffz(ext2_swab(tmp));
>> +    return min(_find_next_bit_le(addr, size, offset, 0), size);
>>  }
>>  EXPORT_SYMBOL(find_next_zero_bit_le);
>>  #endif
>> @@ -239,45 +162,10 @@ EXPORT_SYMBOL(find_next_zero_bit_le);
>>  unsigned long find_next_bit_le(const void *addr, unsigned
>>              long size, unsigned long offset)
>>  {
>> -    const unsigned long *p = addr;
>> -    unsigned long result = offset & ~(BITS_PER_LONG - 1);
>> -    unsigned long tmp;
>> -
>>      if (offset >= size)
>>              return size;
>> -    p += BITOP_WORD(offset);
>> -    size -= result;
>> -    offset &= (BITS_PER_LONG - 1UL);
>> -    if (offset) {
>> -            tmp = ext2_swabp(p++);
>> -            tmp &= (~0UL << offset);
>> -            if (size < BITS_PER_LONG)
>> -                    goto found_first;
>> -            if (tmp)
>> -                    goto found_middle;
>> -            size -= BITS_PER_LONG;
>> -            result += BITS_PER_LONG;
>> -    }
>> -
>> -    while (size & ~(BITS_PER_LONG - 1)) {
>> -            tmp = *(p++);
>> -            if (tmp)
>> -                    goto found_middle_swap;
>> -            result += BITS_PER_LONG;
>> -            size -= BITS_PER_LONG;
>> -    }
>> -    if (!size)
>> -            return result;
>> -    tmp = ext2_swabp(p);
>> -found_first:
>> -    tmp &= (~0UL >> (BITS_PER_LONG - size));
>> -    if (tmp == 0UL)         /* Are any bits set? */
>> -            return result + size; /* Nope. */
>> -found_middle:
>> -    return result + __ffs(tmp);
>>  
>> -found_middle_swap:
>> -    return result + __ffs(ext2_swab(tmp));
>> +    return min(_find_next_bit_le(addr, size, offset, 1), size);
>>  }
>>  EXPORT_SYMBOL(find_next_bit_le);
>>  #endif

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to