On 24.01.2015 03:45, Rasmus Villemoes wrote:
> On Mon, Jan 19 2015, Yury Norov <yury.no...@gmail.com> wrote:
>
>> New implementation takes less space, and, I hope, easier to understand.
>>
>> Signed-off-by: Yury Norov <yury.no...@gmail.com>
>> ---
>>  lib/find_next_bit.c | 265 +++++++++++++++-------------------------------------
>>  1 file changed, 73 insertions(+), 192 deletions(-)
>>
> That diffstat is certainly nice. Do you also have numbers for the
> size of the generated code, and do you know if there is a
> measurable performance difference? Have you tested whether the new and
> old code give the same results, also in corner cases?
Hello, Rasmus. Thank you for your time.

> Do you also have numbers for the size of the generated code

Before the patch, the text section is 817 bytes; after, it is 533.
I have modified the patch since this version, so the numbers may differ
slightly.

> Have you tested whether the new and old code give the same results, also in 
> corner cases?

I tested the new version alongside the old one: whenever the new code returned
a different value, it was printed to the system log. I didn't measure
performance because I don't expect a significant gain here. But now I think
it's a good idea to write tests for performance and corner cases. Maybe
someone has already done that... brief googling didn't help. In any case, I
will include my measurements with the new version of the patch.
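
For what it's worth, the corner-case check I have in mind is roughly the
following (a hypothetical test sketch, not part of the patch;
old_find_next_bit() stands for a verbatim copy of the pre-patch
implementation, kept only for comparison):

 #include <linux/bitmap.h>
 #include <linux/bitops.h>
 #include <linux/printk.h>
 #include <linux/random.h>

 #define TEST_BITS 2048

 /* old_find_next_bit() is hypothetical: a copy of the old implementation. */
 static void test_find_next_bit(void)
 {
	static DECLARE_BITMAP(map, TEST_BITS);
	unsigned long i, old, new;

	get_random_bytes(map, sizeof(map));

	/* Include i == TEST_BITS to cover the offset == size corner case. */
	for (i = 0; i <= TEST_BITS; i++) {
		old = old_find_next_bit(map, TEST_BITS, i);
		new = find_next_bit(map, TEST_BITS, i);
		if (old != new)
			pr_err("find_next_bit mismatch: offset %lu: %lu != %lu\n",
			       i, old, new);
	}
 }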
>
> Some remarks inline below.
>
>> diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
>> index 0cbfc0b..a5c915f 100644
>> --- a/lib/find_next_bit.c
>> +++ b/lib/find_next_bit.c
>> @@ -11,10 +11,39 @@
>>  
>>  #include <linux/bitops.h>
>>  #include <linux/export.h>
>> +#include <linux/kernel.h>
>>  #include <asm/types.h>
>>  #include <asm/byteorder.h>
>>  
>> -#define BITOP_WORD(nr)              ((nr) / BITS_PER_LONG)
>> +#define HIGH_BITS_MASK(nr)          (ULONG_MAX << (nr))
>> +#define MIN(a, b)                   ((a) < (b) ? (a) : (b))
>> +
> Please don't duplicate min/max macros. kernel.h already provides everything 
> you need.
Ok.
>
>> +#if !defined(find_next_bit) || !defined(find_next_zero_bit)
>> +static unsigned long _find_next_bit(const unsigned long *addr,
>> +            unsigned long end, unsigned long start, unsigned long flags)
> Having two parameters called end and start appearing in that
> order is slightly confusing. Why not keep the name 'size' for
> end, or maybe 'nbits' to make the unit clear. Also, I think flags
> should just be a bool and maybe renamed to something more meaningful.
You're right. Something like this:
 static unsigned long _find_next_bit(const unsigned long *addr,
                        unsigned long nbits, unsigned long start_bit, bool set)
looks better.
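
For reference, a minimal sketch of the whole helper with that signature might
be (assuming HIGH_BITS_MASK from the patch and min() from kernel.h; the
start >= nbits check is discussed separately below):

 static unsigned long _find_next_bit(const unsigned long *addr,
			unsigned long nbits, unsigned long start_bit, bool set)
 {
	unsigned long tmp = set ? addr[start_bit / BITS_PER_LONG]
				: ~addr[start_bit / BITS_PER_LONG];

	/*
	 * Handle 1st word: mask off bits below start_bit.
	 * HIGH_BITS_MASK(0) is ~0UL, so the aligned case needs no
	 * special handling.
	 */
	tmp &= HIGH_BITS_MASK(start_bit % BITS_PER_LONG);
	start_bit = round_down(start_bit, BITS_PER_LONG);

	while (!tmp) {
		start_bit += BITS_PER_LONG;
		if (start_bit >= nbits)
			return nbits;

		tmp = set ? addr[start_bit / BITS_PER_LONG]
			  : ~addr[start_bit / BITS_PER_LONG];
	}

	return min(start_bit + __ffs(tmp), nbits);
 }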
>
>> +{
>> +    unsigned long tmp = flags ? addr[start / BITS_PER_LONG]
>> +                    : ~addr[start / BITS_PER_LONG];
>> +
>> +    /* Handle 1st word. */
>> +    if (!IS_ALIGNED(start, BITS_PER_LONG)) {
>> +            tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
>> +            start = round_down(start, BITS_PER_LONG);
>> +    }
>> +
>> +    do {
>> +            if (tmp)
>> +                    return MIN(start + __ffs(tmp), end);
>> +
>> +            start += BITS_PER_LONG;
>> +            if (start >= end)
>> +                    return end;
>> +
>> +            tmp = flags ? addr[start / BITS_PER_LONG]
>> +                    : ~addr[start / BITS_PER_LONG];
>> +    } while (1);
>> +}
>> +#endif
>>  
>>  #ifndef find_next_bit
>>  /*
>> @@ -23,86 +52,16 @@
>>  unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
>>                          unsigned long offset)
>>  {
>> -    const unsigned long *p = addr + BITOP_WORD(offset);
>> -    unsigned long result = offset & ~(BITS_PER_LONG-1);
>> -    unsigned long tmp;
>> -
>> -    if (offset >= size)
>> -            return size;
> The previous versions handled this, but your code will always access
> the word at addr[start/BITS_PER_LONG]. Are you sure no caller ever
> passes start >= size?
You're right. Fixed.
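Concretely, the fix is just an early return at the top of _find_next_bit
(using the names from the posted version):

	/* Never touch the bitmap if the caller starts beyond it. */
	if (start >= end)
		return end;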
>
>> -    size -= result;
>> -    offset %= BITS_PER_LONG;
>> -    if (offset) {
>> -            tmp = *(p++);
>> -            tmp &= (~0UL << offset);
>> -            if (size < BITS_PER_LONG)
>> -                    goto found_first;
>> -            if (tmp)
>> -                    goto found_middle;
>> -            size -= BITS_PER_LONG;
>> -            result += BITS_PER_LONG;
>> -    }
>> -    while (size & ~(BITS_PER_LONG-1)) {
>> -            if ((tmp = *(p++)))
>> -                    goto found_middle;
>> -            result += BITS_PER_LONG;
>> -            size -= BITS_PER_LONG;
>> -    }
>> -    if (!size)
>> -            return result;
>> -    tmp = *p;
>> -
>> -found_first:
>> -    tmp &= (~0UL >> (BITS_PER_LONG - size));
>> -    if (tmp == 0UL)         /* Are any bits set? */
>> -            return result + size;   /* Nope. */
>> -found_middle:
>> -    return result + __ffs(tmp);
>> +    return _find_next_bit(addr, size, offset, 1);
>>  }
>>  EXPORT_SYMBOL(find_next_bit);
>>  #endif
>>  
>>  #ifndef find_next_zero_bit
>> -/*
>> - * This implementation of find_{first,next}_zero_bit was stolen from
>> - * Linus' asm-alpha/bitops.h.
>> - */
>>  unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
>>                               unsigned long offset)
>>  {
>> -    const unsigned long *p = addr + BITOP_WORD(offset);
>> -    unsigned long result = offset & ~(BITS_PER_LONG-1);
>> -    unsigned long tmp;
>> -
>> -    if (offset >= size)
>> -            return size;
>> -    size -= result;
>> -    offset %= BITS_PER_LONG;
>> -    if (offset) {
>> -            tmp = *(p++);
>> -            tmp |= ~0UL >> (BITS_PER_LONG - offset);
>> -            if (size < BITS_PER_LONG)
>> -                    goto found_first;
>> -            if (~tmp)
>> -                    goto found_middle;
>> -            size -= BITS_PER_LONG;
>> -            result += BITS_PER_LONG;
>> -    }
>> -    while (size & ~(BITS_PER_LONG-1)) {
>> -            if (~(tmp = *(p++)))
>> -                    goto found_middle;
>> -            result += BITS_PER_LONG;
>> -            size -= BITS_PER_LONG;
>> -    }
>> -    if (!size)
>> -            return result;
>> -    tmp = *p;
>> -
>> -found_first:
>> -    tmp |= ~0UL << size;
>> -    if (tmp == ~0UL)        /* Are any bits zero? */
>> -            return result + size;   /* Nope. */
>> -found_middle:
>> -    return result + ffz(tmp);
>> +    return _find_next_bit(addr, size, offset, 0);
>>  }
>>  EXPORT_SYMBOL(find_next_zero_bit);
>>  #endif
>> @@ -113,24 +72,14 @@ EXPORT_SYMBOL(find_next_zero_bit);
>>   */
>>  unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
>>  {
>> -    const unsigned long *p = addr;
>> -    unsigned long result = 0;
>> -    unsigned long tmp;
>> +    unsigned int idx;
>>  
>> -    while (size & ~(BITS_PER_LONG-1)) {
>> -            if ((tmp = *(p++)))
>> -                    goto found;
>> -            result += BITS_PER_LONG;
>> -            size -= BITS_PER_LONG;
>> +    for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
>> +            if (addr[idx])
>> +                    return idx * BITS_PER_LONG + __ffs(addr[idx]);
>>      }
> I'm afraid this is wrong. It doesn't take garbage bits in the last
> (partial) word into account. You either need to loop over the full
> words and handle the last separately, masking off the garbage bits, or
> wrap min(, size) around the above expression, just as you've done in
> _find_next_bit. I think I prefer the latter.
Yes. Done.
>
> Does anyone use insane size bitmaps with 2^32 bits or more? If so,
> you'd have to watch out for overflow and use unsigned long for idx.
Yes.
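
With both points applied (min() around the result and unsigned long for the
index), find_first_bit would look roughly like this:

 unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
 {
	unsigned long idx;

	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
		if (addr[idx])
			return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
	}

	return size;
 }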
>
>> -    if (!size)
>> -            return result;
>>  
>> -    tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
>> -    if (tmp == 0UL)         /* Are any bits set? */
>> -            return result + size;   /* Nope. */
>> -found:
>> -    return result + __ffs(tmp);
>> +    return size;
>>  }
>>  EXPORT_SYMBOL(find_first_bit);
>>  #endif
>> @@ -141,24 +90,14 @@ EXPORT_SYMBOL(find_first_bit);
>>   */
>>  unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
>>  {
>> -    const unsigned long *p = addr;
>> -    unsigned long result = 0;
>> -    unsigned long tmp;
>> +    unsigned int idx;
>>  
>> -    while (size & ~(BITS_PER_LONG-1)) {
>> -            if (~(tmp = *(p++)))
>> -                    goto found;
>> -            result += BITS_PER_LONG;
>> -            size -= BITS_PER_LONG;
>> +    for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
>> +            if (addr[idx] != ULONG_MAX)
>> +                    return idx * BITS_PER_LONG + ffz(addr[idx]);
>>      }
> The same of course applies here.
>
>> -    if (!size)
>> -            return result;
>>  
>> -    tmp = (*p) | (~0UL << size);
>> -    if (tmp == ~0UL)        /* Are any bits zero? */
>> -            return result + size;   /* Nope. */
>> -found:
>> -    return result + ffz(tmp);
>> +    return size;
>>  }
>>  EXPORT_SYMBOL(find_first_zero_bit);
>>  #endif
>> @@ -166,18 +105,6 @@ EXPORT_SYMBOL(find_first_zero_bit);
>>  #ifdef __BIG_ENDIAN
>>  
>>  /* include/linux/byteorder does not support "unsigned long" type */
>> -static inline unsigned long ext2_swabp(const unsigned long * x)
>> -{
>> -#if BITS_PER_LONG == 64
>> -    return (unsigned long) __swab64p((u64 *) x);
>> -#elif BITS_PER_LONG == 32
>> -    return (unsigned long) __swab32p((u32 *) x);
>> -#else
>> -#error BITS_PER_LONG not defined
>> -#endif
>> -}
>> -
>> -/* include/linux/byteorder doesn't support "unsigned long" type */
>>  static inline unsigned long ext2_swab(const unsigned long y)
>>  {
>>  #if BITS_PER_LONG == 64
>> @@ -189,48 +116,40 @@ static inline unsigned long ext2_swab(const unsigned long y)
>>  #endif
>>  }
>>  
>> -#ifndef find_next_zero_bit_le
>> -unsigned long find_next_zero_bit_le(const void *addr, unsigned
>> -            long size, unsigned long offset)
>> +#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
>> +static unsigned long _find_next_bit_le(const unsigned long *addr,
>> +            unsigned long end, unsigned long start, unsigned long flags)
>>  {
>> -    const unsigned long *p = addr;
>> -    unsigned long result = offset & ~(BITS_PER_LONG - 1);
>> -    unsigned long tmp;
>> -
>> -    if (offset >= size)
>> -            return size;
>> -    p += BITOP_WORD(offset);
>> -    size -= result;
>> -    offset &= (BITS_PER_LONG - 1UL);
>> -    if (offset) {
>> -            tmp = ext2_swabp(p++);
>> -            tmp |= (~0UL >> (BITS_PER_LONG - offset));
>> -            if (size < BITS_PER_LONG)
>> -                    goto found_first;
>> -            if (~tmp)
>> -                    goto found_middle;
>> -            size -= BITS_PER_LONG;
>> -            result += BITS_PER_LONG;
>> +    unsigned long tmp = flags ? addr[start / BITS_PER_LONG]
>> +                    : ~addr[start / BITS_PER_LONG];
>> +
>> +    /* Handle 1st word. */
>> +    if (!IS_ALIGNED(start, BITS_PER_LONG)) {
>> +            tmp &= ext2_swab(HIGH_BITS_MASK(start
>> +                                    % BITS_PER_LONG));
> Nit: There's no reason to break that line.
>
>> +            start = round_down(start, BITS_PER_LONG);
>>      }
>>  
>> -    while (size & ~(BITS_PER_LONG - 1)) {
>> -            if (~(tmp = *(p++)))
>> -                    goto found_middle_swap;
>> -            result += BITS_PER_LONG;
>> -            size -= BITS_PER_LONG;
>> -    }
>> -    if (!size)
>> -            return result;
>> -    tmp = ext2_swabp(p);
>> -found_first:
>> -    tmp |= ~0UL << size;
>> -    if (tmp == ~0UL)        /* Are any bits zero? */
>> -            return result + size; /* Nope. Skip ffz */
>> -found_middle:
>> -    return result + ffz(tmp);
>> +    do {
>> +            if (tmp)
>> +                    return MIN(start +
>> +                            __ffs(ext2_swab(tmp)), end);
> Again, no reason to break the line.
The reason was the 80-character line limit, but I agree I broke the line in an
awkward place.
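For example, even at that indentation level the unbroken form (with min() from
kernel.h, as discussed above) stays within 80 columns:

			return min(start + __ffs(ext2_swab(tmp)), end);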
>   
>> -found_middle_swap:
>> -    return result + ffz(ext2_swab(tmp));
>> +            start += BITS_PER_LONG;
>> +            if (start >= end)
>> +                    return end;
>> +
>> +            tmp = flags ? addr[start / BITS_PER_LONG]
>> +                    : ~addr[start / BITS_PER_LONG];
>> +    } while (1);
>> +}
>> +#endif
>> +
>> +#ifndef find_next_zero_bit_le
>> +unsigned long find_next_zero_bit_le(const void *addr, unsigned
>> +            long size, unsigned long offset)
>> +{
>> +    return _find_next_bit_le(addr, size, offset, o);
> I'm assuming you only compile-tested this on little-endian. The o
> wants to be a 0.
Yes, shame on me. I think I'll bring up a SPARC or big-endian ARM environment
in QEMU before uploading the new version.
>
> Rasmus
>
