Thanks, pushed!

        Jarno

On Dec 11, 2013, at 9:25 PM, Simon Horman <ho...@verge.net.au> wrote:

> On Wed, Dec 11, 2013 at 03:41:19PM -0800, Jarno Rajahalme wrote:
>> - Use the GCC predefined macro __POPCNT__ to detect the availability
>>  of the fast __builtin_popcount functions.
>> - Use portable preprocessor macros to detect 64-bit build.
>> - Only define the 32-bit parts when needed and declare the
>>  count_1bits_8 at file scope to silence a warning.
>> 
>> This time I have tested all code paths to make sure no warnings are
>> generated.
>> 
>> Signed-off-by: Jarno Rajahalme <jrajaha...@nicira.com>
> 
> No objections here.
> 
> Reviewed-by: Simon Horman <ho...@verge.net.au>
> 
>> ---
>> lib/util.c |    2 +-
>> lib/util.h |   62 +++++++++++++++++++++++++++++++++---------------------------
>> 2 files changed, 35 insertions(+), 29 deletions(-)
>> 
>> diff --git a/lib/util.c b/lib/util.c
>> index 13d41a7..000504c 100644
>> --- a/lib/util.c
>> +++ b/lib/util.c
>> @@ -901,7 +901,7 @@ raw_clz64(uint64_t n)
>> }
>> #endif
>> 
>> -#if !(__GNUC__ >= 4 && defined(__corei7))
>> +#if NEED_COUNT_1BITS_8
>> #define INIT1(X)                                \
>>     ((((X) & (1 << 0)) != 0) +                  \
>>      (((X) & (1 << 1)) != 0) +                  \
>> diff --git a/lib/util.h b/lib/util.h
>> index 8d810c2..0327ab0 100644
>> --- a/lib/util.h
>> +++ b/lib/util.h
>> @@ -371,49 +371,55 @@ log_2_ceil(uint64_t n)
>>     return log_2_floor(n) + !is_pow2(n);
>> }
>> 
>> -extern const uint8_t count_1bits_8[256];
>> -
>> -/* Returns the number of 1-bits in 'x', between 0 and 32 inclusive. */
>> +/* unsigned int count_1bits(uint64_t x):
>> + *
>> + * Returns the number of 1-bits in 'x', between 0 and 64 inclusive. */
>> +#if UINTPTR_MAX == UINT64_MAX
>> +static inline unsigned int
>> +count_1bits(uint64_t x)
>> +{
>> +#if __GNUC__ >= 4 && __POPCNT__
>> +    return __builtin_popcountll(x);
>> +#else
>> +    /* This portable implementation is the fastest one we know of for 64
>> +     * bits, and about 3x faster than GCC 4.7 __builtin_popcountll(). */
>> +    const uint64_t h55 = UINT64_C(0x5555555555555555);
>> +    const uint64_t h33 = UINT64_C(0x3333333333333333);
>> +    const uint64_t h0F = UINT64_C(0x0F0F0F0F0F0F0F0F);
>> +    const uint64_t h01 = UINT64_C(0x0101010101010101);
>> +    x -= (x >> 1) & h55;               /* Count of each 2 bits in-place. */
>> +    x = (x & h33) + ((x >> 2) & h33);  /* Count of each 4 bits in-place. */
>> +    x = (x + (x >> 4)) & h0F;          /* Count of each 8 bits in-place. */
>> +    return (x * h01) >> 56;            /* Sum of all bytes. */
>> +#endif
>> +}
>> +#else /* Not 64-bit. */
>> +#if __GNUC__ >= 4 && __POPCNT__
>> static inline unsigned int
>> -count_1bits_32(uint32_t x)
>> +count_1bits_32__(uint32_t x)
>> {
>> -#if __GNUC__ >= 4 && defined(__corei7)
>> -    /* __builtin_popcount() is fast only when supported by the CPU. */
>>     return __builtin_popcount(x);
>> +}
>> #else
>> +#define NEED_COUNT_1BITS_8 1
>> +extern const uint8_t count_1bits_8[256];
>> +static inline unsigned int
>> +count_1bits_32__(uint32_t x)
>> +{
>>     /* This portable implementation is the fastest one we know of for 32 bits,
>>      * and faster than GCC __builtin_popcount(). */
>>     return (count_1bits_8[x & 0xff] +
>>             count_1bits_8[(x >> 8) & 0xff] +
>>             count_1bits_8[(x >> 16) & 0xff] +
>>             count_1bits_8[x >> 24]);
>> -#endif
>> }
>> -
>> -/* Returns the number of 1-bits in 'x', between 0 and 64 inclusive. */
>> +#endif
>> static inline unsigned int
>> count_1bits(uint64_t x)
>> {
>> -    if (sizeof(void *) == 8) { /* 64-bit CPU */
>> -#if __GNUC__ >= 4 && defined(__corei7)
>> -        /* __builtin_popcountll() is fast only when supported by the CPU. */
>> -        return __builtin_popcountll(x);
>> -#else
>> -        /* This portable implementation is the fastest one we know of for 64
>> -         * bits, and about 3x faster than GCC 4.7 __builtin_popcountll(). */
>> -        const uint64_t h55 = UINT64_C(0x5555555555555555);
>> -        const uint64_t h33 = UINT64_C(0x3333333333333333);
>> -        const uint64_t h0F = UINT64_C(0x0F0F0F0F0F0F0F0F);
>> -        const uint64_t h01 = UINT64_C(0x0101010101010101);
>> -        x -= (x >> 1) & h55;               /* Count of each 2 bits in-place. */
>> -        x = (x & h33) + ((x >> 2) & h33);  /* Count of each 4 bits in-place. */
>> -        x = (x + (x >> 4)) & h0F;          /* Count of each 8 bits in-place. */
>> -        return (x * h01) >> 56;            /* Sum of all bytes. */
>> -#endif
>> -    } else { /* 32-bit CPU */
>> -        return count_1bits_32(x) + count_1bits_32(x >> 32);
>> -    }
>> +    return count_1bits_32__(x) + count_1bits_32__(x >> 32);
>> }
>> +#endif
>> 
>> /* Returns the rightmost 1-bit in 'x' (e.g. 01011000 => 00001000), or 0 if 'x'
>>  * is 0. */
>> -- 
>> 1.7.10.4
>> 
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> http://openvswitch.org/mailman/listinfo/dev

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to