On Wed, Dec 11, 2013 at 03:41:19PM -0800, Jarno Rajahalme wrote:
> - Use the GCC predefined macro __POPCNT__ to detect the availability
>   of the fast __builtin_popcount() and __builtin_popcountll() functions.
> - Use portable preprocessor macros to detect 64-bit build.
> - Only define the 32-bit parts when needed and declare the
>   count_1bits_8 at file scope to silence a warning.
> 
> This time I have tested all code paths to make sure no warnings are
> generated.
> 
> Signed-off-by: Jarno Rajahalme <jrajaha...@nicira.com>

No objections here.

Reviewed-by: Simon Horman <ho...@verge.net.au>

> ---
>  lib/util.c |    2 +-
>  lib/util.h |   62 +++++++++++++++++++++++++++++++++---------------------------
>  2 files changed, 35 insertions(+), 29 deletions(-)
> 
> diff --git a/lib/util.c b/lib/util.c
> index 13d41a7..000504c 100644
> --- a/lib/util.c
> +++ b/lib/util.c
> @@ -901,7 +901,7 @@ raw_clz64(uint64_t n)
>  }
>  #endif
>  
> -#if !(__GNUC__ >= 4 && defined(__corei7))
> +#if NEED_COUNT_1BITS_8
>  #define INIT1(X)                                \
>      ((((X) & (1 << 0)) != 0) +                  \
>       (((X) & (1 << 1)) != 0) +                  \
> diff --git a/lib/util.h b/lib/util.h
> index 8d810c2..0327ab0 100644
> --- a/lib/util.h
> +++ b/lib/util.h
> @@ -371,49 +371,55 @@ log_2_ceil(uint64_t n)
>      return log_2_floor(n) + !is_pow2(n);
>  }
>  
> -extern const uint8_t count_1bits_8[256];
> -
> -/* Returns the number of 1-bits in 'x', between 0 and 32 inclusive. */
> +/* unsigned int count_1bits(uint64_t x):
> + *
> + * Returns the number of 1-bits in 'x', between 0 and 64 inclusive. */
> +#if UINTPTR_MAX == UINT64_MAX
> +static inline unsigned int
> +count_1bits(uint64_t x)
> +{
> +#if __GNUC__ >= 4 && __POPCNT__
> +    return __builtin_popcountll(x);
> +#else
> +    /* This portable implementation is the fastest one we know of for 64
> +     * bits, and about 3x faster than GCC 4.7 __builtin_popcountll(). */
> +    const uint64_t h55 = UINT64_C(0x5555555555555555);
> +    const uint64_t h33 = UINT64_C(0x3333333333333333);
> +    const uint64_t h0F = UINT64_C(0x0F0F0F0F0F0F0F0F);
> +    const uint64_t h01 = UINT64_C(0x0101010101010101);
> +    x -= (x >> 1) & h55;               /* Count of each 2 bits in-place. */
> +    x = (x & h33) + ((x >> 2) & h33);  /* Count of each 4 bits in-place. */
> +    x = (x + (x >> 4)) & h0F;          /* Count of each 8 bits in-place. */
> +    return (x * h01) >> 56;            /* Sum of all bytes. */
> +#endif
> +}
> +#else /* Not 64-bit. */
> +#if __GNUC__ >= 4 && __POPCNT__
>  static inline unsigned int
> -count_1bits_32(uint32_t x)
> +count_1bits_32__(uint32_t x)
>  {
> -#if __GNUC__ >= 4 && defined(__corei7)
> -    /* __builtin_popcount() is fast only when supported by the CPU. */
>      return __builtin_popcount(x);
> +}
>  #else
> +#define NEED_COUNT_1BITS_8 1
> +extern const uint8_t count_1bits_8[256];
> +static inline unsigned int
> +count_1bits_32__(uint32_t x)
> +{
>      /* This portable implementation is the fastest one we know of for 32 bits,
>       * and faster than GCC __builtin_popcount(). */
>      return (count_1bits_8[x & 0xff] +
>              count_1bits_8[(x >> 8) & 0xff] +
>              count_1bits_8[(x >> 16) & 0xff] +
>              count_1bits_8[x >> 24]);
> -#endif
>  }
> -
> -/* Returns the number of 1-bits in 'x', between 0 and 64 inclusive. */
> +#endif
>  static inline unsigned int
>  count_1bits(uint64_t x)
>  {
> -    if (sizeof(void *) == 8) { /* 64-bit CPU */
> -#if __GNUC__ >= 4 && defined(__corei7)
> -        /* __builtin_popcountll() is fast only when supported by the CPU. */
> -        return __builtin_popcountll(x);
> -#else
> -        /* This portable implementation is the fastest one we know of for 64
> -         * bits, and about 3x faster than GCC 4.7 __builtin_popcountll(). */
> -        const uint64_t h55 = UINT64_C(0x5555555555555555);
> -        const uint64_t h33 = UINT64_C(0x3333333333333333);
> -        const uint64_t h0F = UINT64_C(0x0F0F0F0F0F0F0F0F);
> -        const uint64_t h01 = UINT64_C(0x0101010101010101);
> -        x -= (x >> 1) & h55;               /* Count of each 2 bits in-place. */
> -        x = (x & h33) + ((x >> 2) & h33);  /* Count of each 4 bits in-place. */
> -        x = (x + (x >> 4)) & h0F;          /* Count of each 8 bits in-place. */
> -        return (x * h01) >> 56;            /* Sum of all bytes. */
> -#endif
> -    } else { /* 32-bit CPU */
> -        return count_1bits_32(x) + count_1bits_32(x >> 32);
> -    }
> +    return count_1bits_32__(x) + count_1bits_32__(x >> 32);
>  }
> +#endif
>  
>  /* Returns the rightmost 1-bit in 'x' (e.g. 01011000 => 00001000), or 0 if 'x'
>   * is 0. */
> -- 
> 1.7.10.4
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
> 
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to