Thanks, pushed! Jarno
On Dec 11, 2013, at 9:25 PM, Simon Horman <ho...@verge.net.au> wrote: > On Wed, Dec 11, 2013 at 03:41:19PM -0800, Jarno Rajahalme wrote: >> - Use the GCC predefined macro __POPCNT__ to detect the availability >> of a fast __builtin_popcount function. >> - Use portable preprocessor macros to detect 64-bit build. >> - Only define the 32-bit parts when needed and declare the >> count_1bits_8 at file scope to silence a warning. >> >> This time I have tested all code paths to make sure no warnings are >> generated. >> >> Signed-off-by: Jarno Rajahalme <jrajaha...@nicira.com> > > No objections here. > > Reviewed-by: Simon Horman <ho...@verge.net.au> > >> --- >> lib/util.c | 2 +- >> lib/util.h | 62 >> +++++++++++++++++++++++++++++++++--------------------------- >> 2 files changed, 35 insertions(+), 29 deletions(-) >> >> diff --git a/lib/util.c b/lib/util.c >> index 13d41a7..000504c 100644 >> --- a/lib/util.c >> +++ b/lib/util.c >> @@ -901,7 +901,7 @@ raw_clz64(uint64_t n) >> } >> #endif >> >> -#if !(__GNUC__ >= 4 && defined(__corei7)) >> +#if NEED_COUNT_1BITS_8 >> #define INIT1(X) \ >> ((((X) & (1 << 0)) != 0) + \ >> (((X) & (1 << 1)) != 0) + \ >> diff --git a/lib/util.h b/lib/util.h >> index 8d810c2..0327ab0 100644 >> --- a/lib/util.h >> +++ b/lib/util.h >> @@ -371,49 +371,55 @@ log_2_ceil(uint64_t n) >> return log_2_floor(n) + !is_pow2(n); >> } >> >> -extern const uint8_t count_1bits_8[256]; >> - >> -/* Returns the number of 1-bits in 'x', between 0 and 32 inclusive. */ >> +/* unsigned int count_1bits(uint64_t x): >> + * >> + * Returns the number of 1-bits in 'x', between 0 and 64 inclusive. */ >> +#if UINTPTR_MAX == UINT64_MAX >> +static inline unsigned int >> +count_1bits(uint64_t x) >> +{ >> +#if __GNUC__ >= 4 && __POPCNT__ >> + return __builtin_popcountll(x); >> +#else >> + /* This portable implementation is the fastest one we know of for 64 >> + * bits, and about 3x faster than GCC 4.7 __builtin_popcountll(). 
*/ >> + const uint64_t h55 = UINT64_C(0x5555555555555555); >> + const uint64_t h33 = UINT64_C(0x3333333333333333); >> + const uint64_t h0F = UINT64_C(0x0F0F0F0F0F0F0F0F); >> + const uint64_t h01 = UINT64_C(0x0101010101010101); >> + x -= (x >> 1) & h55; /* Count of each 2 bits in-place. */ >> + x = (x & h33) + ((x >> 2) & h33); /* Count of each 4 bits in-place. */ >> + x = (x + (x >> 4)) & h0F; /* Count of each 8 bits in-place. */ >> + return (x * h01) >> 56; /* Sum of all bytes. */ >> +#endif >> +} >> +#else /* Not 64-bit. */ >> +#if __GNUC__ >= 4 && __POPCNT__ >> static inline unsigned int >> -count_1bits_32(uint32_t x) >> +count_1bits_32__(uint32_t x) >> { >> -#if __GNUC__ >= 4 && defined(__corei7) >> - /* __builtin_popcount() is fast only when supported by the CPU. */ >> return __builtin_popcount(x); >> +} >> #else >> +#define NEED_COUNT_1BITS_8 1 >> +extern const uint8_t count_1bits_8[256]; >> +static inline unsigned int >> +count_1bits_32__(uint32_t x) >> +{ >> /* This portable implementation is the fastest one we know of for 32 >> bits, >> * and faster than GCC __builtin_popcount(). */ >> return (count_1bits_8[x & 0xff] + >> count_1bits_8[(x >> 8) & 0xff] + >> count_1bits_8[(x >> 16) & 0xff] + >> count_1bits_8[x >> 24]); >> -#endif >> } >> - >> -/* Returns the number of 1-bits in 'x', between 0 and 64 inclusive. */ >> +#endif >> static inline unsigned int >> count_1bits(uint64_t x) >> { >> - if (sizeof(void *) == 8) { /* 64-bit CPU */ >> -#if __GNUC__ >= 4 && defined(__corei7) >> - /* __builtin_popcountll() is fast only when supported by the CPU. */ >> - return __builtin_popcountll(x); >> -#else >> - /* This portable implementation is the fastest one we know of for 64 >> - * bits, and about 3x faster than GCC 4.7 __builtin_popcountll(). 
*/ >> - const uint64_t h55 = UINT64_C(0x5555555555555555); >> - const uint64_t h33 = UINT64_C(0x3333333333333333); >> - const uint64_t h0F = UINT64_C(0x0F0F0F0F0F0F0F0F); >> - const uint64_t h01 = UINT64_C(0x0101010101010101); >> - x -= (x >> 1) & h55; /* Count of each 2 bits >> in-place. */ >> - x = (x & h33) + ((x >> 2) & h33); /* Count of each 4 bits >> in-place. */ >> - x = (x + (x >> 4)) & h0F; /* Count of each 8 bits >> in-place. */ >> - return (x * h01) >> 56; /* Sum of all bytes. */ >> -#endif >> - } else { /* 32-bit CPU */ >> - return count_1bits_32(x) + count_1bits_32(x >> 32); >> - } >> + return count_1bits_32__(x) + count_1bits_32__(x >> 32); >> } >> +#endif >> >> /* Returns the rightmost 1-bit in 'x' (e.g. 01011000 => 00001000), or 0 if >> 'x' >> * is 0. */ >> -- >> 1.7.10.4 >> >> _______________________________________________ >> dev mailing list >> dev@openvswitch.org >> http://openvswitch.org/mailman/listinfo/dev
_______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev