Ben, I like this; I’ll post a new patch ASAP.
Jarno

On Dec 11, 2013, at 11:33 AM, Ben Pfaff <b...@nicira.com> wrote:

> On Wed, Dec 11, 2013 at 10:54:00AM -0800, Jarno Rajahalme wrote:
>> util.h declares this when needed, make sure the definition is compiled
>> in only in that case.
>>
>> Signed-off-by: Jarno Rajahalme <jrajaha...@nicira.com>
>
> With this, I still get:
> ../lib/util.c:921:15: warning: symbol 'count_1bits_8' was not
> declared. Should it be static?
> because the previous declaration of count_1bits_8[] is not in scope at
> the point of definition in util.c.
>
> How about something like this? (Not tested outside of 32-bit without
> __POPCNT__.)
>
> diff --git a/lib/util.c b/lib/util.c
> index 13d41a7..000504c 100644
> --- a/lib/util.c
> +++ b/lib/util.c
> @@ -901,7 +901,7 @@ raw_clz64(uint64_t n)
>  }
>  #endif
>
> -#if !(__GNUC__ >= 4 && defined(__corei7))
> +#if NEED_COUNT_1BITS_8
>  #define INIT1(X) \
>      ((((X) & (1 << 0)) != 0) + \
>       (((X) & (1 << 1)) != 0) + \
> diff --git a/lib/util.h b/lib/util.h
> index 8d810c2..b158c2f 100644
> --- a/lib/util.h
> +++ b/lib/util.h
> @@ -371,49 +371,55 @@ log_2_ceil(uint64_t n)
>      return log_2_floor(n) + !is_pow2(n);
>  }
>
> -extern const uint8_t count_1bits_8[256];
> -
> -/* Returns the number of 1-bits in 'x', between 0 and 32 inclusive. */
> +/* unsigned int count_1bits(uint64_t x):
> + *
> + * Returns the number of 1-bits in 'x', between 0 and 64 inclusive. */
> +#if UINTPTR_MAX == UINT64_MAX
> +static inline unsigned int
> +count_1bits(uint64_t x)
> +{
> +#if __GNUC__ >= 4 && __POPCNT__
> +    return __builtin_popcountll(x);
> +#else
> +    /* This portable implementation is the fastest one we know of for 64
> +     * bits, and about 3x faster than GCC 4.7 __builtin_popcountll(). */
> +    const uint64_t h55 = UINT64_C(0x5555555555555555);
> +    const uint64_t h33 = UINT64_C(0x3333333333333333);
> +    const uint64_t h0F = UINT64_C(0x0F0F0F0F0F0F0F0F);
> +    const uint64_t h01 = UINT64_C(0x0101010101010101);
> +    x -= (x >> 1) & h55; /* Count of each 2 bits in-place. */
> +    x = (x & h33) + ((x >> 2) & h33); /* Count of each 4 bits in-place. */
> +    x = (x + (x >> 4)) & h0F; /* Count of each 8 bits in-place. */
> +    return (x * h01) >> 56; /* Sum of all bytes. */
> +#endif
> +}
> +#else /* not 64-bit */
> +#if __GNUC__ >= 4 && __POPCNT__
>  static inline unsigned int
>  count_1bits_32(uint32_t x)
>  {
> -#if __GNUC__ >= 4 && defined(__corei7)
> -    /* __builtin_popcount() is fast only when supported by the CPU. */
>      return __builtin_popcount(x);
> +}
>  #else
> +#define NEED_COUNT_1BITS_8 1
> +extern const uint8_t count_1bits_8[256];
> +static inline unsigned int
> +count_1bits_32(uint32_t x)
> +{
>      /* This portable implementation is the fastest one we know of for 32 bits,
>       * and faster than GCC __builtin_popcount(). */
>      return (count_1bits_8[x & 0xff] +
>              count_1bits_8[(x >> 8) & 0xff] +
>              count_1bits_8[(x >> 16) & 0xff] +
>              count_1bits_8[x >> 24]);
> -#endif
>  }
> -
> -/* Returns the number of 1-bits in 'x', between 0 and 64 inclusive. */
> +#endif
>  static inline unsigned int
>  count_1bits(uint64_t x)
>  {
> -    if (sizeof(void *) == 8) { /* 64-bit CPU */
> -#if __GNUC__ >= 4 && defined(__corei7)
> -        /* __builtin_popcountll() is fast only when supported by the CPU. */
> -        return __builtin_popcountll(x);
> -#else
> -        /* This portable implementation is the fastest one we know of for 64
> -         * bits, and about 3x faster than GCC 4.7 __builtin_popcountll(). */
> -        const uint64_t h55 = UINT64_C(0x5555555555555555);
> -        const uint64_t h33 = UINT64_C(0x3333333333333333);
> -        const uint64_t h0F = UINT64_C(0x0F0F0F0F0F0F0F0F);
> -        const uint64_t h01 = UINT64_C(0x0101010101010101);
> -        x -= (x >> 1) & h55; /* Count of each 2 bits in-place. */
> -        x = (x & h33) + ((x >> 2) & h33); /* Count of each 4 bits in-place. */
> -        x = (x + (x >> 4)) & h0F; /* Count of each 8 bits in-place. */
> -        return (x * h01) >> 56; /* Sum of all bytes. */
> -#endif
> -    } else { /* 32-bit CPU */
> -        return count_1bits_32(x) + count_1bits_32(x >> 32);
> -    }
> +    return count_1bits_32(x) + count_1bits_32(x >> 32);
>  }
> +#endif
>
>  /* Returns the rightmost 1-bit in 'x' (e.g. 01011000 => 00001000), or 0 if 'x'
>   * is 0. */
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev