On Fri, 2 Mar 2012, Bruce Evans wrote:
On Thu, 1 Mar 2012, Tijl Coosemans wrote:
On Wednesday 29 February 2012 08:44:54 Bruce Evans wrote:
...
So any macro version must use gcc features to be safe. The following
seems to work:
#define __bswap16(x) __extension__ ({ __uint16_t __x = x;
(__uint16_t)(__x << 8 | __x >> 8); })
clang now produces "rolw $8,x" when x is volatile. This seems to
violate volatile semantics. gcc produces load-rolw-store then. Both
produce "rolw $8,x" when x is not volatile.
I'll have a closer look at the patch tomorrow, but the Intel
documentation for the bswap instruction recommends to use xchg for
bswap16.
That's what i386 used to do, using an asm (see for example the RELENG_4
version), but the asm was replaced by C code and the compiler apparently
knows better. This might depend on -mtune. I tested with the default,
...
Here is another version. It has now been tested a bit at runtime.
I had to restore almost all of the complications, so the following
reduces to mostly style changes including comments about surprising
details. It has 1 fix for clang (the comitted version doesn't
work for clang).
% Index: endian.h
% ===================================================================
% RCS file: /home/ncvs/src/sys/x86/include/endian.h,v
% retrieving revision 1.1
% diff -u -2 -r1.1 endian.h
% --- endian.h 28 Feb 2012 19:39:54 -0000 1.1
% +++ endian.h 2 Mar 2012 10:43:53 -0000
% @@ -37,8 +37,4 @@
% #include <sys/_types.h>
%
% -#ifdef __cplusplus
% -extern "C" {
% -#endif
% -
% /*
% * Define the order of 32-bit words in 64-bit words.
% @@ -68,25 +64,17 @@
% #endif
%
% -#if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE_BUILTIN_CONSTANT_P)
% -
% -#define __bswap16_const(_x) (__uint16_t)((_x) << 8 | (_x) >> 8)
% +#define __bswap16_gen(x) (__uint16_t)((x) << 8 | (x) >> 8)
% +#define __bswap32_gen(x) \
% + (((__uint32_t)__bswap16(x) << 16) | __bswap16((x) >> 16))
% +#define __bswap64_gen(x) \
% + (((__uint64_t)__bswap32(x) << 32) | __bswap32((x) >> 32))
%
% -#define __bswap16(_x) \
% - (__builtin_constant_p(_x) ? \
% - __bswap16_const((__uint16_t)(_x)) : __bswap16_var(_x))
% -
% -#define __bswap32_const(_x) \
% - (((__uint32_t)__bswap16(_x) << 16) | __bswap16((_x) >> 16))
% -
% -#define __bswap32(_x) \
% - (__builtin_constant_p(_x) ? \
% - __bswap32_const((__uint32_t)(_x)) : __bswap32_var(_x))
% +#if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE_BUILTIN_CONSTANT_P)
%
% -#define __bswap64_const(_x) \
% - (((__uint64_t)__bswap32(_x) << 32) | __bswap32((_x) >> 32))
% +/* The following mess is to avoid multiple evaluation of x. */
%
% -#define __bswap64(_x) \
% - (__builtin_constant_p(_x) ? \
% - __bswap64_const((__uint64_t)(_x)) : __bswap64_var(_x))
% +#define __bswap16(x) \
% + (__builtin_constant_p(x) ? \
% + __bswap16_gen((__uint16_t)(x)) : __bswap16_var((__uint16_t)(x)))
For clang, x must be cast in the `var' clause too, since when x is
something like 0x12345678 (having been passed here without conversion
by __bswap32()), clang parses the `var' clause and warns about the
implicit truncation of 0x12345678 to 0x5678 in it. This is a bug
in clang -- when warning about fine details, it is important not to
do it for non-problems.
%
% static __inline __uint16_t
% @@ -94,7 +82,19 @@
% {
%
% - return (__bswap16_const(_x));
% + return (__bswap16_gen(_x));
% }
%
% +/*
% + * The following messes are old optimizations for gcc. The messes have
% + * been reduced significantly, but I don't know how to implement the
% + * preferred way of defining defaults above and overriding them cleanly
% + * here. Even clang needs help for the 64-bit case, so to keep the
% + * ifdefs sane we use this for the 32-bit case with clang too.
% + */
% +
% +#define __bswap32(x) \
% + (__builtin_constant_p(x) ? \
% + __bswap32_gen((__uint32_t)(x)) : __bswap32_var(x))
% +
% static __inline __uint32_t
% __bswap32_var(__uint32_t _x)
% @@ -105,34 +105,37 @@
% }
%
% +#define __bswap64(x) \
% + (__builtin_constant_p(x) ? \
% + __bswap64_gen((__uint64_t)(x)) : __bswap64_var(x))
% +
% static __inline __uint64_t
% __bswap64_var(__uint64_t _x)
% {
% -#ifdef _LP64
% +
% +#ifdef _LP64
% __asm ("bswap %0" : "+r" (_x));
% return (_x);
% #else
% - return (__bswap64_const(_x));
% + /*
% + * It is important for the optimizations that the following is not
% + * really generic, but expands to 2 __bswap32_var()'s.
% + */
% + return (__bswap64_gen(_x));
% #endif
% }
%
% -#define __htonl(x) __bswap32(x)
% -#define __htons(x) __bswap16(x)
% -#define __ntohl(x) __bswap32(x)
% -#define __ntohs(x) __bswap16(x)
% +#else /* !(__GNUCLIKE_ASM && __GNUCLIKE_BUILTIN_CONSTANT_P */
%
% -#else /* !(__GNUCLIKE_ASM && __GNUCLIKE_BUILTIN_CONSTANT_P) */
% -
% -/*
% - * No optimizations are available for this compiler. Fall back to
% - * non-optimized functions by defining the constant usually used to prevent
% - * redefinition.
% - */
% -#define _BYTEORDER_FUNC_DEFINED
% +/* XXX these are broken, since they evaluate x more than once. */
% +#define __bswap16(x) (__bswap16_gen((__uint16_t)(x)))
% +#define __bswap32(x) (__bswap32_gen((__uint32_t)(x)))
% +#define __bswap64(x) (__bswap64_gen((__uint64_t)(x)))
%
% #endif /* __GNUCLIKE_ASM && __GNUCLIKE_BUILTIN_CONSTANT_P */
%
% -#ifdef __cplusplus
% -}
% -#endif
% +#define __htonl(x) __bswap32(x)
% +#define __htons(x) __bswap16(x)
% +#define __ntohl(x) __bswap32(x)
% +#define __ntohs(x) __bswap16(x)
%
% #endif /* !_MACHINE_ENDIAN_H_ */
Here is a cut down version showing the clang bug:
% static inline int
% unused(short x)
% {
% return (x);
% }
%
% int
% foo(void)
% {
% return (1 ? 0 : unused(0x12345678));
% }
% w.c:10:25: warning: implicit conversion from 'int' to 'short' changes value
from 305419896 to 22136 [-Wconstant-conversion]
% return (1 ? 0 : unused(0x12345678));
% ~~~~~~ ^~~~~~~~~~
% 1 warning generated.
This shows another bug: the hex constants are obfuscated in the error message
by printing them in decimal.
Bruce
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"