Hi! The ia32intrin.h rotate intrinsics require the second argument to be in between 1 and 31 (or 63), otherwise they invoke UB. But, we can do much better while generating the same instruction when optimizing, so the following patch uses the patterns we pattern recognize well and where the second argument can be any value.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2017-10-12 Jakub Jelinek <ja...@redhat.com> PR target/82498 * config/i386/ia32intrin.h (__rold, __rord, __rolq, __rorq): Allow any values of __C while still being pattern recognizable as a simple rotate instruction. * gcc.dg/ubsan/pr82498.c: New test. --- gcc/config/i386/ia32intrin.h.jj 2017-01-01 12:45:42.000000000 +0100 +++ gcc/config/i386/ia32intrin.h 2017-10-12 09:55:24.235602737 +0200 @@ -147,7 +147,8 @@ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rold (unsigned int __X, int __C) { - return (__X << __C) | (__X >> (32 - __C)); + __C &= 31; + return (__X << __C) | (__X >> (-__C & 31)); } /* 8bit ror */ @@ -171,7 +172,8 @@ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rord (unsigned int __X, int __C) { - return (__X >> __C) | (__X << (32 - __C)); + __C &= 31; + return (__X >> __C) | (__X << (-__C & 31)); } /* Pause */ @@ -239,7 +241,8 @@ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rolq (unsigned long long __X, int __C) { - return (__X << __C) | (__X >> (64 - __C)); + __C &= 63; + return (__X << __C) | (__X >> (-__C & 63)); } /* 64bit ror */ @@ -247,7 +250,8 @@ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rorq (unsigned long long __X, int __C) { - return (__X >> __C) | (__X << (64 - __C)); + __C &= 63; + return (__X >> __C) | (__X << (-__C & 63)); } /* Read flags register */ --- gcc/testsuite/gcc.dg/ubsan/pr82498.c.jj 2017-10-12 09:40:36.025438511 +0200 +++ gcc/testsuite/gcc.dg/ubsan/pr82498.c 2017-10-12 10:06:06.636790077 +0200 @@ -0,0 +1,159 @@ +/* PR target/82498 */ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-fsanitize=undefined -fno-sanitize-recover=undefined" } */ + +#include <x86intrin.h> + +volatile unsigned int a; +volatile unsigned long long b; +volatile int c; + +int +main () +{ + a = 0x12345678U; + a = __rold (a, 0); + if (a != 0x12345678U) + __builtin_abort (); + a = __rold (a, 32); + if (a != 0x12345678U) + __builtin_abort (); + a = __rold (a, -32); + if (a != 0x12345678U) + __builtin_abort (); + a = __rold (a, 37); + if (a != 0x468acf02U) + __builtin_abort (); + a = __rold (a, -5); + if (a != 0x12345678U) + __builtin_abort (); + a = __rord (a, 0); + if (a != 0x12345678U) + __builtin_abort (); + a = __rord (a, 32); + if (a != 0x12345678U) + __builtin_abort (); + a = __rord (a, -32); + if (a != 0x12345678U) + __builtin_abort (); + a = __rord (a, -37); + if (a != 0x468acf02U) + __builtin_abort (); + a = __rord (a, 5); + if (a != 0x12345678U) + __builtin_abort (); + c = 0; + a = __rold (a, c); + if (a != 0x12345678U) + __builtin_abort (); + c = 32; + a = __rold (a, c); + if (a != 0x12345678U) + __builtin_abort (); + c = -32; + a = __rold (a, c); + if (a != 0x12345678U) + __builtin_abort (); + c = 37; + a = __rold (a, c); + if (a != 0x468acf02U) + __builtin_abort (); + c = -5; + a = __rold (a, c); + if (a != 0x12345678U) + __builtin_abort (); + c = 0; + a = __rord (a, c); + if (a != 0x12345678U) + __builtin_abort (); + c = 32; + a = __rord (a, c); + if (a != 0x12345678U) + __builtin_abort (); + c = -32; + a = __rord (a, c); + if (a != 0x12345678U) + __builtin_abort (); + c = -37; + a = __rord (a, c); + if (a != 0x468acf02U) + __builtin_abort (); + c = 5; + a = __rord (a, c); + if (a != 0x12345678U) + __builtin_abort (); +#ifdef __x86_64__ + b = 0x123456789abcdef1ULL; + b = __rolq (b, 0); + if (b != 0x123456789abcdef1ULL) + __builtin_abort (); + b = __rolq (b, 64); + if (b != 0x123456789abcdef1ULL) + __builtin_abort (); + b = __rolq (b, -64); + if (b != 0x123456789abcdef1ULL) + __builtin_abort (); + b = __rolq (b, 69); + if (b != 0x468acf13579bde22ULL) + __builtin_abort (); + b = __rolq (b, -5); + if (b != 0x123456789abcdef1ULL) + __builtin_abort (); + b = __rorq (b, 0); + if (b != 0x123456789abcdef1ULL) + __builtin_abort (); + b = __rorq (b, 64); + if (b != 0x123456789abcdef1ULL) + __builtin_abort (); + b = __rorq (b, -64); + if (b != 0x123456789abcdef1ULL) + __builtin_abort (); + b = __rorq (b, -69); + if (b != 0x468acf13579bde22ULL) + __builtin_abort (); + b = __rorq (b, 5); + if (b != 0x123456789abcdef1ULL) + __builtin_abort (); + c = 0; + b = __rolq (b, c); + if (b != 0x123456789abcdef1ULL) + __builtin_abort (); + c = 64; + b = __rolq (b, c); + if (b != 0x123456789abcdef1ULL) + __builtin_abort (); + c = -64; + b = __rolq (b, c); + if (b != 0x123456789abcdef1ULL) + __builtin_abort (); + c = 69; + b = __rolq (b, c); + if (b != 0x468acf13579bde22ULL) + __builtin_abort (); + c = -5; + b = __rolq (b, c); + if (b != 0x123456789abcdef1ULL) + __builtin_abort (); + c = 0; + b = __rorq (b, c); + if (b != 0x123456789abcdef1ULL) + __builtin_abort (); + c = 64; + b = __rorq (b, c); + if (b != 0x123456789abcdef1ULL) + __builtin_abort (); + c = -64; + b = __rorq (b, c); + if (b != 0x123456789abcdef1ULL) + __builtin_abort (); + c = -69; + b = __rorq (b, c); + if (b != 0x468acf13579bde22ULL) + __builtin_abort (); + c = 5; + b = __rorq (b, c); + if (b != 0x123456789abcdef1ULL) + __builtin_abort (); +#endif + return 0; +} Jakub