Hi!

The ia32intrin.h rotate intrinsics require the second argument to be
in between 1 and 31 (or 63), otherwise they invoke UB.  But, we can do much
better while generating the same instruction when optimizing, so the
following patch uses the patterns we pattern recognize well and where
the second argument can be any value.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-10-12  Jakub Jelinek  <ja...@redhat.com>

        PR target/82498
        * config/i386/ia32intrin.h (__rold, __rord, __rolq, __rorq): Allow
        any values of __C while still being pattern recognizable as a simple
        rotate instruction.

        * gcc.dg/ubsan/pr82498.c: New test.

--- gcc/config/i386/ia32intrin.h.jj     2017-01-01 12:45:42.000000000 +0100
+++ gcc/config/i386/ia32intrin.h        2017-10-12 09:55:24.235602737 +0200
@@ -147,7 +147,8 @@ extern __inline unsigned int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __rold (unsigned int __X, int __C)
 {
-  return (__X << __C) | (__X >> (32 - __C));
+  __C &= 31;
+  return (__X << __C) | (__X >> (-__C & 31));
 }
 
 /* 8bit ror */
@@ -171,7 +172,8 @@ extern __inline unsigned int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __rord (unsigned int __X, int __C)
 {
-  return (__X >> __C) | (__X << (32 - __C));
+  __C &= 31;
+  return (__X >> __C) | (__X << (-__C & 31));
 }
 
 /* Pause */
@@ -239,7 +241,8 @@ extern __inline unsigned long long
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __rolq (unsigned long long __X, int __C)
 {
-  return (__X << __C) | (__X >> (64 - __C));
+  __C &= 63;
+  return (__X << __C) | (__X >> (-__C & 63));
 }
 
 /* 64bit ror */
@@ -247,7 +250,8 @@ extern __inline unsigned long long
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __rorq (unsigned long long __X, int __C)
 {
-  return (__X >> __C) | (__X << (64 - __C));
+  __C &= 63;
+  return (__X >> __C) | (__X << (-__C & 63));
 }
 
 /* Read flags register */
--- gcc/testsuite/gcc.dg/ubsan/pr82498.c.jj     2017-10-12 09:40:36.025438511 
+0200
+++ gcc/testsuite/gcc.dg/ubsan/pr82498.c        2017-10-12 10:06:06.636790077 
+0200
@@ -0,0 +1,159 @@
+/* PR target/82498 */
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-fsanitize=undefined -fno-sanitize-recover=undefined" } */
+
+#include <x86intrin.h>
+
+volatile unsigned int a;
+volatile unsigned long long b;
+volatile int c;
+
+int
+main ()
+{
+  a = 0x12345678U;
+  a = __rold (a, 0);
+  if (a != 0x12345678U)
+    __builtin_abort ();
+  a = __rold (a, 32);
+  if (a != 0x12345678U)
+    __builtin_abort ();
+  a = __rold (a, -32);
+  if (a != 0x12345678U)
+    __builtin_abort ();
+  a = __rold (a, 37);
+  if (a != 0x468acf02U)
+    __builtin_abort ();
+  a = __rold (a, -5);
+  if (a != 0x12345678U)
+    __builtin_abort ();
+  a = __rord (a, 0);
+  if (a != 0x12345678U)
+    __builtin_abort ();
+  a = __rord (a, 32);
+  if (a != 0x12345678U)
+    __builtin_abort ();
+  a = __rord (a, -32);
+  if (a != 0x12345678U)
+    __builtin_abort ();
+  a = __rord (a, -37);
+  if (a != 0x468acf02U)
+    __builtin_abort ();
+  a = __rord (a, 5);
+  if (a != 0x12345678U)
+    __builtin_abort ();
+  c = 0;
+  a = __rold (a, c);
+  if (a != 0x12345678U)
+    __builtin_abort ();
+  c = 32;
+  a = __rold (a, c);
+  if (a != 0x12345678U)
+    __builtin_abort ();
+  c = -32;
+  a = __rold (a, c);
+  if (a != 0x12345678U)
+    __builtin_abort ();
+  c = 37;
+  a = __rold (a, c);
+  if (a != 0x468acf02U)
+    __builtin_abort ();
+  c = -5;
+  a = __rold (a, c);
+  if (a != 0x12345678U)
+    __builtin_abort ();
+  c = 0;
+  a = __rord (a, c);
+  if (a != 0x12345678U)
+    __builtin_abort ();
+  c = 32;
+  a = __rord (a, c);
+  if (a != 0x12345678U)
+    __builtin_abort ();
+  c = -32;
+  a = __rord (a, c);
+  if (a != 0x12345678U)
+    __builtin_abort ();
+  c = -37;
+  a = __rord (a, c);
+  if (a != 0x468acf02U)
+    __builtin_abort ();
+  c = 5;
+  a = __rord (a, c);
+  if (a != 0x12345678U)
+    __builtin_abort ();
+#ifdef __x86_64__
+  b = 0x123456789abcdef1ULL;
+  b = __rolq (b, 0);
+  if (b != 0x123456789abcdef1ULL)
+    __builtin_abort ();
+  b = __rolq (b, 64);
+  if (b != 0x123456789abcdef1ULL)
+    __builtin_abort ();
+  b = __rolq (b, -64);
+  if (b != 0x123456789abcdef1ULL)
+    __builtin_abort ();
+  b = __rolq (b, 69);
+  if (b != 0x468acf13579bde22ULL)
+    __builtin_abort ();
+  b = __rolq (b, -5);
+  if (b != 0x123456789abcdef1ULL)
+    __builtin_abort ();
+  b = __rorq (b, 0);
+  if (b != 0x123456789abcdef1ULL)
+    __builtin_abort ();
+  b = __rorq (b, 64);
+  if (b != 0x123456789abcdef1ULL)
+    __builtin_abort ();
+  b = __rorq (b, -64);
+  if (b != 0x123456789abcdef1ULL)
+    __builtin_abort ();
+  b = __rorq (b, -69);
+  if (b != 0x468acf13579bde22ULL)
+    __builtin_abort ();
+  b = __rorq (b, 5);
+  if (b != 0x123456789abcdef1ULL)
+    __builtin_abort ();
+  c = 0;
+  b = __rolq (b, c);
+  if (b != 0x123456789abcdef1ULL)
+    __builtin_abort ();
+  c = 64;
+  b = __rolq (b, c);
+  if (b != 0x123456789abcdef1ULL)
+    __builtin_abort ();
+  c = -64;
+  b = __rolq (b, c);
+  if (b != 0x123456789abcdef1ULL)
+    __builtin_abort ();
+  c = 69;
+  b = __rolq (b, c);
+  if (b != 0x468acf13579bde22ULL)
+    __builtin_abort ();
+  c = -5;
+  b = __rolq (b, c);
+  if (b != 0x123456789abcdef1ULL)
+    __builtin_abort ();
+  c = 0;
+  b = __rorq (b, c);
+  if (b != 0x123456789abcdef1ULL)
+    __builtin_abort ();
+  c = 64;
+  b = __rorq (b, c);
+  if (b != 0x123456789abcdef1ULL)
+    __builtin_abort ();
+  c = -64;
+  b = __rorq (b, c);
+  if (b != 0x123456789abcdef1ULL)
+    __builtin_abort ();
+  c = -69;
+  b = __rorq (b, c);
+  if (b != 0x468acf13579bde22ULL)
+    __builtin_abort ();
+  c = 5;
+  b = __rorq (b, c);
+  if (b != 0x123456789abcdef1ULL)
+    __builtin_abort ();
+#endif
+  return 0;
+}

        Jakub

Reply via email to