On Fri, Sep 18, 2020 at 01:39:16PM +0200, Richard Biener wrote:
> --- a/gcc/tree-vect-patterns.c
> +++ b/gcc/tree-vect-patterns.c
> @@ -2456,7 +2456,6 @@ vect_recog_rotate_pattern (vec_info *vinfo,
>        append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
>      }
>    stype = TREE_TYPE (def);
> -  scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
>  
>    if (TREE_CODE (def) == INTEGER_CST)
>      {
> @@ -2485,7 +2484,7 @@ vect_recog_rotate_pattern (vec_info *vinfo,
>       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
>  
>        def2 = vect_recog_temp_ssa_var (stype, NULL);
> -      tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
> +      tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
>        def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
>                                     gimple_assign_lhs (def_stmt), mask);
>        if (ext_def)

The bug didn't affect only rotates of types with precision > 32, but also
the narrower ones, where we'd end up with UB on the vector shifts; some
arches like x86 would then produce 0 as the result of the shift rather than
masking the shift count with the operand width.

So I think it is worth adding a testcase for that too.  Tested on x86_64-linux
-m32/-m64 without and with your fix, committed to trunk as obvious:

2020-09-18  Jakub Jelinek  <ja...@redhat.com>
    
        PR tree-optimization/97081
        * gcc.dg/vect/pr97081-2.c: New test.

--- gcc/testsuite/gcc.dg/vect/pr97081-2.c
+++ gcc/testsuite/gcc.dg/vect/pr97081-2.c
@@ -0,0 +1,32 @@
+/* PR tree-optimization/97081 */
+
+#include "tree-vect.h"
+
+unsigned short s[1024];
+unsigned char c[1024];
+
+__attribute__((noipa)) void
+foo (int n)
+{
+  for (int i = 0; i < 1024; i++)
+    s[i] = (s[i] << n) | (s[i] >> (__SIZEOF_SHORT__ * __CHAR_BIT__ - n));
+  for (int i = 0; i < 1024; i++)
+    c[i] = (c[i] << n) | (c[i] >> (__CHAR_BIT__ - n));
+}
+
+int
+main ()
+{
+  check_vect ();
+  for (int i = 0; i < 1024; i++)
+    {
+      s[i] = i;
+      c[i] = i;
+    }
+  foo (3);
+  for (int i = 0; i < 1024; i++)
+    if (s[i] != (unsigned short) ((i << 3) | (i >> (__SIZEOF_SHORT__ * __CHAR_BIT__ - 3)))
+        || c[i] != (unsigned char) ((((unsigned char) i) << 3) | (((unsigned char) i) >> (__CHAR_BIT__ - 3))))
+      __builtin_abort ();
+  return 0;
+}


        Jakub

Reply via email to