On Fri, Sep 18, 2020 at 01:39:16PM +0200, Richard Biener wrote:
> --- a/gcc/tree-vect-patterns.c
> +++ b/gcc/tree-vect-patterns.c
> @@ -2456,7 +2456,6 @@ vect_recog_rotate_pattern (vec_info *vinfo,
> 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
> 	}
>       stype = TREE_TYPE (def);
> -      scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
> 
>       if (TREE_CODE (def) == INTEGER_CST)
> 	{
> @@ -2485,7 +2484,7 @@ vect_recog_rotate_pattern (vec_info *vinfo,
> 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
> 
> 	  def2 = vect_recog_temp_ssa_var (stype, NULL);
> -	  tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
> +	  tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
> 	  def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
> 					  gimple_assign_lhs (def_stmt), mask);
> 	  if (ext_def)
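For context, the lowering this hunk touches is roughly equivalent to the
following C sketch (rotl16 is a made-up helper name, not anything in the
patch): the count for the second shift is negated and masked with the
element precision minus one, and the bug was that the mask was taken from
the precision of the count's type (smode) rather than the element mode
(mode), e.g. 31 instead of 15 for unsigned short elements, so the shift
count could reach or exceed the element width.

/* Rough C model of the masked-count rotate lowering (a sketch, not the
   exact GIMPLE the pattern emits; both counts are masked here only to
   keep the C helper well defined for any n).  */
static inline unsigned short
rotl16 (unsigned short x, unsigned int n)
{
  const unsigned int mask = 16 - 1;	/* element precision - 1 */
  return (unsigned short) ((x << (n & mask)) | (x >> ((-n) & mask)));
}
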
The bug didn't affect just the > 32 precision type rotates, but also the
narrower ones, where we'd end up with UB on the vector shifts and some
arches like x86 would end up with 0 as the result of the shift rather than
the shift count being masked with the operand width.
So I think it is worth adding a testcase for that too.

Tested on x86_64-linux -m32/-m64 without and with your fix, committed to
trunk as obvious:

2020-09-18  Jakub Jelinek  <ja...@redhat.com>

	PR tree-optimization/97081
	* gcc.dg/vect/pr97081-2.c: New test.

--- gcc/testsuite/gcc.dg/vect/pr97081-2.c
+++ gcc/testsuite/gcc.dg/vect/pr97081-2.c
@@ -0,0 +1,32 @@
+/* PR tree-optimization/97081 */
+
+#include "tree-vect.h"
+
+unsigned short s[1024];
+unsigned char c[1024];
+
+__attribute__((noipa)) void
+foo (int n)
+{
+  for (int i = 0; i < 1024; i++)
+    s[i] = (s[i] << n) | (s[i] >> (__SIZEOF_SHORT__ * __CHAR_BIT__ - n));
+  for (int i = 0; i < 1024; i++)
+    c[i] = (c[i] << n) | (c[i] >> (__CHAR_BIT__ - n));
+}
+
+int
+main ()
+{
+  check_vect ();
+  for (int i = 0; i < 1024; i++)
+    {
+      s[i] = i;
+      c[i] = i;
+    }
+  foo (3);
+  for (int i = 0; i < 1024; i++)
+    if (s[i] != (unsigned short) ((i << 3) | (i >> (__SIZEOF_SHORT__ * __CHAR_BIT__ - 3)))
+	|| c[i] != (unsigned char) ((((unsigned char) i) << 3) | (((unsigned char) i) >> (__CHAR_BIT__ - 3))))
+      __builtin_abort ();
+  return 0;
+}

	Jakub