Richard Biener via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > The following makes sure to limit the shift operand when vectorizing > (short)((int)x >> 31) via (short)x >> 31 as the out of bounds shift > operand otherwise invokes undefined behavior. When we determine > whether we can demote the operand we know we at most shift in the > sign bit so we can adjust the shift amount. > > Note this has the possibility of un-CSEing common shift operands > as there's no good way to share pattern stmts between patterns. > We'd have to separately pattern recognize the definition. > > Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. > > Not sure about LSHIFT_EXPR, it probably has the same issue but > the fallback optimistic zero for out-of-range shifts is at least > "correct" [sic: "corrrect" in original]. Not sure we ever try to demote rotates (probably not).
I guess you mean "correct" for x86? But that's just a quirk of x86. IMO the behaviour is equally wrong for LSHIFT_EXPR. Richard > OK? > > Thanks, > Richard. > > PR tree-optimization/110838 > * tree-vect-patterns.cc (vect_recog_over_widening_pattern): > Adjust the shift operand of RSHIFT_EXPRs. > > * gcc.dg/torture/pr110838.c: New testcase. > --- > gcc/testsuite/gcc.dg/torture/pr110838.c | 43 +++++++++++++++++++++++++ > gcc/tree-vect-patterns.cc | 24 ++++++++++++++ > 2 files changed, 67 insertions(+) > create mode 100644 gcc/testsuite/gcc.dg/torture/pr110838.c > > diff --git a/gcc/testsuite/gcc.dg/torture/pr110838.c > b/gcc/testsuite/gcc.dg/torture/pr110838.c > new file mode 100644 > index 00000000000..f039bd6c8ea > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/torture/pr110838.c > @@ -0,0 +1,43 @@ > +/* { dg-do run } */ > + > +typedef __UINT32_TYPE__ uint32_t; > +typedef __UINT8_TYPE__ uint8_t; > +typedef __INT8_TYPE__ int8_t; > +typedef uint8_t pixel; > + > +/* get the sign of input variable (TODO: this is a dup, make common) */ > +static inline int8_t signOf(int x) > +{ > + return (x >> 31) | ((int)((((uint32_t)-x)) >> 31)); > +} > + > +__attribute__((noipa)) > +static void calSign_bug(int8_t *dst, const pixel *src1, const pixel *src2, > const int endX) > +{ > + for (int x = 0; x < endX; x++) > + dst[x] = signOf(src1[x] - src2[x]); > +} > + > +__attribute__((noipa, optimize(0))) > +static void calSign_ok(int8_t *dst, const pixel *src1, const pixel *src2, > const int endX) > +{ > + for (int x = 0; x < endX; x++) > + dst[x] = signOf(src1[x] - src2[x]); > +} > + > +__attribute__((noipa, optimize(0))) > +int main() > +{ > + const pixel s1[9] = { 0xcd, 0x33, 0xd4, 0x3e, 0xb0, 0xfb, 0x95, 0x64, > 0x70, }; > + const pixel s2[9] = { 0xba, 0x9f, 0xab, 0xa1, 0x3b, 0x29, 0xb1, 0xbd, > 0x64, }; > + int endX = 9; > + int8_t dst[9]; > + int8_t dst_ok[9]; > + > + calSign_bug(dst, s1, s2, endX); > + calSign_ok(dst_ok, s1, s2, endX); > + > + if (__builtin_memcmp(dst, dst_ok, endX) 
!= 0) > + __builtin_abort (); > + return 0; > +} > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc > index ef806e2346e..e4ab8c2d65b 100644 > --- a/gcc/tree-vect-patterns.cc > +++ b/gcc/tree-vect-patterns.cc > @@ -3099,9 +3099,33 @@ vect_recog_over_widening_pattern (vec_info *vinfo, > tree ops[3] = {}; > for (unsigned int i = 1; i < first_op; ++i) > ops[i - 1] = gimple_op (last_stmt, i); > + /* For right shifts limit the shift operand. */ > vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1], > op_type, &unprom[0], op_vectype); > > + /* Limit shift operands. */ > + if (code == RSHIFT_EXPR) > + { > + wide_int min_value, max_value; > + if (TREE_CODE (ops[1]) == INTEGER_CST) > + ops[1] = wide_int_to_tree (op_type, > + wi::bit_and (wi::to_wide (ops[1]), > + new_precision - 1)); > + else if (!vect_get_range_info (ops[1], &min_value, &max_value) > + || wi::ge_p (max_value, new_precision, TYPE_SIGN (op_type))) > + { > + /* ??? Note the following bad for SLP as that only supports > + same argument widened shifts and it un-CSEs same arguments. */ > + tree new_var = vect_recog_temp_ssa_var (op_type, NULL); > + gimple *pattern_stmt > + = gimple_build_assign (new_var, BIT_AND_EXPR, ops[1], > + build_int_cst (op_type, new_precision - 1)); > + ops[1] = new_var; > + gimple_set_location (pattern_stmt, gimple_location (last_stmt)); > + append_pattern_def_seq (vinfo, last_stmt_info, pattern_stmt); > + } > + } > + > /* Use the operation to produce a result of type OP_TYPE. */ > tree new_var = vect_recog_temp_ssa_var (op_type, NULL); > gimple *pattern_stmt = gimple_build_assign (new_var, code,