https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113281
--- Comment #21 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
So, I think

--- gcc/tree-vect-patterns.cc.jj	2024-01-03 11:51:37.487648527 +0100
+++ gcc/tree-vect-patterns.cc	2024-01-24 14:05:55.911356481 +0100
@@ -3002,6 +3002,12 @@ vect_recog_over_widening_pattern (vec_in
   if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
     return NULL;
 
+  /* FIXME: Aren't actually most operations incompatible with the truncation?
+     Like division/modulo, ...  Shouldn't this punt on
+     !vect_truncatable_operation_p (code) or that plus some exceptions?  */
+  if (code == LROTATE_EXPR || code == RROTATE_EXPR)
+    return NULL;
+
   /* Keep the first operand of a COND_EXPR as-is: only the other two
      operands are interesting.  */
   unsigned int first_op = (code == COND_EXPR ? 2 : 1);
@@ -3169,22 +3175,35 @@ vect_recog_over_widening_pattern (vec_in
 			       op_type, &unprom[0], op_vectype);
 
   /* Limit shift operands.  */
-  if (code == RSHIFT_EXPR)
+  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
     {
       wide_int min_value, max_value;
       if (TREE_CODE (ops[1]) == INTEGER_CST)
-	ops[1] = wide_int_to_tree (op_type,
-				   wi::umin (wi::to_wide (ops[1]),
-					     new_precision - 1));
+	{
+	  if (wi::ge_p (wi::to_wide (ops[1]), new_precision,
+			TYPE_SIGN (TREE_TYPE (ops[1]))))
+	    {
+	      if (code == LSHIFT_EXPR || unsigned_p)
+		return NULL;
+	      ops[1] = build_int_cst (TREE_TYPE (ops[1]), new_precision - 1);
+	    }
+	}
       else if (!vect_get_range_info (ops[1], &min_value, &max_value)
-	       || wi::ge_p (max_value, new_precision, TYPE_SIGN (op_type)))
+	       || wi::ge_p (max_value, new_precision,
+			    TYPE_SIGN (TREE_TYPE (ops[1]))))
 	{
+	  /* LSHIFT_EXPR or logical RSHIFT_EXPR can be handled as
+	     (op1 >= new_precision ? 0 : op0) << (op1 & (new_precision - 1))
+	     but perhaps it is too costly.  */
+	  if (code == LSHIFT_EXPR || unsigned_p)
+	    return NULL;
	  /* ???  Note the following bad for SLP as that only supports
	     same argument widened shifts and it un-CSEs same arguments.  */
-	  tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
+	  tree new_var = vect_recog_temp_ssa_var (TREE_TYPE (ops[1]), NULL);
	  gimple *pattern_stmt
	    = gimple_build_assign (new_var, MIN_EXPR, ops[1],
-				   build_int_cst (op_type, new_precision - 1));
+				   build_int_cst (TREE_TYPE (ops[1]),
+						  new_precision - 1));
	  gimple_set_location (pattern_stmt, gimple_location (last_stmt));
	  if (ops[1] == unprom[1].op && unprom[1].dt == vect_external_def)
	    {

should fix it.  Given Richi's comments about the conditionals being too costly,
it instead punts for the truncated left shifts or logical right shifts unless
VRP proves the shift count is in range.  I've also changed all the op_type uses
on the shift count operand to TREE_TYPE (ops[1]), because I believe the two
types can be, and most often are, different.

But as the first FIXME comment suggests, I wonder how it is that this doesn't
miscompile rotates, or say divisions/multiplications etc.  While both
vect_determine_precisions_from_range and vect_determine_precisions_from_users
call vect_truncatable_operation_p, and the latter allows casts and
LSHIFT_EXPR/RSHIFT_EXPR next to it, the former seems, for
!vect_truncatable_operation_p, to just compute the minimum/maximum from the
ranges across all operands and the lhs.  Is that a safe thing to do for any
non-truncatable operation?

Richard S., can you please have a look?  This was added in
r9-1590-g370c2ebe8fa20e0812cd2d533d4ed38ee2d37c85 I believe.
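
For illustration, a minimal scalar sketch of why the truncated LSHIFT_EXPR has
to punt (hypothetical C, not the PR's testcase; assume new_precision == 16 and
0 <= s < 32 so the wider shift is well defined):

  unsigned short
  narrow_lshift (unsigned short x, unsigned int s)
  {
    /* x is promoted to int, shifted in 32-bit precision, then the result is
       truncated back to 16 bits.  For s >= 16 the surviving low 16 bits are
       all zero.  */
    return (unsigned short) (x << s);
  }

If the pattern narrows this shift to 16-bit precision, no in-range count
reproduces the original semantics for s >= 16: clamping the count to
new_precision - 1 == 15, the way the old RSHIFT_EXPR-only code clamped right
shift counts, would turn narrow_lshift (1, 16) into 1 << 15 == 0x8000 instead
of the correct 0.  Hence the patch returns NULL for LSHIFT_EXPR (and for
logical RSHIFT_EXPR) whenever the count cannot be proven to be below
new_precision.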
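
The alternative mentioned in the new comment inside the patch -- handling the
out-of-range counts instead of punting -- would look roughly like this in
scalar form (again assuming new_precision == 16; the comment suspects it is
too costly because it adds a compare and a select per element):

  unsigned short
  narrow_lshift_masked (unsigned short x, unsigned int s)
  {
    /* (op1 >= new_precision ? 0 : op0) << (op1 & (new_precision - 1)):
       zero the shifted operand when the count is out of range and mask the
       count, so the 16-bit shift matches the truncated 32-bit result.  */
    return (unsigned short) ((s >= 16 ? 0 : x) << (s & 15));
  }

The analogous form with >> would presumably cover the logical right shift case.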
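
As for the FIXME about rotates: a rotate is tied to the precision it is
performed in, so redoing it in a narrower type changes even the low bits,
regardless of value ranges.  A hypothetical illustration:

  unsigned char
  rot32_then_truncate (unsigned int x)
  {
    unsigned int r = (x >> 1) | (x << 31);   /* rotate right by 1 in 32 bits */
    return (unsigned char) r;                /* x == 1 gives 0x00 */
  }

  unsigned char
  rot8 (unsigned char x)
  {
    return (unsigned char) ((x >> 1) | (x << 7));   /* x == 1 gives 0x80 */
  }

So if the over-widening pattern ever narrowed an LROTATE_EXPR/RROTATE_EXPR, the
result would differ, which is presumably why the patch adds the early
return NULL for those codes.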