On Tue, Jul 16, 2019 at 10:45 AM Kewen.Lin <li...@linux.ibm.com> wrote: > > Hi all, > > Based on the previous comments (thank you!), I tried to update the > handling in expander and vectorizer. Middle-end optimizes lrotate > with const rotation count to rrotate all the time, it makes vectorizer > fail to vectorize if rrotate isn't supported on the target. We can at > least teach it on const rotation count, the cost should be the same? > At the same time, the expander already tries to use the opposite > rotation optable for scalar, we can teach it to deal with vector as well. > > Is it on the right track and reasonable?
So you're basically fixing this up in the expander. I think on the GIMPLE level you then miss to update tree-vect-generic.c? I'm not sure if it makes sense to have both LROTATE_EXPR and RROTATE_EXPR on the GIMPLE level then (that CPUs only support one direction is natural though). So maybe simply get rid of one? Its semantics are also nowhere documented (do we allow negative rotation amounts? how are non-mode-precision entities rotated? etc.). Richard. > > Thanks, > Kewen > > -------------- > > One informal patch to help describing this new thought: > > > diff --git a/gcc/optabs.c b/gcc/optabs.c > index a0e361b8bfe..ebebb0ad145 100644 > --- a/gcc/optabs.c > +++ b/gcc/optabs.c > @@ -1273,6 +1273,7 @@ expand_binop (machine_mode mode, optab binoptab, rtx > op0, rtx op1, > if (mclass == MODE_VECTOR_INT) > { > optab otheroptab = unknown_optab; > + optab otheroptab1 = unknown_optab; > > if (binoptab == ashl_optab) > otheroptab = vashl_optab; > @@ -1281,23 +1282,50 @@ expand_binop (machine_mode mode, optab binoptab, rtx > op0, rtx op1, > else if (binoptab == lshr_optab) > otheroptab = vlshr_optab; > else if (binoptab == rotl_optab) > - otheroptab = vrotl_optab; > + { > + otheroptab = vrotl_optab; > + otheroptab1 = vrotr_optab; > + } > else if (binoptab == rotr_optab) > - otheroptab = vrotr_optab; > + { > + otheroptab = vrotr_optab; > + otheroptab1 = vrotl_optab; > + } > + > + bool other_ok = (otheroptab && (icode = optab_handler (otheroptab, > mode)) != CODE_FOR_nothing); > + bool other1_ok = false; > + if (!other_ok && otheroptab1) > + other1_ok > + = ((icode = optab_handler (otheroptab1, mode)) != CODE_FOR_nothing) > + && SCALAR_INT_MODE_P (GET_MODE_INNER (mode)); > > - if (otheroptab > - && (icode = optab_handler (otheroptab, mode)) != CODE_FOR_nothing) > + if (other_ok || other1_ok) > { > /* The scalar may have been extended to be too wide. Truncate > it back to the proper size to fit in the broadcast vector. */ > scalar_mode inner_mode = GET_MODE_INNER (mode); > - if (!CONST_INT_P (op1) > - && (GET_MODE_BITSIZE (as_a <scalar_int_mode> (GET_MODE (op1))) > + rtx newop1 = op1; > + if (other1_ok) > + { > + unsigned int bits = GET_MODE_PRECISION (inner_mode); > + > + if (CONST_INT_P (op1)) > + newop1 = gen_int_shift_amount (int_mode, bits - INTVAL (op1)); > + else if (targetm.shift_truncation_mask (int_mode) == bits - 1) > + newop1 = negate_rtx (GET_MODE (op1), op1); > + else > + newop1 = expand_binop (GET_MODE (op1), sub_optab, > + gen_int_mode (bits, GET_MODE (op1)), > op1, > + NULL_RTX, unsignedp, OPTAB_DIRECT); > + } > + if (!CONST_INT_P (newop1) > + && (GET_MODE_BITSIZE (as_a<scalar_int_mode> (GET_MODE (newop1))) > > GET_MODE_BITSIZE (inner_mode))) > - op1 = force_reg (inner_mode, > - simplify_gen_unary (TRUNCATE, inner_mode, op1, > - GET_MODE (op1))); > - rtx vop1 = expand_vector_broadcast (mode, op1); > + newop1 = force_reg (inner_mode, > + simplify_gen_unary (TRUNCATE, inner_mode, > + newop1, GET_MODE > (newop1))); > + > + rtx vop1 = expand_vector_broadcast (mode, newop1); > if (vop1) > { > temp = expand_binop_directly (icode, mode, otheroptab, op0, > vop1, > > diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c > index ff952d6f464..c05ce1acba4 100644 > --- a/gcc/tree-vect-patterns.c > +++ b/gcc/tree-vect-patterns.c > @@ -2039,6 +2039,15 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, > tree *type_out) > if (optab1 > && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing) > return NULL; > + /* middle-end canonicalizing LROTATE to RROTATE with const rotation count, > + let's try the LROTATE as well. */ > + if (rhs_code == RROTATE_EXPR && TREE_CODE(oprnd1) == INTEGER_CST) > + { > + optab1 = optab_for_tree_code (LROTATE_EXPR, vectype, optab_vector); > + if (optab1 > + && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing) > + return NULL; > + } > > if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def) > { > @@ -2046,6 +2055,14 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, > tree *type_out) > if (optab2 > && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing) > return NULL; > + if (rhs_code == RROTATE_EXPR && TREE_CODE(oprnd1) == INTEGER_CST) > + { > + optab2 = optab_for_tree_code (LROTATE_EXPR, vectype, optab_scalar); > + if (optab2 > + && optab_handler (optab2, TYPE_MODE (vectype)) > + != CODE_FOR_nothing) > + return NULL; > + } > } > > /* If vector/vector or vector/scalar shifts aren't supported by the target, > diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c > index 21046931243..9e1a2f971a1 100644 > --- a/gcc/tree-vect-stmts.c > +++ b/gcc/tree-vect-stmts.c > @@ -5665,6 +5665,12 @@ vectorizable_shift (stmt_vec_info stmt_info, > gimple_stmt_iterator *gsi, > if (!scalar_shift_arg) > { > optab = optab_for_tree_code (code, vectype, optab_vector); > + > + if (TREE_CODE (op1) == INTEGER_CST && code == RROTATE_EXPR > + && !(optab > + && optab_handler (optab, TYPE_MODE (vectype)) > + != CODE_FOR_nothing)) > + optab = optab_for_tree_code (LROTATE_EXPR, vectype, optab_vector); > if (dump_enabled_p ()) > dump_printf_loc (MSG_NOTE, vect_location, > "vector/vector shift/rotate found.\n"); > @@ -5696,7 +5702,10 @@ vectorizable_shift (stmt_vec_info stmt_info, > gimple_stmt_iterator *gsi, > else > { > optab = optab_for_tree_code (code, vectype, optab_vector); > - if (optab > + if (TREE_CODE (op1) == INTEGER_CST && code == RROTATE_EXPR > + && !(optab && optab_handler (optab, TYPE_MODE (vectype)))) > + optab = optab_for_tree_code (LROTATE_EXPR, vectype, optab_vector); > + if (optab > && (optab_handler (optab, TYPE_MODE (vectype)) > != CODE_FOR_nothing)) > { >