On Sat, Aug 11, 2012 at 2:25 PM, Marc Glisse <marc.gli...@inria.fr> wrote: > On Fri, 10 Aug 2012, Marc Glisse wrote: > >> this patch detects permutations of permutations and merges them. It also >> canonicalizes permutations a bit more. >> >> There are several issues with this patch: >> >> 1) I am not sure we always want to combine permutations. Indeed, someone >> (user? vectorizer?) may have written 2 permutations to help the backend >> generate optimal code, whereas it will do a bad job on the complicated >> combined permutation. At tree level, I am not sure what can be done except >> possibly restrict the optimization to the case where the combined >> permutation is the identity. At the RTL level, we could compare what insns >> are generated, but that's going to be painful (doing anything with >> permutations at the RTL level is painful). >> >> 2) I don't understand how the cleanup works in tree-ssa-forwprop.c. I >> copied some fold/update/remove lines from other simplifications, but I don't >> know if they are right. >> >> 3) In a first version of the patch, where I had SSA_NAME_DEF_STMT instead >> of get_prop_source_stmt, I got an ICE in one of the torture vectorization >> testcases. It happened in expand_expr_real_2, because it asserts that >> expand_vec_perm will never fail. However, expand_vec_perm does return >> NULL_RTX sometimes. Here it was in V16QImode, with the permutation >> {0,0,2,2,4,...,14,14}. maybe_expand_insn can't handle it directly (that's ok >> I guess), but then expand_vec_perm sees VOIDmode and exits instead of trying >> other ways. I don't know if there is a latent bug or if (more likely) my >> patch may produce trees with wrong modes. >> >> 4) Is this the right place? >> >> This isn't the transformation I am most interested in, but it is a first >> step to see if the direction is right. > > > Hello, > > there was yet another issue with the version I posted: the testcase was > trivial so I didn't notice that it didn't perform the optimization at all... > > Here is a new version. It seems a bit strange to me that there are plenty of > functions that check for single-use variables, but there isn't any that > checks for variables used in a single statement (but possibly twice). So I > may be doing something strange, but it seems to be the natural test here. > > Happily passed bootstrap+regtest. > > 2012-08-11 Marc Glisse <marc.gli...@inria.fr> > > > gcc/ > * fold-const.c (fold_ternary_loc): Detect identity permutations. > Canonicalize permutations more. > * tree-ssa-forwprop.c (is_only_used_in): New function. > (simplify_permutation): Likewise. > > (ssa_forward_propagate_and_combine): Call it. > > gcc/testsuite/ > * gcc.dg/tree-ssa/forwprop-19.c: New testcase. > > -- > Marc Glisse > > Index: gcc/tree-ssa-forwprop.c > =================================================================== > --- gcc/tree-ssa-forwprop.c (revision 190311) > +++ gcc/tree-ssa-forwprop.c (working copy) > @@ -2569,20 +2569,97 @@ combine_conversions (gimple_stmt_iterato > gimple_assign_set_rhs_code (stmt, CONVERT_EXPR); > update_stmt (stmt); > return remove_prop_source_from_use (op0) ? 2 : 1; > } > } > } > > return 0; > } > > +/* Return true if VAR has no nondebug use but in stmt. */ > +static bool > +is_only_used_in (const_tree var, const_gimple stmt) > +{ > + const ssa_use_operand_t *const ptr0 = &(SSA_NAME_IMM_USE_NODE (var)); > + const ssa_use_operand_t *ptr = ptr0->next; > + > + for (; ptr != ptr0; ptr = ptr->next) > + { > + const_gimple use_stmt = USE_STMT (ptr); > + if (is_gimple_debug (use_stmt)) > + continue; > + if (use_stmt != stmt) > + return false; > + } > + return true; > +}
if (!single_imm_use (var, &use, &use_stmt) || use_stmt != stmt) instead of the above. > +/* Combine two shuffles in a row. Returns 1 if there were any changes > + made, 2 if cfg-cleanup needs to run. Else it returns 0. */ > + > +static int > +simplify_permutation (gimple_stmt_iterator *gsi) > +{ > + gimple stmt = gsi_stmt (*gsi); > + gimple def_stmt; > + tree op0, op1, op2, op3, mask; > + enum tree_code code = gimple_assign_rhs_code (stmt); > + enum tree_code code2; > + location_t loc = gimple_location (stmt); > + > + gcc_checking_assert (code == VEC_PERM_EXPR); > + > + op0 = gimple_assign_rhs1 (stmt); > + op1 = gimple_assign_rhs2 (stmt); > + op2 = gimple_assign_rhs3 (stmt); > + > + if (TREE_CODE (op0) != SSA_NAME) > + return 0; > + > + if (TREE_CODE (op2) != VECTOR_CST) > + return 0; > + > + def_stmt = SSA_NAME_DEF_STMT (op0); > + if (!def_stmt || !is_gimple_assign (def_stmt) > + || !can_propagate_from (def_stmt) > + || !is_only_used_in (op0, stmt)) Or rather than the above, simply check || !has_single_use (op0) here. > + return 0; > + > + /* Check that it is only used here. We cannot use has_single_use > + since the expression is using it twice itself... */ Ah ... so then || num_imm_uses (op0) != 2 > + code2 = gimple_assign_rhs_code (def_stmt); > + > + /* Two consecutive shuffles. */ > + if (code2 == VEC_PERM_EXPR) > + { > + op3 = gimple_assign_rhs3 (def_stmt); > + if (TREE_CODE (op3) != VECTOR_CST) > + return 0; > + mask = fold_build3_loc (loc, VEC_PERM_EXPR, TREE_TYPE (op3), > + op3, op3, op2); > + gcc_assert (TREE_CODE (mask) == VECTOR_CST); which means you can use mask = fold_ternary (loc, ...); Ok with that changes. Thanks, Richard. > + gimple_assign_set_rhs1 (stmt, gimple_assign_rhs1 (def_stmt)); > + gimple_assign_set_rhs2 (stmt, gimple_assign_rhs2 (def_stmt)); > + gimple_assign_set_rhs3 (stmt, mask); > + fold_stmt_inplace (gsi); > + update_stmt (stmt); > + return remove_prop_source_from_use (op0) ? 2 : 1; > + } > + > + return false; > +} > + > /* Main entry point for the forward propagation and statement combine > optimizer. */ > > static unsigned int > ssa_forward_propagate_and_combine (void) > { > basic_block bb; > unsigned int todoflags = 0; > > cfg_changed = false; > @@ -2731,20 +2808,27 @@ ssa_forward_propagate_and_combine (void) > changed = associate_pointerplus (&gsi); > else if (CONVERT_EXPR_CODE_P (code) > || code == FLOAT_EXPR > || code == FIX_TRUNC_EXPR) > { > int did_something = combine_conversions (&gsi); > if (did_something == 2) > cfg_changed = true; > changed = did_something != 0; > } > + else if (code == VEC_PERM_EXPR) > + { > + int did_something = simplify_permutation (&gsi); > + if (did_something == 2) > + cfg_changed = true; > + changed = did_something != 0; > + } > break; > } > > case GIMPLE_SWITCH: > changed = simplify_gimple_switch (stmt); > break; > > case GIMPLE_COND: > { > int did_something; > Index: gcc/fold-const.c > =================================================================== > --- gcc/fold-const.c (revision 190311) > +++ gcc/fold-const.c (working copy) > @@ -14148,58 +14148,96 @@ fold_ternary_loc (location_t loc, enum t > return fold_build2_loc (loc, PLUS_EXPR, type, > const_binop (MULT_EXPR, arg0, arg1), arg2); > if (integer_zerop (arg2)) > return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1); > > return fold_fma (loc, type, arg0, arg1, arg2); > > case VEC_PERM_EXPR: > if (TREE_CODE (arg2) == VECTOR_CST) > { > - unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i; > + unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i, mask; > unsigned char *sel = XALLOCAVEC (unsigned char, nelts); > tree t; > bool need_mask_canon = false; > + bool all_in_vec0 = true; > + bool all_in_vec1 = true; > + bool maybe_identity = true; > + bool single_arg = (op0 == op1); > + bool changed = false; > > + mask = single_arg ? (nelts - 1) : (2 * nelts - 1); > gcc_assert (nelts == VECTOR_CST_NELTS (arg2)); > for (i = 0; i < nelts; i++) > { > tree val = VECTOR_CST_ELT (arg2, i); > if (TREE_CODE (val) != INTEGER_CST) > return NULL_TREE; > > - sel[i] = TREE_INT_CST_LOW (val) & (2 * nelts - 1); > + sel[i] = TREE_INT_CST_LOW (val) & mask; > if (TREE_INT_CST_HIGH (val) > || ((unsigned HOST_WIDE_INT) > TREE_INT_CST_LOW (val) != sel[i])) > need_mask_canon = true; > + > + if (sel[i] < nelts) > + all_in_vec1 = false; > + else > + all_in_vec0 = false; > + > + if ((sel[i] & (nelts-1)) != i) > + maybe_identity = false; > + } > + > + if (maybe_identity) > + { > + if (all_in_vec0) > + return op0; > + if (all_in_vec1) > + return op1; > } > > if ((TREE_CODE (arg0) == VECTOR_CST > || TREE_CODE (arg0) == CONSTRUCTOR) > && (TREE_CODE (arg1) == VECTOR_CST > || TREE_CODE (arg1) == CONSTRUCTOR)) > { > t = fold_vec_perm (type, arg0, arg1, sel); > if (t != NULL_TREE) > return t; > } > > + if (all_in_vec0) > + op1 = op0; > + else if (all_in_vec1) > + { > + op0 = op1; > + for (i = 0; i < nelts; i++) > + sel[i] -= nelts; > + need_mask_canon = true; > + } > + > + if (op0 == op1 && !single_arg) > + changed = true; > + > if (need_mask_canon && arg2 == op2) > { > tree *tsel = XALLOCAVEC (tree, nelts); > tree eltype = TREE_TYPE (TREE_TYPE (arg2)); > for (i = 0; i < nelts; i++) > tsel[i] = build_int_cst (eltype, sel[i]); > - t = build_vector (TREE_TYPE (arg2), tsel); > - return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, t); > + op2 = build_vector (TREE_TYPE (arg2), tsel); > + changed = true; > } > + > + if (changed) > + return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, op2); > } > return NULL_TREE; > > default: > return NULL_TREE; > } /* switch (code) */ > } > > /* Perform constant folding and related simplification of EXPR. > The related simplifications include x*1 => x, x*0 => 0, etc., > Index: gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c > =================================================================== > --- gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c (revision 0) > +++ gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c (revision 0) > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O -fdump-tree-forwprop2" } */ > + > +typedef int vec __attribute__((vector_size (2 * sizeof (int)))); > +void f (vec *x1, vec *x2) > +{ > + vec m = { 3, 1 }; > + vec n = { 1, 8 }; > + vec y = __builtin_shuffle (*x1, *x2, n); > + vec z = __builtin_shuffle (y, m); > + *x1 = z; > +} > + > +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 0, 0 }" "forwprop2" } } */ > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "forwprop2" } } */ > +/* { dg-final { scan-tree-dump-times "x2" 1 "forwprop2" } } */ > +/* { dg-final { cleanup-tree-dump "forwprop2" } } */ > > Property changes on: gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c > ___________________________________________________________________ > Added: svn:eol-style > + native > Added: svn:keywords > + Author Date Id Revision URL > >