This patch detects 1:1 (identity) permutations and, for those, avoids both asking the target whether it can perform the permutation and generating a VEC_PERM_EXPR in the vectorized code.
Bootstrap / regtest in progress on x86_64-unknown-linux-gnu. Richard. 2015-11-27 Richard Biener <rguent...@suse.de> PR tree-optimization/68553 * tree-vect-slp.c (vect_create_mask_and_perm): Skip VEC_PERM_EXPR generation for 1:1 permutations. (vect_transform_slp_perm_load): Detect 1:1 permutations. Index: gcc/tree-vect-slp.c =================================================================== *** gcc/tree-vect-slp.c (revision 230993) --- gcc/tree-vect-slp.c (working copy) *************** vect_create_mask_and_perm (gimple *stmt, *** 3224,3235 **** first_vec = dr_chain[first_vec_indx]; second_vec = dr_chain[second_vec_indx]; ! /* Generate the permute statement. */ ! perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR, ! first_vec, second_vec, mask); ! data_ref = make_ssa_name (perm_dest, perm_stmt); ! gimple_set_lhs (perm_stmt, data_ref); ! vect_finish_stmt_generation (stmt, perm_stmt, gsi); /* Store the vector statement in NODE. */ SLP_TREE_VEC_STMTS (node)[stride_out * i + vect_stmts_counter] --- 3253,3270 ---- first_vec = dr_chain[first_vec_indx]; second_vec = dr_chain[second_vec_indx]; ! /* Generate the permute statement if necessary. */ ! if (mask) ! { ! perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR, ! first_vec, second_vec, mask); ! data_ref = make_ssa_name (perm_dest, perm_stmt); ! gimple_set_lhs (perm_stmt, data_ref); ! vect_finish_stmt_generation (stmt, perm_stmt, gsi); ! } ! else ! /* If mask was NULL_TREE generate the requested identity transform. */ ! perm_stmt = SSA_NAME_DEF_STMT (first_vec); /* Store the vector statement in NODE. 
*/ SLP_TREE_VEC_STMTS (node)[stride_out * i + vect_stmts_counter] *************** vect_transform_slp_perm_load (slp_tree n *** 3315,3320 **** --- 3350,3356 ---- int index = 0; int first_vec_index = -1; int second_vec_index = -1; + bool noop_p = true; for (int j = 0; j < unroll_factor; j++) { *************** vect_transform_slp_perm_load (slp_tree n *** 3351,3361 **** gcc_assert (mask_element >= 0 && mask_element < 2 * nunits); mask[index++] = mask_element; if (index == nunits) { ! if (!can_vec_perm_p (mode, false, mask)) { if (dump_enabled_p ()) { --- 3387,3400 ---- gcc_assert (mask_element >= 0 && mask_element < 2 * nunits); + if (mask_element != index) + noop_p = false; mask[index++] = mask_element; if (index == nunits) { ! if (! noop_p ! && ! can_vec_perm_p (mode, false, mask)) { if (dump_enabled_p ()) { *************** vect_transform_slp_perm_load (slp_tree n *** 3371,3381 **** if (!analyze_only) { ! tree mask_vec, *mask_elts; ! mask_elts = XALLOCAVEC (tree, nunits); ! for (int l = 0; l < nunits; ++l) ! mask_elts[l] = build_int_cst (mask_element_type, mask[l]); ! mask_vec = build_vector (mask_type, mask_elts); if (second_vec_index == -1) second_vec_index = first_vec_index; --- 3410,3425 ---- if (!analyze_only) { ! tree mask_vec = NULL_TREE; ! ! if (! noop_p) ! { ! tree *mask_elts = XALLOCAVEC (tree, nunits); ! for (int l = 0; l < nunits; ++l) ! mask_elts[l] = build_int_cst (mask_element_type, ! mask[l]); ! mask_vec = build_vector (mask_type, mask_elts); ! } if (second_vec_index == -1) second_vec_index = first_vec_index; *************** vect_transform_slp_perm_load (slp_tree n *** 3388,3393 **** --- 3432,3438 ---- index = 0; first_vec_index = -1; second_vec_index = -1; + noop_p = true; } } }