This detects 1:1 (identity) permutations and, for those, avoids both
asking the target whether it can create them and generating a
VEC_PERM_EXPR in the vectorized code.

Bootstrap / regtest in progress on x86_64-unknown-linux-gnu.

Richard.

2015-11-27  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/68553
        * tree-vect-slp.c (vect_create_mask_and_perm): Skip VEC_PERM_EXPR
        generation for 1:1 permutations.
        (vect_transform_slp_perm_load): Detect 1:1 permutations.

Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c (revision 230993)
--- gcc/tree-vect-slp.c (working copy)
*************** vect_create_mask_and_perm (gimple *stmt,
*** 3224,3235 ****
        first_vec = dr_chain[first_vec_indx];
        second_vec = dr_chain[second_vec_indx];
  
!       /* Generate the permute statement.  */
!       perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
!                                      first_vec, second_vec, mask);
!       data_ref = make_ssa_name (perm_dest, perm_stmt);
!       gimple_set_lhs (perm_stmt, data_ref);
!       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
  
        /* Store the vector statement in NODE.  */
        SLP_TREE_VEC_STMTS (node)[stride_out * i + vect_stmts_counter]
--- 3253,3270 ----
        first_vec = dr_chain[first_vec_indx];
        second_vec = dr_chain[second_vec_indx];
  
!       /* Generate the permute statement if necessary.  */
!       if (mask)
!       {
!         perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
!                                          first_vec, second_vec, mask);
!         data_ref = make_ssa_name (perm_dest, perm_stmt);
!         gimple_set_lhs (perm_stmt, data_ref);
!         vect_finish_stmt_generation (stmt, perm_stmt, gsi);
!       }
!       else
!       /* If mask was NULL_TREE generate the requested identity transform.  */
!       perm_stmt = SSA_NAME_DEF_STMT (first_vec);
  
        /* Store the vector statement in NODE.  */
        SLP_TREE_VEC_STMTS (node)[stride_out * i + vect_stmts_counter]
*************** vect_transform_slp_perm_load (slp_tree n
*** 3315,3320 ****
--- 3350,3356 ----
    int index = 0;
    int first_vec_index = -1;
    int second_vec_index = -1;
+   bool noop_p = true;
  
    for (int j = 0; j < unroll_factor; j++)
      {
*************** vect_transform_slp_perm_load (slp_tree n
*** 3351,3361 ****
  
          gcc_assert (mask_element >= 0
                      && mask_element < 2 * nunits);
          mask[index++] = mask_element;
  
          if (index == nunits)
            {
!             if (!can_vec_perm_p (mode, false, mask))
                {
                  if (dump_enabled_p ())
                    {
--- 3387,3400 ----
  
          gcc_assert (mask_element >= 0
                      && mask_element < 2 * nunits);
+         if (mask_element != index)
+           noop_p = false;
          mask[index++] = mask_element;
  
          if (index == nunits)
            {
!             if (! noop_p
!                 && ! can_vec_perm_p (mode, false, mask))
                {
                  if (dump_enabled_p ())
                    {
*************** vect_transform_slp_perm_load (slp_tree n
*** 3371,3381 ****
  
              if (!analyze_only)
                {
!                 tree mask_vec, *mask_elts;
!                 mask_elts = XALLOCAVEC (tree, nunits);
!                 for (int l = 0; l < nunits; ++l)
!                   mask_elts[l] = build_int_cst (mask_element_type, mask[l]);
!                 mask_vec = build_vector (mask_type, mask_elts);
  
                  if (second_vec_index == -1)
                    second_vec_index = first_vec_index;
--- 3410,3425 ----
  
              if (!analyze_only)
                {
!                 tree mask_vec = NULL_TREE;
!                 
!                 if (! noop_p)
!                   {
!                     tree *mask_elts = XALLOCAVEC (tree, nunits);
!                     for (int l = 0; l < nunits; ++l)
!                       mask_elts[l] = build_int_cst (mask_element_type,
!                                                     mask[l]);
!                     mask_vec = build_vector (mask_type, mask_elts);
!                   }
  
                  if (second_vec_index == -1)
                    second_vec_index = first_vec_index;
*************** vect_transform_slp_perm_load (slp_tree n
*** 3388,3393 ****
--- 3432,3438 ----
              index = 0;
              first_vec_index = -1;
              second_vec_index = -1;
+             noop_p = true;
            }
        }
      }

Reply via email to