On Sun, Mar 03, 2019 at 06:40:09AM -0800, Andrew Pinski wrote:
> )
> ,On Sun, Mar 3, 2019 at 6:32 AM H.J. Lu <hjl.to...@gmail.com> wrote:
> >
> > For vector init constructor:
> >
> > ---
> > typedef float __v4sf __attribute__ ((__vector_size__ (16)));
> >
> > __v4sf
> > foo (__v4sf x, float f)
> > {
> >   __v4sf y = { f, x[1], x[2], x[3] };
> >   return y;
> > }
> > ---
> >
> > we can optimize vector init constructor with vector copy or permute
> > followed by a single scalar insert:
> >
> >   __v4sf D.1912;
> >   __v4sf D.1913;
> >   __v4sf D.1914;
> >   __v4sf y;
> >
> >   x.0_1 = x;
> >   D.1912 = x.0_1;
> >   _2 = D.1912;
> >   D.1913 = _2;
> >   BIT_FIELD_REF <D.1913, 32, 0> = f;
> >   y = D.1913;
> >   D.1914 = y;
> >   return D.1914;
> >
> > instead of
> >
> >   __v4sf D.1962;
> >   __v4sf y;
> >
> >   _1 = BIT_FIELD_REF <x, 32, 32>;
> >   _2 = BIT_FIELD_REF <x, 32, 64>;
> >   _3 = BIT_FIELD_REF <x, 32, 96>;
> >   y = {f, _1, _2, _3};
> >   D.1962 = y;
> >   return D.1962;
> >
> > gcc/
> >
> >         PR tree-optimization/88828
> >         * gimplify.c (gimplify_init_constructor): Optimize vector init
> >         constructor with vector copy or permute followed by a single
> >         scalar insert.
> 
> 
> Doing this here does not catch things like:
> typedef float __v4sf __attribute__ ((__vector_size__ (16)));
> 
> 
> __v4sf
> vector_init (float f0,float f1, float f2,float f3)
> {
>   __v4sf y = { f, x[1], x[2], x[3] };
>    return y;
> }
> 
> __v4sf
> foo (__v4sf x, float f)
> {
>   return vector_init (f, x[1], x[2], x[3]) ;
> }
> 

Here is a patch for simplify_vector_constructor to optimize vector init
constructor with vector copy or permute followed by a single scalar
insert.  But this doesn't work correcly:

[hjl@gnu-cfl-2 pr88828]$ cat bar.i
typedef float __v4sf __attribute__ ((__vector_size__ (16)));

static __v4sf
vector_init (float f0,float f1, float f2,float f3)
{
  __v4sf y = { f0, f1, f2, f3 };
   return y;
}

__v4sf
foo (__v4sf x, float f)
{
  return vector_init (f, x[1], x[2], x[3]) ;
}
[hjl@gnu-cfl-2 pr88828]$ make bar.s
/export/build/gnu/tools-build/gcc-wip-debug/build-x86_64-linux/gcc/xgcc 
-B/export/build/gnu/tools-build/gcc-wip-debug/build-x86_64-linux/gcc/ -O2 -S 
bar.i
[hjl@gnu-cfl-2 pr88828]$ cat bar.s
        .file   "bar.i"
        .text
        .p2align 4
        .globl  foo
        .type   foo, @function
foo:
.LFB1:
        .cfi_startproc
        ret
        .cfi_endproc
.LFE1:
        .size   foo, .-foo
        .ident  "GCC: (GNU) 9.0.1 20190303 (experimental)"
        .section        .note.GNU-stack,"",@progbits
[hjl@gnu-cfl-2 pr88828]$

Scalar insert is missing.
---
 gcc/tree-ssa-forwprop.c | 77 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 69 insertions(+), 8 deletions(-)

diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index eeb6281c652..b10cfccf7b8 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -2008,7 +2008,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
   unsigned elem_size, i;
   unsigned HOST_WIDE_INT nelts;
   enum tree_code code, conv_code;
-  constructor_elt *elt;
+  constructor_elt *ce;
   bool maybe_ident;
 
   gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR);
@@ -2027,18 +2027,41 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
   orig[1] = NULL;
   conv_code = ERROR_MARK;
   maybe_ident = true;
-  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
+
+  tree rhs_vector = NULL;
+  /* The single scalar element.  */
+  tree scalar_element = NULL;
+  unsigned int scalar_idx = 0;
+  bool insert = false;
+  unsigned int nscalars = 0;
+  unsigned int nvectors = 0;
+  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, ce)
     {
       tree ref, op1;
 
       if (i >= nelts)
        return false;
 
-      if (TREE_CODE (elt->value) != SSA_NAME)
+      if (TREE_CODE (ce->value) != SSA_NAME)
        return false;
-      def_stmt = get_prop_source_stmt (elt->value, false, NULL);
+      def_stmt = get_prop_source_stmt (ce->value, false, NULL);
       if (!def_stmt)
-       return false;
+       {
+         if ( gimple_nop_p (SSA_NAME_DEF_STMT (ce->value)))
+           {
+             /* Only allow one single scalar insert.  */
+             if (nscalars != 0)
+               return false;
+
+             nscalars = 1;
+             insert = true;
+             scalar_idx = i;
+             scalar_element = ce->value;
+             continue;
+           }
+         else
+           return false;
+       }
       code = gimple_assign_rhs_code (def_stmt);
       if (code == FLOAT_EXPR
          || code == FIX_TRUNC_EXPR)
@@ -2046,7 +2069,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
          op1 = gimple_assign_rhs1 (def_stmt);
          if (conv_code == ERROR_MARK)
            {
-             if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (elt->value))),
+             if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (ce->value))),
                            GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op1)))))
                return false;
              conv_code = code;
@@ -2095,6 +2118,18 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
        elt += nelts;
       if (elt != i)
        maybe_ident = false;
+
+       if (type == TREE_TYPE (ref))
+        {
+          /* The RHS vector has the same type as LHS.  */
+          if (rhs_vector == NULL)
+            rhs_vector = ref;
+          /* Check if all RHS vector elements come fome the same
+             vector.  */
+          if (rhs_vector == ref)
+            nvectors++;
+        }
+
       sel.quick_push (elt);
     }
   if (i < nelts)
@@ -2113,6 +2148,12 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
          || conv_code == CALL_EXPR))
     return false;
 
+  /* Replace the scalar element with the vector element.  */
+  if (insert
+      && (TYPE_VECTOR_SUBPARTS (type).to_constant ()
+         == (nscalars + nvectors)))
+    sel.quick_push (scalar_idx);
+
   if (maybe_ident)
     {
       if (conv_code == ERROR_MARK)
@@ -2127,14 +2168,22 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
 
       vec_perm_indices indices (sel, orig[1] ? 2 : 1, nelts);
       if (!can_vec_perm_const_p (TYPE_MODE (type), indices))
-       return false;
+       {
+         if (insert)
+           gcc_unreachable ();
+         return false;
+       }
       mask_type
        = build_vector_type (build_nonstandard_integer_type (elem_size, 1),
                             nelts);
       if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_VECTOR_INT
          || maybe_ne (GET_MODE_SIZE (TYPE_MODE (mask_type)),
                       GET_MODE_SIZE (TYPE_MODE (type))))
-       return false;
+       {
+         if (insert)
+           gcc_unreachable ();
+         return false;
+       }
       op2 = vec_perm_indices_to_tree (mask_type, indices);
       if (!orig[1])
        orig[1] = orig[0];
@@ -2153,6 +2202,18 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
        }
     }
   update_stmt (gsi_stmt (*gsi));
+  if (insert)
+    {
+      /* Generate a single scalar insert.  */
+      /* FIXME: This doesn't work correctly.  */
+      tree lhs = gimple_assign_lhs (stmt);
+      tree bitfield = build3 (BIT_FIELD_REF, elem_type, lhs,
+                             bitsize_int (elem_size),
+                             bitsize_int (scalar_idx * elem_size));
+      gimple *new_stmt = gimple_build_assign (bitfield, scalar_element);
+      gsi_insert_after (gsi, new_stmt, GSI_SAME_STMT);
+      update_stmt (gsi_stmt (*gsi));
+    }
   return true;
 }
 
-- 
2.20.1

Reply via email to