On Sun, Mar 03, 2019 at 06:40:09AM -0800, Andrew Pinski wrote: > ) > ,On Sun, Mar 3, 2019 at 6:32 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > For vector init constructor: > > > > --- > > typedef float __v4sf __attribute__ ((__vector_size__ (16))); > > > > __v4sf > > foo (__v4sf x, float f) > > { > > __v4sf y = { f, x[1], x[2], x[3] }; > > return y; > > } > > --- > > > > we can optimize vector init constructor with vector copy or permute > > followed by a single scalar insert: > > > > __v4sf D.1912; > > __v4sf D.1913; > > __v4sf D.1914; > > __v4sf y; > > > > x.0_1 = x; > > D.1912 = x.0_1; > > _2 = D.1912; > > D.1913 = _2; > > BIT_FIELD_REF <D.1913, 32, 0> = f; > > y = D.1913; > > D.1914 = y; > > return D.1914; > > > > instead of > > > > __v4sf D.1962; > > __v4sf y; > > > > _1 = BIT_FIELD_REF <x, 32, 32>; > > _2 = BIT_FIELD_REF <x, 32, 64>; > > _3 = BIT_FIELD_REF <x, 32, 96>; > > y = {f, _1, _2, _3}; > > D.1962 = y; > > return D.1962; > > > > gcc/ > > > > PR tree-optimization/88828 > > * gimplify.c (gimplify_init_constructor): Optimize vector init > > constructor with vector copy or permute followed by a single > > scalar insert. > > > Doing this here does not catch things like: > typedef float __v4sf __attribute__ ((__vector_size__ (16))); > > > __v4sf > vector_init (float f0,float f1, float f2,float f3) > { > __v4sf y = { f, x[1], x[2], x[3] }; > return y; > } > > __v4sf > foo (__v4sf x, float f) > { > return vector_init (f, x[1], x[2], x[3]) ; > } >
Here is a patch for simplify_vector_constructor to optimize vector init constructor with vector copy or permute followed by a single scalar insert. But this doesn't work correctly: [hjl@gnu-cfl-2 pr88828]$ cat bar.i typedef float __v4sf __attribute__ ((__vector_size__ (16))); static __v4sf vector_init (float f0,float f1, float f2,float f3) { __v4sf y = { f0, f1, f2, f3 }; return y; } __v4sf foo (__v4sf x, float f) { return vector_init (f, x[1], x[2], x[3]) ; } [hjl@gnu-cfl-2 pr88828]$ make bar.s /export/build/gnu/tools-build/gcc-wip-debug/build-x86_64-linux/gcc/xgcc -B/export/build/gnu/tools-build/gcc-wip-debug/build-x86_64-linux/gcc/ -O2 -S bar.i [hjl@gnu-cfl-2 pr88828]$ cat bar.s .file "bar.i" .text .p2align 4 .globl foo .type foo, @function foo: .LFB1: .cfi_startproc ret .cfi_endproc .LFE1: .size foo, .-foo .ident "GCC: (GNU) 9.0.1 20190303 (experimental)" .section .note.GNU-stack,"",@progbits [hjl@gnu-cfl-2 pr88828]$ Scalar insert is missing. --- gcc/tree-ssa-forwprop.c | 77 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 8 deletions(-) diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c index eeb6281c652..b10cfccf7b8 100644 --- a/gcc/tree-ssa-forwprop.c +++ b/gcc/tree-ssa-forwprop.c @@ -2008,7 +2008,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) unsigned elem_size, i; unsigned HOST_WIDE_INT nelts; enum tree_code code, conv_code; - constructor_elt *elt; + constructor_elt *ce; bool maybe_ident; gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR); @@ -2027,18 +2027,41 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) orig[1] = NULL; conv_code = ERROR_MARK; maybe_ident = true; - FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt) + + tree rhs_vector = NULL; + /* The single scalar element. 
*/ + tree scalar_element = NULL; + unsigned int scalar_idx = 0; + bool insert = false; + unsigned int nscalars = 0; + unsigned int nvectors = 0; + FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, ce) { tree ref, op1; if (i >= nelts) return false; - if (TREE_CODE (elt->value) != SSA_NAME) + if (TREE_CODE (ce->value) != SSA_NAME) return false; - def_stmt = get_prop_source_stmt (elt->value, false, NULL); + def_stmt = get_prop_source_stmt (ce->value, false, NULL); if (!def_stmt) - return false; + { + if ( gimple_nop_p (SSA_NAME_DEF_STMT (ce->value))) + { + /* Only allow one single scalar insert. */ + if (nscalars != 0) + return false; + + nscalars = 1; + insert = true; + scalar_idx = i; + scalar_element = ce->value; + continue; + } + else + return false; + } code = gimple_assign_rhs_code (def_stmt); if (code == FLOAT_EXPR || code == FIX_TRUNC_EXPR) @@ -2046,7 +2069,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) op1 = gimple_assign_rhs1 (def_stmt); if (conv_code == ERROR_MARK) { - if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (elt->value))), + if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (ce->value))), GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op1))))) return false; conv_code = code; @@ -2095,6 +2118,18 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) elt += nelts; if (elt != i) maybe_ident = false; + + if (type == TREE_TYPE (ref)) + { + /* The RHS vector has the same type as LHS. */ + if (rhs_vector == NULL) + rhs_vector = ref; + /* Check if all RHS vector elements come from the same + vector. */ + if (rhs_vector == ref) + nvectors++; + } + sel.quick_push (elt); } if (i < nelts) @@ -2113,6 +2148,12 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) || conv_code == CALL_EXPR)) return false; + /* Replace the scalar element with the vector element. 
*/ + if (insert + && (TYPE_VECTOR_SUBPARTS (type).to_constant () + == (nscalars + nvectors))) + sel.quick_push (scalar_idx); + if (maybe_ident) { if (conv_code == ERROR_MARK) @@ -2127,14 +2168,22 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) vec_perm_indices indices (sel, orig[1] ? 2 : 1, nelts); if (!can_vec_perm_const_p (TYPE_MODE (type), indices)) - return false; + { + if (insert) + gcc_unreachable (); + return false; + } mask_type = build_vector_type (build_nonstandard_integer_type (elem_size, 1), nelts); if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_VECTOR_INT || maybe_ne (GET_MODE_SIZE (TYPE_MODE (mask_type)), GET_MODE_SIZE (TYPE_MODE (type)))) - return false; + { + if (insert) + gcc_unreachable (); + return false; + } op2 = vec_perm_indices_to_tree (mask_type, indices); if (!orig[1]) orig[1] = orig[0]; @@ -2153,6 +2202,18 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) } } update_stmt (gsi_stmt (*gsi)); + if (insert) + { + /* Generate a single scalar insert. */ + /* FIXME: This doesn't work correctly. */ + tree lhs = gimple_assign_lhs (stmt); + tree bitfield = build3 (BIT_FIELD_REF, elem_type, lhs, + bitsize_int (elem_size), + bitsize_int (scalar_idx * elem_size)); + gimple *new_stmt = gimple_build_assign (bitfield, scalar_element); + gsi_insert_after (gsi, new_stmt, GSI_SAME_STMT); + update_stmt (gsi_stmt (*gsi)); + } return true; } -- 2.20.1