On Mon, Mar 04, 2019 at 12:55:04PM +0100, Richard Biener wrote: > On Sun, Mar 3, 2019 at 10:13 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > On Sun, Mar 03, 2019 at 06:40:09AM -0800, Andrew Pinski wrote: > > > On Sun, Mar 3, 2019 at 6:32 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > > > > > For vector init constructor: > > > > > > > > --- > > > > typedef float __v4sf __attribute__ ((__vector_size__ (16))); > > > > > > > > __v4sf > > > > foo (__v4sf x, float f) > > > > { > > > > __v4sf y = { f, x[1], x[2], x[3] }; > > > > return y; > > > > } > > > > --- > > > > > > > > we can optimize vector init constructor with vector copy or permute > > > > followed by a single scalar insert:
> and you want to advance to the _1 = BIT_INSERT_EXPR here. The easiest way > is to emit a new stmt for _2 = copy ...; and do the set_rhs with the > BIT_INSERT_EXPR. Thanks for the BIT_INSERT_EXPR suggestion. I am testing this patch. H.J. --- We can optimize vector constructor with vector copy or permute followed by a single scalar insert: __v4sf y; __v4sf D.1930; float _1; float _2; float _3; <bb 2> : _1 = BIT_FIELD_REF <x_9(D), 32, 96>; _2 = BIT_FIELD_REF <x_9(D), 32, 64>; _3 = BIT_FIELD_REF <x_9(D), 32, 32>; y_6 = {f_5(D), _3, _2, _1}; return y_6; with __v4sf y; __v4sf D.1930; float _1; float _2; float _3; vector(4) float _8; <bb 2> : _1 = BIT_FIELD_REF <x_9(D), 32, 96>; _2 = BIT_FIELD_REF <x_9(D), 32, 64>; _3 = BIT_FIELD_REF <x_9(D), 32, 32>; _8 = x_9(D); y_6 = BIT_INSERT_EXPR <x_9(D), f_5(D), 0 (32 bits)>; return y_6; gcc/ PR tree-optimization/88828 * tree-ssa-forwprop.c (simplify_vector_constructor): Optimize vector init constructor with vector copy or permute followed by a single scalar insert. gcc/testsuite/ PR tree-optimization/88828 * gcc.target/i386/pr88828-1a.c: New test. * gcc.target/i386/pr88828-1b.c: Likewise. * gcc.target/i386/pr88828-2.c: Likewise. * gcc.target/i386/pr88828-3a.c: Likewise. * gcc.target/i386/pr88828-3b.c: Likewise. * gcc.target/i386/pr88828-3c.c: Likewise. * gcc.target/i386/pr88828-3d.c: Likewise. * gcc.target/i386/pr88828-4a.c: Likewise. * gcc.target/i386/pr88828-4b.c: Likewise. * gcc.target/i386/pr88828-5a.c: Likewise. * gcc.target/i386/pr88828-5b.c: Likewise. * gcc.target/i386/pr88828-6a.c: Likewise. * gcc.target/i386/pr88828-6b.c: Likewise. * gcc.target/i386/pr88828-7.c: Likewise. 
--- gcc/testsuite/gcc.target/i386/pr88828-1a.c | 16 +++++ gcc/testsuite/gcc.target/i386/pr88828-1b.c | 22 ++++++ gcc/testsuite/gcc.target/i386/pr88828-2.c | 17 +++++ gcc/testsuite/gcc.target/i386/pr88828-3a.c | 16 +++++ gcc/testsuite/gcc.target/i386/pr88828-3b.c | 18 +++++ gcc/testsuite/gcc.target/i386/pr88828-3c.c | 22 ++++++ gcc/testsuite/gcc.target/i386/pr88828-3d.c | 24 +++++++ gcc/testsuite/gcc.target/i386/pr88828-4a.c | 17 +++++ gcc/testsuite/gcc.target/i386/pr88828-4b.c | 20 ++++++ gcc/testsuite/gcc.target/i386/pr88828-5a.c | 16 +++++ gcc/testsuite/gcc.target/i386/pr88828-5b.c | 18 +++++ gcc/testsuite/gcc.target/i386/pr88828-6a.c | 17 +++++ gcc/testsuite/gcc.target/i386/pr88828-6b.c | 19 +++++ gcc/testsuite/gcc.target/i386/pr88828-7.c | 22 ++++++ gcc/tree-ssa-forwprop.c | 84 +++++++++++++++++++--- 15 files changed, 338 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-1b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-3a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-3b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-3c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-3d.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-4a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-4b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-5a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-5b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-6a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-6b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-7.c diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1a.c b/gcc/testsuite/gcc.target/i386/pr88828-1a.c new file mode 100644 index 00000000000..4ef1feab389 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-1a.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { 
dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { scan-assembler "movss" } } */ +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +__v4sf +foo (__v4sf x, float f) +{ + __v4sf y = { f, x[1], x[2], x[3] }; + return y; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1b.c b/gcc/testsuite/gcc.target/i386/pr88828-1b.c new file mode 100644 index 00000000000..2cddf4263f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-1b.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { scan-assembler "movss" } } */ +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +static __v4sf +vector_init (float f0,float f1, float f2,float f3) +{ + __v4sf y = { f0, f1, f2, f3 }; + return y; +} + +__v4sf +foo (__v4sf x, float f) +{ + return vector_init (f, x[1], x[2], x[3]) ; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-2.c b/gcc/testsuite/gcc.target/i386/pr88828-2.c new file mode 100644 index 00000000000..6dc482b6f4b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-2.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { scan-assembler "movss" } } */ +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +__v4sf +foo (__v4sf x, float f) +{ + __v4sf y = x; + y[0] = f; + return y; +} diff --git 
a/gcc/testsuite/gcc.target/i386/pr88828-3a.c b/gcc/testsuite/gcc.target/i386/pr88828-3a.c new file mode 100644 index 00000000000..97eb8e7162a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-3a.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { scan-assembler "movss" } } */ +/* { dg-final { scan-assembler-times "shufps" 1 } } */ +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +__v4sf +foo (__v4sf x, float f) +{ + __v4sf y = { f, x[0], x[2], x[3] }; + return y; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-3b.c b/gcc/testsuite/gcc.target/i386/pr88828-3b.c new file mode 100644 index 00000000000..ab2ba730716 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-3b.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ +/* { dg-final { scan-assembler-times "vpermilps" 1 } } */ +/* { dg-final { scan-assembler-times "vmovss" 1 { target { ! 
ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpinsrd" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-not "vmovss" { target ia32 } } } */ +/* { dg-final { scan-assembler-not "vmovaps" } } */ +/* { dg-final { scan-assembler-not "vmovlhps" } } */ +/* { dg-final { scan-assembler-not "vunpcklps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +__v4sf +foo (__v4sf x, float f) +{ + __v4sf y = { f, x[0], x[2], x[3] }; + return y; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-3c.c b/gcc/testsuite/gcc.target/i386/pr88828-3c.c new file mode 100644 index 00000000000..0db7f9e145b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-3c.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { scan-assembler "movss" } } */ +/* { dg-final { scan-assembler-times "shufps" 1 } } */ +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +static __v4sf +vector_init (float f0,float f1, float f2,float f3) +{ + __v4sf y = { f0, f1, f2, f3 }; + return y; +} + +__v4sf +foo (__v4sf x, float f) +{ + return vector_init (f, x[0], x[1], x[3]) ; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-3d.c b/gcc/testsuite/gcc.target/i386/pr88828-3d.c new file mode 100644 index 00000000000..33e2b6e5881 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-3d.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ +/* { dg-final { scan-assembler-times "vpermilps" 1 } } */ +/* { dg-final { scan-assembler-times "vmovss" 1 { target { ! 
ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpinsrd" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-not "vmovss" { target ia32 } } } */ +/* { dg-final { scan-assembler-not "vmovaps" } } */ +/* { dg-final { scan-assembler-not "vmovlhps" } } */ +/* { dg-final { scan-assembler-not "vunpcklps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +static __v4sf +vector_init (float f0,float f1, float f2,float f3) +{ + __v4sf y = { f0, f1, f2, f3 }; + return y; +} + +__v4sf +foo (__v4sf x, float f) +{ + return vector_init (f, x[0], x[1], x[3]) ; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-4a.c b/gcc/testsuite/gcc.target/i386/pr88828-4a.c new file mode 100644 index 00000000000..a54689be701 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-4a.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { scan-assembler "movss" } } */ +/* { dg-final { scan-assembler-times "shufps" 1 } } */ +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +__v4sf +foo (__v4sf x, float f) +{ + __v4sf y = { x[0], x[2], x[3], x[1] }; + y[0] = f; + return y; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-4b.c b/gcc/testsuite/gcc.target/i386/pr88828-4b.c new file mode 100644 index 00000000000..0c3a1024d93 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-4b.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ +/* { dg-final { scan-assembler-times "vpermilps" 1 } } */ +/* { dg-final { scan-assembler-times "vmovss" 1 { target { ! 
ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpinsrd" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-not "vmovss" { target ia32 } } } */ +/* { dg-final { scan-assembler-not "vshufps" } } */ +/* { dg-final { scan-assembler-not "vmovaps" } } */ +/* { dg-final { scan-assembler-not "vmovlhps" } } */ +/* { dg-final { scan-assembler-not "vunpcklps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +__v4sf +foo (__v4sf x, float f) +{ + __v4sf y = { x[0], x[2], x[3], x[1] }; + y[0] = f; + return y; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-5a.c b/gcc/testsuite/gcc.target/i386/pr88828-5a.c new file mode 100644 index 00000000000..534808d3cd1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-5a.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { scan-assembler "movss" } } */ +/* { dg-final { scan-assembler-times "shufps" 2 } } */ +/* { dg-final { scan-assembler-times "movaps" 1 } } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +__v4sf +foo (__v4sf x, float f) +{ + __v4sf y = { x[0], x[2], x[3], f }; + return y; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-5b.c b/gcc/testsuite/gcc.target/i386/pr88828-5b.c new file mode 100644 index 00000000000..aebea790979 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-5b.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ +/* { dg-final { scan-assembler-times "vpermilps" 1 } } */ +/* { dg-final { scan-assembler-times "vinsertps" 1 } } */ +/* { dg-final { scan-assembler-not "vmovss" } } */ +/* { dg-final { scan-assembler-not "vshufps" } } */ +/* { dg-final { scan-assembler-not "vmovaps" } } */ +/* { dg-final { scan-assembler-not "vmovlhps" } } */ +/* { dg-final { scan-assembler-not "vunpcklps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ 
(16))); + +__v4sf +foo (__v4sf x, float f) +{ + __v4sf y = { x[0], x[2], x[3], f }; + return y; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-6a.c b/gcc/testsuite/gcc.target/i386/pr88828-6a.c new file mode 100644 index 00000000000..d43a36d9137 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-6a.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { scan-assembler "movss" } } */ +/* { dg-final { scan-assembler-times "shufps" 2 } } */ +/* { dg-final { scan-assembler-times "movaps" 1 } } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +__v4sf +foo (__v4sf x, float f) +{ + __v4sf y = { x[0], x[2], x[3], x[0] }; + y[3] = f; + return y; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-6b.c b/gcc/testsuite/gcc.target/i386/pr88828-6b.c new file mode 100644 index 00000000000..6856fe6500e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-6b.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ +/* { dg-final { scan-assembler-times "vpermilps" 1 } } */ +/* { dg-final { scan-assembler-times "vinsertps" 1 } } */ +/* { dg-final { scan-assembler-not "vshufps" } } */ +/* { dg-final { scan-assembler-not "vmovss" } } */ +/* { dg-final { scan-assembler-not "vmovaps" } } */ +/* { dg-final { scan-assembler-not "vmovlhps" } } */ +/* { dg-final { scan-assembler-not "vunpcklps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +__v4sf +foo (__v4sf x, float f) +{ + __v4sf y = { x[0], x[2], x[3], x[0] }; + y[3] = f; + return y; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-7.c b/gcc/testsuite/gcc.target/i386/pr88828-7.c new file mode 100644 index 00000000000..2cddf4263f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-7.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { 
scan-assembler "movss" } } */ +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +static __v4sf +vector_init (float f0,float f1, float f2,float f3) +{ + __v4sf y = { f0, f1, f2, f3 }; + return y; +} + +__v4sf +foo (__v4sf x, float f) +{ + return vector_init (f, x[1], x[2], x[3]) ; +} diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c index eeb6281c652..ce00c43d7e7 100644 --- a/gcc/tree-ssa-forwprop.c +++ b/gcc/tree-ssa-forwprop.c @@ -2008,7 +2008,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) unsigned elem_size, i; unsigned HOST_WIDE_INT nelts; enum tree_code code, conv_code; - constructor_elt *elt; + constructor_elt *ce; bool maybe_ident; gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR); @@ -2027,18 +2027,41 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) orig[1] = NULL; conv_code = ERROR_MARK; maybe_ident = true; - FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt) + + tree rhs_vector = NULL; + /* The single scalar element. */ + tree scalar_element = NULL; + unsigned int scalar_idx = 0; + bool insert = false; + unsigned int nscalars = 0; + unsigned int nvectors = 0; + FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, ce) { tree ref, op1; if (i >= nelts) return false; - if (TREE_CODE (elt->value) != SSA_NAME) + if (TREE_CODE (ce->value) != SSA_NAME) return false; - def_stmt = get_prop_source_stmt (elt->value, false, NULL); + def_stmt = get_prop_source_stmt (ce->value, false, NULL); if (!def_stmt) - return false; + { + if (gimple_nop_p (SSA_NAME_DEF_STMT (ce->value))) + { + /* Only allow one scalar insert. 
*/ + if (nscalars != 0) + return false; + + nscalars = 1; + insert = true; + scalar_idx = i; + scalar_element = ce->value; + continue; + } + else + return false; + } code = gimple_assign_rhs_code (def_stmt); if (code == FLOAT_EXPR || code == FIX_TRUNC_EXPR) @@ -2046,7 +2069,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) op1 = gimple_assign_rhs1 (def_stmt); if (conv_code == ERROR_MARK) { - if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (elt->value))), + if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (ce->value))), GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op1))))) return false; conv_code = code; @@ -2095,6 +2118,18 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) elt += nelts; if (elt != i) maybe_ident = false; + + if (type == TREE_TYPE (ref)) + { + /* The RHS vector has the same type as LHS. */ + if (rhs_vector == NULL) + rhs_vector = ref; + /* Check if all RHS vector elements come from the same + vector. */ + if (rhs_vector == ref) + nvectors++; + } + sel.quick_push (elt); } if (i < nelts) @@ -2113,6 +2148,12 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) || conv_code == CALL_EXPR)) return false; + /* Replace the scalar element with the vector element. */ + if (insert + && (TYPE_VECTOR_SUBPARTS (type).to_constant () + == (nscalars + nvectors))) + sel.quick_push (scalar_idx); + if (maybe_ident) { if (conv_code == ERROR_MARK) @@ -2127,18 +2168,26 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) vec_perm_indices indices (sel, orig[1] ? 
2 : 1, nelts); if (!can_vec_perm_const_p (TYPE_MODE (type), indices)) - return false; + { + if (insert) + gcc_unreachable (); + return false; + } mask_type = build_vector_type (build_nonstandard_integer_type (elem_size, 1), nelts); if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_VECTOR_INT || maybe_ne (GET_MODE_SIZE (TYPE_MODE (mask_type)), GET_MODE_SIZE (TYPE_MODE (type)))) - return false; + { + if (insert) + gcc_unreachable (); + return false; + } op2 = vec_perm_indices_to_tree (mask_type, indices); if (!orig[1]) orig[1] = orig[0]; - if (conv_code == ERROR_MARK) + if (conv_code == ERROR_MARK && !insert) gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig[0], orig[1], op2); else @@ -2148,10 +2197,25 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) VEC_PERM_EXPR, orig[0], orig[1], op2); orig[0] = gimple_assign_lhs (perm); gsi_insert_before (gsi, perm, GSI_SAME_STMT); - gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0], + gimple_assign_set_rhs_with_ops (gsi, + (conv_code != ERROR_MARK + ? conv_code + : NOP_EXPR), + orig[0], NULL_TREE, NULL_TREE); } } + if (insert) + { + /* Generate a single scalar insert. */ + tree var = make_ssa_name (type); + tree val = gimple_assign_rhs1 (stmt); + gimple *copy = gimple_build_assign (var, val); + gsi_insert_before (gsi, copy, GSI_SAME_STMT); + tree bitpos = bitsize_int (scalar_idx * elem_size); + gimple_assign_set_rhs_with_ops (gsi, BIT_INSERT_EXPR, var, + scalar_element, bitpos); + } update_stmt (gsi_stmt (*gsi)); return true; } -- 2.20.1