On Sun, Aug 22, 2021 at 8:54 PM H.J. Lu via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > In vetor move pattern, replace nonimmediate_or_sse_const_operand with > nonimmediate_or_sse_const_vector_operand to allow vector load from > non-uniform CONST_VECTOR. Non-uniform CONST_VECTOR is enabled only in > the combine pass since other RTL optimizers work better with constant > pool. > > gcc/ > > PR target/43147 > * config/i386/i386-expand.c (ix86_constant_broadcast): New > function. Extracted from ix86_broadcast_from_constant. > (ix86_broadcast_from_constant): Call ix86_constant_broadcast. > (non_uniform_const_vector_p): New function. > * config/i386/i386-protos.h (non_uniform_const_vector_p): New > prototype. > * config/i386/predicates.md > (nonimmediate_or_sse_const_vector_operand): New predicate. > * config/i386/sse.md (mov<mode>_internal): Replace > nonimmediate_or_sse_const_operand with > nonimmediate_or_sse_const_vector_operand. > > gcc/testsuite/ > > PR target/43147 > * gcc.target/i386/pr43147.c: New test. > --- > gcc/config/i386/i386-expand.c | 74 ++++++++++++++----------- > gcc/config/i386/i386-protos.h | 1 + > gcc/config/i386/predicates.md | 7 +++ > gcc/config/i386/sse.md | 2 +- > gcc/testsuite/gcc.target/i386/pr43147.c | 15 +++++ > 5 files changed, 67 insertions(+), 32 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr43147.c > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c > index 9bf13dbfa92..1d8f3110310 100644 > --- a/gcc/config/i386/i386-expand.c > +++ b/gcc/config/i386/i386-expand.c > @@ -453,31 +453,12 @@ ix86_expand_move (machine_mode mode, rtx operands[]) > emit_insn (gen_rtx_SET (op0, op1)); > } > > -/* OP is a memref of CONST_VECTOR, return scalar constant mem > - if CONST_VECTOR is a vec_duplicate, else return NULL. */ > +/* CONSTANT is a CONST_VECTOR, return scalar constant if CONST_VECTOR is > + a vec_duplicate, else return nullptr. */ > + > static rtx > -ix86_broadcast_from_constant (machine_mode mode, rtx op) > +ix86_constant_broadcast (rtx constant, machine_mode mode) > { > - int nunits = GET_MODE_NUNITS (mode); > - if (nunits < 2) > - return nullptr; > - > - /* Don't use integer vector broadcast if we can't move from GPR to SSE > - register directly. */ > - if (!TARGET_INTER_UNIT_MOVES_TO_VEC > - && INTEGRAL_MODE_P (mode)) > - return nullptr; > - > - /* Convert CONST_VECTOR to a non-standard SSE constant integer > - broadcast only if vector broadcast is available. */ > - if (!(TARGET_AVX2 > - || (TARGET_AVX > - && (GET_MODE_INNER (mode) == SImode > - || GET_MODE_INNER (mode) == DImode)) > - || FLOAT_MODE_P (mode)) > - || standard_sse_constant_p (op, mode)) > - return nullptr; > - This part is dropped in the new ix86_broadcast_from_constant,why? > /* Don't broadcast from a 64-bit integer constant in 32-bit mode. > We can still put 64-bit integer constant in memory when > avx512 embed broadcast is available. */ > @@ -486,13 +467,6 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op) > || (GET_MODE_SIZE (mode) < 64 && !TARGET_AVX512VL))) > return nullptr; > > - if (GET_MODE_INNER (mode) == TImode) > - return nullptr; > - > - rtx constant = get_pool_constant (XEXP (op, 0)); > - if (GET_CODE (constant) != CONST_VECTOR) > - return nullptr; > - > /* There could be some rtx like > (mem/u/c:V16QI (symbol_ref/u:DI ("*.LC1"))) > but with "*.LC1" refer to V2DI constant vector. */ > @@ -506,7 +480,7 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op) > > rtx first = XVECEXP (constant, 0, 0); > > - for (int i = 1; i < nunits; ++i) > + for (int i = 1; i < GET_MODE_NUNITS (mode); ++i) > { > rtx tmp = XVECEXP (constant, 0, i); > /* Vector duplicate value. */ > @@ -517,6 +491,44 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op) > return first; > } > > +/* OP is a memref of CONST_VECTOR, return scalar constant mem > + if CONST_VECTOR is a vec_duplicate, else return NULL. */ > +static rtx > +ix86_broadcast_from_constant (machine_mode mode, rtx op) > +{ > + int nunits = GET_MODE_NUNITS (mode); > + if (nunits < 2) > + return nullptr; > + > + /* Don't use integer vector broadcast if we can't move from GPR to SSE > + register directly. */ > + if (!TARGET_INTER_UNIT_MOVES_TO_VEC > + && INTEGRAL_MODE_P (mode)) > + return nullptr; > + > + if (GET_MODE_INNER (mode) == TImode) > + return nullptr; > + > + rtx constant = get_pool_constant (XEXP (op, 0)); > + if (GET_CODE (constant) != CONST_VECTOR) > + return nullptr; > + > + return ix86_constant_broadcast (constant, mode); > +} > + > +/* Return true if OP is a non-uniform CONST_VECTOR. */ > + Drop empty line. > +bool > +non_uniform_const_vector_p (rtx op, machine_mode mode) > +{ > + /* Allow non-uniform CONST_VECTOR only in the combine pass since other > + RTL optimizers work better with constant pool. */ > + return (current_pass > + && current_pass->tv_id == TV_COMBINE > + && GET_CODE (op) == CONST_VECTOR > + && ix86_constant_broadcast (op, mode) == nullptr); > +} I'm not sure it's a good idea to add a specific pass condition to the backend's predicate. If it's for combine, should we add define_split alternatively? > + > void > ix86_expand_vector_move (machine_mode mode, rtx operands[]) > { > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h > index 2fd13074c81..91d4c7cefb8 100644 > --- a/gcc/config/i386/i386-protos.h > +++ b/gcc/config/i386/i386-protos.h > @@ -58,6 +58,7 @@ extern int standard_80387_constant_p (rtx); > extern const char *standard_80387_constant_opcode (rtx); > extern rtx standard_80387_constant_rtx (int); > extern int standard_sse_constant_p (rtx, machine_mode); > +extern bool non_uniform_const_vector_p (rtx, machine_mode); > extern const char *standard_sse_constant_opcode (rtx_insn *, rtx *); > extern bool ix86_standard_x87sse_constant_load_p (const rtx_insn *, rtx); > extern bool ix86_pre_reload_split (void); > diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md > index 9321f332ef9..435b22855c8 100644 > --- a/gcc/config/i386/predicates.md > +++ b/gcc/config/i386/predicates.md > @@ -1176,6 +1176,13 @@ (define_predicate "nonimmediate_or_sse_const_operand" > (ior (match_operand 0 "nonimmediate_operand") > (match_test "standard_sse_constant_p (op, mode)"))) > > +;; Return true when OP is nonimmediate, standard SSE constant, or > +;; non-uniform CONST_VECTOR. > +(define_predicate "nonimmediate_or_sse_const_vector_operand" > + (ior (match_operand 0 "nonimmediate_operand") > + (match_test "standard_sse_constant_p (op, mode)") > + (match_test "non_uniform_const_vector_p (op, mode)"))) > + > ;; Return true if OP is a register or a zero. > (define_predicate "reg_or_0_operand" > (ior (match_operand 0 "register_operand") > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 13889687793..416ddcfcacc 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -1075,7 +1075,7 @@ (define_expand "mov<mode>" > (define_insn "mov<mode>_internal" > [(set (match_operand:VMOVE 0 "nonimmediate_operand" > "=v,v ,v ,m") > - (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" > + (match_operand:VMOVE 1 "nonimmediate_or_sse_const_vector_operand" > " C,<sseconstm1>,vm,v"))] > "TARGET_SSE > && (register_operand (operands[0], <MODE>mode) > diff --git a/gcc/testsuite/gcc.target/i386/pr43147.c > b/gcc/testsuite/gcc.target/i386/pr43147.c > new file mode 100644 > index 00000000000..3c30f917c06 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr43147.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msse2" } */ > +/* { dg-final { scan-assembler "movaps" } } */ > +/* { dg-final { scan-assembler-not "shufps" } } */ > + > +#include <x86intrin.h> > + > +__m128 > +foo (void) > +{ > + __m128 m = _mm_set_ps(1.0f, 2.0f, 3.0f, 4.0f); > + m = _mm_shuffle_ps(m, m, 0xC9); > + m = _mm_shuffle_ps(m, m, 0x2D); > + return m; > +} > -- > 2.31.1 >
-- BR, Hongtao