In vetor move pattern, replace nonimmediate_or_sse_const_operand with nonimmediate_or_sse_const_vector_operand to allow vector load from non-uniform CONST_VECTOR. Non-uniform CONST_VECTOR is enabled only in the combine pass since other RTL optimizers work better with constant pool.
gcc/ PR target/43147 * config/i386/i386-expand.c (ix86_constant_broadcast): New function. Extracted from ix86_broadcast_from_constant. (ix86_broadcast_from_constant): Call ix86_constant_broadcast. (non_uniform_const_vector_p): New function. * config/i386/i386-protos.h (non_uniform_const_vector_p): New prototype. * config/i386/predicates.md (nonimmediate_or_sse_const_vector_operand): New predicate. * config/i386/sse.md (mov<mode>_internal): Replace nonimmediate_or_sse_const_operand with nonimmediate_or_sse_const_vector_operand. gcc/testsuite/ PR target/43147 * gcc.target/i386/pr43147.c: New test. --- gcc/config/i386/i386-expand.c | 74 ++++++++++++++----------- gcc/config/i386/i386-protos.h | 1 + gcc/config/i386/predicates.md | 7 +++ gcc/config/i386/sse.md | 2 +- gcc/testsuite/gcc.target/i386/pr43147.c | 15 +++++ 5 files changed, 67 insertions(+), 32 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr43147.c diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 9bf13dbfa92..1d8f3110310 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -453,31 +453,12 @@ ix86_expand_move (machine_mode mode, rtx operands[]) emit_insn (gen_rtx_SET (op0, op1)); } -/* OP is a memref of CONST_VECTOR, return scalar constant mem - if CONST_VECTOR is a vec_duplicate, else return NULL. */ +/* CONSTANT is a CONST_VECTOR, return scalar constant if CONST_VECTOR is + a vec_duplicate, else return nullptr. */ + static rtx -ix86_broadcast_from_constant (machine_mode mode, rtx op) +ix86_constant_broadcast (rtx constant, machine_mode mode) { - int nunits = GET_MODE_NUNITS (mode); - if (nunits < 2) - return nullptr; - - /* Don't use integer vector broadcast if we can't move from GPR to SSE - register directly. */ - if (!TARGET_INTER_UNIT_MOVES_TO_VEC - && INTEGRAL_MODE_P (mode)) - return nullptr; - - /* Convert CONST_VECTOR to a non-standard SSE constant integer - broadcast only if vector broadcast is available. */ - if (!(TARGET_AVX2 - || (TARGET_AVX - && (GET_MODE_INNER (mode) == SImode - || GET_MODE_INNER (mode) == DImode)) - || FLOAT_MODE_P (mode)) - || standard_sse_constant_p (op, mode)) - return nullptr; - /* Don't broadcast from a 64-bit integer constant in 32-bit mode. We can still put 64-bit integer constant in memory when avx512 embed broadcast is available. */ @@ -486,13 +467,6 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op) || (GET_MODE_SIZE (mode) < 64 && !TARGET_AVX512VL))) return nullptr; - if (GET_MODE_INNER (mode) == TImode) - return nullptr; - - rtx constant = get_pool_constant (XEXP (op, 0)); - if (GET_CODE (constant) != CONST_VECTOR) - return nullptr; - /* There could be some rtx like (mem/u/c:V16QI (symbol_ref/u:DI ("*.LC1"))) but with "*.LC1" refer to V2DI constant vector. */ @@ -506,7 +480,7 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op) rtx first = XVECEXP (constant, 0, 0); - for (int i = 1; i < nunits; ++i) + for (int i = 1; i < GET_MODE_NUNITS (mode); ++i) { rtx tmp = XVECEXP (constant, 0, i); /* Vector duplicate value. */ @@ -517,6 +491,44 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op) return first; } +/* OP is a memref of CONST_VECTOR, return scalar constant mem + if CONST_VECTOR is a vec_duplicate, else return NULL. */ +static rtx +ix86_broadcast_from_constant (machine_mode mode, rtx op) +{ + int nunits = GET_MODE_NUNITS (mode); + if (nunits < 2) + return nullptr; + + /* Don't use integer vector broadcast if we can't move from GPR to SSE + register directly. */ + if (!TARGET_INTER_UNIT_MOVES_TO_VEC + && INTEGRAL_MODE_P (mode)) + return nullptr; + + if (GET_MODE_INNER (mode) == TImode) + return nullptr; + + rtx constant = get_pool_constant (XEXP (op, 0)); + if (GET_CODE (constant) != CONST_VECTOR) + return nullptr; + + return ix86_constant_broadcast (constant, mode); +} + +/* Return true if OP is a non-uniform CONST_VECTOR. */ + +bool +non_uniform_const_vector_p (rtx op, machine_mode mode) +{ + /* Allow non-uniform CONST_VECTOR only in the combine pass since other + RTL optimizers work better with constant pool. */ + return (current_pass + && current_pass->tv_id == TV_COMBINE + && GET_CODE (op) == CONST_VECTOR + && ix86_constant_broadcast (op, mode) == nullptr); +} + void ix86_expand_vector_move (machine_mode mode, rtx operands[]) { diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 2fd13074c81..91d4c7cefb8 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -58,6 +58,7 @@ extern int standard_80387_constant_p (rtx); extern const char *standard_80387_constant_opcode (rtx); extern rtx standard_80387_constant_rtx (int); extern int standard_sse_constant_p (rtx, machine_mode); +extern bool non_uniform_const_vector_p (rtx, machine_mode); extern const char *standard_sse_constant_opcode (rtx_insn *, rtx *); extern bool ix86_standard_x87sse_constant_load_p (const rtx_insn *, rtx); extern bool ix86_pre_reload_split (void); diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 9321f332ef9..435b22855c8 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1176,6 +1176,13 @@ (define_predicate "nonimmediate_or_sse_const_operand" (ior (match_operand 0 "nonimmediate_operand") (match_test "standard_sse_constant_p (op, mode)"))) +;; Return true when OP is nonimmediate, standard SSE constant, or +;; non-uniform CONST_VECTOR. +(define_predicate "nonimmediate_or_sse_const_vector_operand" + (ior (match_operand 0 "nonimmediate_operand") + (match_test "standard_sse_constant_p (op, mode)") + (match_test "non_uniform_const_vector_p (op, mode)"))) + ;; Return true if OP is a register or a zero. (define_predicate "reg_or_0_operand" (ior (match_operand 0 "register_operand") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 13889687793..416ddcfcacc 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1075,7 +1075,7 @@ (define_expand "mov<mode>" (define_insn "mov<mode>_internal" [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,v ,m") - (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" + (match_operand:VMOVE 1 "nonimmediate_or_sse_const_vector_operand" " C,<sseconstm1>,vm,v"))] "TARGET_SSE && (register_operand (operands[0], <MODE>mode) diff --git a/gcc/testsuite/gcc.target/i386/pr43147.c b/gcc/testsuite/gcc.target/i386/pr43147.c new file mode 100644 index 00000000000..3c30f917c06 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr43147.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ +/* { dg-final { scan-assembler "movaps" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ + +#include <x86intrin.h> + +__m128 +foo (void) +{ + __m128 m = _mm_set_ps(1.0f, 2.0f, 3.0f, 4.0f); + m = _mm_shuffle_ps(m, m, 0xC9); + m = _mm_shuffle_ps(m, m, 0x2D); + return m; +} -- 2.31.1