From: "dragan.mladjenovic" <dragan.mladjeno...@rt-rk.com> This workaround adds the -mfuse-vect-init option, which causes the back end to emit a single load for the vect_init when all of the init elements come from consecutive memory locations and are in the right order.
gcc/ * config/mips/mips.cc (mips_fuse_vect_init_p): New function. (mips_expand_vector_init): Detect init sequence that can be fused into a single load. * config/mips/mips.opt (mfuse-vect-init): New option. gcc/testsuite/ * gcc.target/mips/msa-fuse-vect-init.c: New file. Cherry-picked 4f440a87ad32b3549be8a0b89900d656ac70d4f8 and 1eb9d22dc480c962027eed522e0b26d0ebbd3e0b from https://github.com/MIPS/gcc Signed-off-by: Dragan Mladjenovic <dragan.mladjeno...@rt-rk.com> Signed-off-by: Faraz Shahbazker <fshahbaz...@wavecomp.com> Signed-off-by: Aleksandar Rakic <aleksandar.ra...@htecgroup.com> --- gcc/config/mips/mips.cc | 61 +++++++++++++++++++ gcc/config/mips/mips.opt | 3 + .../gcc.target/mips/msa-fuse-vect-init.c | 18 ++++++ 3 files changed, 82 insertions(+) create mode 100644 gcc/testsuite/gcc.target/mips/msa-fuse-vect-init.c diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc index bd62b8b7823..51d9812151a 100644 --- a/gcc/config/mips/mips.cc +++ b/gcc/config/mips/mips.cc @@ -24873,6 +24873,57 @@ mips_expand_vi_general (machine_mode vmode, machine_mode imode, emit_move_insn (target, mem); } +/* Return true if elements of vector initialization list should be loaded + via single "fused" vector load. 
*/ + +bool +mips_fuse_vect_init_p (machine_mode imode, unsigned nelt, rtx vals) +{ + unsigned i; + rtx base; + rtx base1; + rtx first; + rtx next; + HOST_WIDE_INT offset; + HOST_WIDE_INT offset1; + unsigned min_align = GET_MODE_BITSIZE (imode); + unsigned step_size = GET_MODE_SIZE (imode); + + if (!flag_fuse_vect_init) + return false; + + first = XVECEXP (vals, 0, 0); + + if (MEM_VOLATILE_P (first)) + return false; + + if (MEM_ALIGN (first) < min_align) + return false; + + if (GET_MODE (first) != imode) + return false; + + mips_split_plus (XEXP (first, 0), &base, &offset); + + if (!REG_P (base)) + return false; + + for (i = 1; i < nelt; ++i) + { + next = XVECEXP (vals, 0, i); + if (MEM_VOLATILE_P (next) + || MEM_ALIGN (next) < min_align + || GET_MODE (next) != imode) + return false; + mips_split_plus (XEXP (next, 0), &base1, &offset1); + if (!rtx_equal_p (base, base1) || (offset1 - offset) != step_size) + return false; + offset = offset1; + } + + return true; +} + /* Expand a vector initialization. 
*/ void @@ -24883,6 +24934,7 @@ mips_expand_vector_init (rtx target, rtx vals) unsigned i, nelt = GET_MODE_NUNITS (vmode); unsigned nvar = 0, one_var = -1u; bool all_same = true; + bool all_mem = true; rtx x; for (i = 0; i < nelt; ++i) @@ -24890,6 +24942,8 @@ mips_expand_vector_init (rtx target, rtx vals) x = XVECEXP (vals, 0, i); if (!mips_constant_elt_p (x)) nvar++, one_var = i; + if (!MEM_P (x)) + all_mem = false; if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) all_same = false; } @@ -24950,6 +25004,13 @@ mips_expand_vector_init (rtx target, rtx vals) } else { + if (all_mem && mips_fuse_vect_init_p (imode, nelt, vals)) + { + rtx mem = widen_memory_access (XVECEXP (vals, 0, 0), vmode, 0); + emit_move_insn (target, mem); + return; + } + emit_move_insn (target, CONST0_RTX (vmode)); for (i = 0; i < nelt; ++i) diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt index d162702c220..be347155286 100644 --- a/gcc/config/mips/mips.opt +++ b/gcc/config/mips/mips.opt @@ -576,3 +576,6 @@ Allow inlining even if the compression flags differ between caller and callee. 
msched-weight Target Var(TARGET_SCHED_WEIGHT) Undocumented + +mfuse-vect-init +Target Var(flag_fuse_vect_init) Undocumented Init(-1) diff --git a/gcc/testsuite/gcc.target/mips/msa-fuse-vect-init.c b/gcc/testsuite/gcc.target/mips/msa-fuse-vect-init.c new file mode 100644 index 00000000000..faa1ff4eee6 --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/msa-fuse-vect-init.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-mfp64 -mhard-float -mmsa" } */ +/* { dg-additional-options "-mfuse-vect-init" } */ +/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */ + +typedef int v4i32 __attribute__ ((vector_size(16))); + +void +copy (int* src, v4i32* dst) +{ + v4i32 chunk = (v4i32){src[0], src[1], src[2], src[3]}; + dst[0] = chunk; +} + +/* { dg-final { scan-assembler-not "insert" } } */ +/* { dg-final { scan-assembler-times "\tld\\\.w" 1 } } */ +/* { dg-final { scan-assembler-times "\tst\\\.w" 1 } } */ + -- 2.34.1