>On 9/27/23 03:38, juzhe.zh...@rivai.ai wrote: >> >> Why add `can_create_pseudo_p ()` here? this will split after reload, >>>>but we forbid that pattern between reload and split2? >> >> I have no ideal. Some fortran tests just need recognization of >> mem-to-mem pattern before RA >> I don't know the reason. >But isn't that the key to understanding what's going on here?
Jeff Law: >There is nothing special about Fortran here. Whatever problem this is >working around will almost certainly show up again in other, >non-Fortran, contexts. I also ran into the problem of the mov<mode>_mem_to_mem pattern making IRA combine the instructions output by my cpymem patch into an unsplittable must-split pattern. And just plain removing the mem-to-mem pattern gives a newlib build failure. The underlying problem is in the declaration of riscv_vector::legitimize_move. The function gets passed by value a source and destination, and it either emits (instructions for) a move and returns true, or does checks and/or preparation statements and a modification of its *copy of* src and returns false - so the caller never sees the adjusted src. IIRC, we don't want C++ pass-by-reference syntax in GCC source, so the solution should be the tried-and-trusted method of passing an explicit pointer to rtl that we want modified. I have attached a patch, regression tested for: riscv-sim riscv-sim/-march=rv32gcv_zfh/-mabi=ilp32d/-ftree-vectorize/--param=riscv-autovec-preference=scalable riscv-sim/-march=rv32imac/-mabi=ilp32 riscv-sim/-march=rv64gcv_zfh_zvfh_zba_zbb_zbc_zicond_zicboz_zawrs/-mabi=lp64d/-ftree-vectorize/--param=riscv-autovec-preference=scalable riscv-sim/-march=rv64imac/-mabi=lp64 Incidentally, the optimization that the mov<mode>_mem_to_mem pattern made was invalid, as it didn't check alignments, nor that the target supports unaligned accesses with a fast hardware implementation. I think this optimization - with the appropriate check for hardware support - should be put into the non-vector path of the cpymem expander, simply as a relaxation of the alignment test for using scalar values spanning multiple addressable units.
Make riscv_vector::legitimize_move adjust SRC in the caller. 2023-09-29 Joern Rennecke <joern.renne...@embecosm.com> Juzhe-Zhong <juzhe.zh...@rivai.ai> PR target/111566 gcc/ * config/riscv/riscv-protos.h (riscv_vector::legitimize_move): Change second parameter to rtx *. * config/riscv/riscv-v.cc (riscv_vector::legitimize_move): Likewise. * config/riscv/vector.md: Changed callers of riscv_vector::legitimize_move. * config/riscv/vector.md (*mov<mode>_mem_to_mem): Remove. gcc/testsuite/ * gcc.target/riscv/rvv/autovec/vls/mov-1.c: Adapt test. * gcc.target/riscv/rvv/autovec/vls/mov-10.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/mov-3.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/mov-5.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/mov-7.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/mov-8.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/mov-9.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/mov-2.c: Removed. * gcc.target/riscv/rvv/autovec/vls/mov-4.c: Removed. * gcc.target/riscv/rvv/autovec/vls/mov-6.c: Removed. * gcc.target/riscv/rvv/fortran/pr111566.f90: New test. 
Co-Authored-By: Juzhe-Zhong <juzhe.zh...@rivai.ai> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 368982a447b..af5baf37e6a 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -421,7 +421,7 @@ rtx expand_builtin (unsigned int, tree, rtx); bool check_builtin_call (location_t, vec<location_t>, unsigned int, tree, unsigned int, tree *); bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); -bool legitimize_move (rtx, rtx); +bool legitimize_move (rtx, rtx *); void emit_vlmax_vsetvl (machine_mode, rtx); void emit_hard_vlmax_vsetvl (machine_mode, rtx); void emit_vlmax_insn (unsigned, unsigned, rtx *); diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 26700cfc732..097457562bd 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1217,10 +1217,12 @@ get_frm_mode (rtx operand) } /* Expand a pre-RA RVV data move from SRC to DEST. - It expands move for RVV fractional vector modes. */ + It expands move for RVV fractional vector modes. + Return true if the move has already been emitted. */ bool -legitimize_move (rtx dest, rtx src) +legitimize_move (rtx dest, rtx *srcp) { + rtx src = *srcp; machine_mode mode = GET_MODE (dest); if (CONST_VECTOR_P (src)) { @@ -1238,7 +1240,7 @@ legitimize_move (rtx dest, rtx src) { /* Need to force register if mem <- !reg. */ if (MEM_P (dest) && !REG_P (src)) - src = force_reg (mode, src); + *srcp = force_reg (mode, src); return false; } @@ -1269,7 +1271,7 @@ legitimize_move (rtx dest, rtx src) { /* Need to force register if mem <- !reg. */ if (MEM_P (dest) && !REG_P (src)) - src = force_reg (mode, src); + *srcp = force_reg (mode, src); return false; } diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index d5300a33946..cf5c0a40257 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -1037,7 +1037,7 @@ before spilling. 
The clobber scratch is used by spilling fractional registers in IRA/LRA so it's too early. */ - if (riscv_vector::legitimize_move (operands[0], operands[1])) + if (riscv_vector::legitimize_move (operands[0], &operands[1])) DONE; }) @@ -1093,7 +1093,7 @@ (match_operand:VB 1 "general_operand"))] "TARGET_VECTOR" { - if (riscv_vector::legitimize_move (operands[0], operands[1])) + if (riscv_vector::legitimize_move (operands[0], &operands[1])) DONE; }) @@ -1218,47 +1218,10 @@ (match_operand:VLS_AVL_IMM 1 "general_operand"))] "TARGET_VECTOR" { - if (riscv_vector::legitimize_move (operands[0], operands[1])) + if (riscv_vector::legitimize_move (operands[0], &operands[1])) DONE; }) -(define_insn_and_split "*mov<mode>_mem_to_mem" - [(set (match_operand:VLS_AVL_IMM 0 "memory_operand") - (match_operand:VLS_AVL_IMM 1 "memory_operand"))] - "TARGET_VECTOR && can_create_pseudo_p ()" - "#" - "&& 1" - [(const_int 0)] - { - if (GET_MODE_BITSIZE (<MODE>mode).to_constant () <= MAX_BITS_PER_WORD) - { - /* Opitmize the following case: - - typedef int8_t v2qi __attribute__ ((vector_size (2))); - v2qi v = *(v2qi*)in; - *(v2qi*)out = v; - - We prefer scalar load/store instead of vle.v/vse.v when - the VLS modes size is smaller scalar mode. 
*/ - machine_mode mode; - unsigned size = GET_MODE_BITSIZE (<MODE>mode).to_constant (); - if (FLOAT_MODE_P (<MODE>mode)) - mode = mode_for_size (size, MODE_FLOAT, 0).require (); - else - mode = mode_for_size (size, MODE_INT, 0).require (); - emit_move_insn (gen_lowpart (mode, operands[0]), - gen_lowpart (mode, operands[1])); - } - else - { - operands[1] = force_reg (<MODE>mode, operands[1]); - emit_move_insn (operands[0], operands[1]); - } - DONE; - } - [(set_attr "type" "vmov")] -) - (define_insn_and_split "*mov<mode>" [(set (match_operand:VLS_AVL_IMM 0 "reg_or_mem_operand" "=vr, m, vr") (match_operand:VLS_AVL_IMM 1 "reg_or_mem_operand" " m,vr, vr"))] @@ -1274,7 +1237,7 @@ || !register_operand (operands[1], <MODE>mode))" [(const_int 0)] { - bool ok_p = riscv_vector::legitimize_move (operands[0], operands[1]); + bool ok_p = riscv_vector::legitimize_move (operands[0], &operands[1]); gcc_assert (ok_p); DONE; } @@ -1286,7 +1249,7 @@ (match_operand:VLS_AVL_REG 1 "general_operand"))] "TARGET_VECTOR" { - bool ok_p = riscv_vector::legitimize_move (operands[0], operands[1]); + bool ok_p = riscv_vector::legitimize_move (operands[0], &operands[1]); gcc_assert (ok_p); DONE; }) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-1.c index aedf98819bb..24bb7240db8 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-1.c @@ -4,54 +4,6 @@ #include "def.h" -/* -** mov0: -** lbu\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** sb\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** ret -*/ -void mov0 (int8_t *in, int8_t *out) -{ - v1qi v = *(v1qi*)in; - *(v1qi*)out = v; -} - -/* -** mov1: -** lhu\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** sh\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** ret -*/ -void mov1 (int8_t *in, int8_t *out) -{ - v2qi v = *(v2qi*)in; - *(v2qi*)out = v; -} - -/* -** mov2: -** lw\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** sw\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** ret -*/ -void mov2 (int8_t 
*in, int8_t *out) -{ - v4qi v = *(v4qi*)in; - *(v4qi*)out = v; -} - -/* -** mov3: -** ld\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** sd\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** ret -*/ -void mov3 (int8_t *in, int8_t *out) -{ - v8qi v = *(v8qi*)in; - *(v8qi*)out = v; -} - /* ** mov4: ** vsetivli\s+zero,\s*16,\s*e8,\s*mf8,\s*t[au],\s*m[au] diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-10.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-10.c index 5e9615412b7..cae96b3be3f 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-10.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-10.c @@ -4,18 +4,6 @@ #include "def.h" -/* -** mov0: -** fld\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** fsd\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** ret -*/ -void mov0 (double *in, double *out) -{ - v1df v = *(v1df*)in; - *(v1df*)out = v; -} - /* ** mov1: ** vsetivli\s+zero,\s*2,\s*e64,\s*m1,\s*t[au],\s*m[au] diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-2.c deleted file mode 100644 index 10ae1972db7..00000000000 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-2.c +++ /dev/null @@ -1,19 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-march=rv32gcv_zvfh_zvl4096b -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */ -/* { dg-final { check-function-bodies "**" "" } } */ - -#include "def.h" - -/* -** mov: -** lw\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** lw\s+[a-x0-9]+,4\s*\([a-x0-9]+\) -** sw\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** sw\s+[a-x0-9]+,4\s*\([a-x0-9]+\) -** ret -*/ -void mov (int8_t *in, int8_t *out) -{ - v8qi v = *(v8qi*)in; - *(v8qi*)out = v; -} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-3.c index f2880ae5e77..86ce22896c5 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-3.c @@ -4,42 +4,6 @@ #include "def.h" -/* -** mov0: -** 
lhu\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** sh\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** ret -*/ -void mov0 (int16_t *in, int16_t *out) -{ - v1hi v = *(v1hi*)in; - *(v1hi*)out = v; -} - -/* -** mov1: -** lw\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** sw\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** ret -*/ -void mov1 (int16_t *in, int16_t *out) -{ - v2hi v = *(v2hi*)in; - *(v2hi*)out = v; -} - -/* -** mov2: -** ld\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** sd\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** ret -*/ -void mov2 (int16_t *in, int16_t *out) -{ - v4hi v = *(v4hi*)in; - *(v4hi*)out = v; -} - /* ** mov3: ** vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au] diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-4.c deleted file mode 100644 index f81f1697d65..00000000000 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-4.c +++ /dev/null @@ -1,19 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-march=rv32gcv_zvfh_zvl4096b -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */ -/* { dg-final { check-function-bodies "**" "" } } */ - -#include "def.h" - -/* -** mov: -** lw\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** lw\s+[a-x0-9]+,4\s*\([a-x0-9]+\) -** sw\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** sw\s+[a-x0-9]+,4\s*\([a-x0-9]+\) -** ret -*/ -void mov (int16_t *in, int16_t *out) -{ - v4hi v = *(v4hi*)in; - *(v4hi*)out = v; -} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-5.c index c30ed8f76f5..04475207966 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-5.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-5.c @@ -4,30 +4,6 @@ #include "def.h" -/* -** mov0: -** lw\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** sw\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** ret -*/ -void mov0 (int32_t *in, int32_t *out) -{ - v1si v = *(v1si*)in; - *(v1si*)out = v; -} - -/* -** mov1: -** ld\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** sd\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** ret -*/ -void mov1 (int32_t *in, 
int32_t *out) -{ - v2si v = *(v2si*)in; - *(v2si*)out = v; -} - /* ** mov2: ** vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au] diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-6.c deleted file mode 100644 index d6dbff1caa9..00000000000 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-6.c +++ /dev/null @@ -1,19 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-march=rv32gcv_zvfh_zvl4096b -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */ -/* { dg-final { check-function-bodies "**" "" } } */ - -#include "def.h" - -/* -** mov: -** lw\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** lw\s+[a-x0-9]+,4\s*\([a-x0-9]+\) -** sw\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** sw\s+[a-x0-9]+,4\s*\([a-x0-9]+\) -** ret -*/ -void mov (int32_t *in, int32_t *out) -{ - v2si v = *(v2si*)in; - *(v2si*)out = v; -} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-7.c index 46509e367c3..d0674a47a14 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-7.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-7.c @@ -4,18 +4,6 @@ #include "def.h" -/* -** mov0: -** ld\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** sd\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** ret -*/ -void mov0 (int64_t *in, int64_t *out) -{ - v1di v = *(v1di*)in; - *(v1di*)out = v; -} - /* ** mov1: ** vsetivli\s+zero,\s*2,\s*e64,\s*m1,\s*t[au],\s*m[au] diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-8.c index 1cba7ddad94..b905c74d43b 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-8.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-8.c @@ -4,42 +4,6 @@ #include "def.h" -/* -** mov0: -** flh\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** fsh\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** ret -*/ -void mov0 (_Float16 *in, _Float16 *out) -{ - v1hf v = *(v1hf*)in; - *(v1hf*)out = v; -} - -/* -** mov1: -** 
flw\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** fsw\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** ret -*/ -void mov1 (_Float16 *in, _Float16 *out) -{ - v2hf v = *(v2hf*)in; - *(v2hf*)out = v; -} - -/* -** mov2: -** fld\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** fsd\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** ret -*/ -void mov2 (_Float16 *in, _Float16 *out) -{ - v4hf v = *(v4hf*)in; - *(v4hf*)out = v; -} - /* ** mov3: ** vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au] diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-9.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-9.c index 0773f6a70f3..5f9bc052e97 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-9.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/mov-9.c @@ -4,30 +4,6 @@ #include "def.h" -/* -** mov0: -** flw\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** fsw\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** ret -*/ -void mov0 (float *in, float *out) -{ - v1sf v = *(v1sf*)in; - *(v1sf*)out = v; -} - -/* -** mov1: -** fld\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** fsd\s+[a-x0-9]+,0\s*\([a-x0-9]+\) -** ret -*/ -void mov1 (float *in, float *out) -{ - v2sf v = *(v2sf*)in; - *(v2sf*)out = v; -} - /* ** mov2: ** vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au] diff --git a/gcc/testsuite/gcc.target/riscv/rvv/fortran/pr111566.f90 b/gcc/testsuite/gcc.target/riscv/rvv/fortran/pr111566.f90 new file mode 100644 index 00000000000..265e913b299 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/fortran/pr111566.f90 @@ -0,0 +1,31 @@ +! { dg-do compile } +! 
{ dg-options "-march=rv64gcv -mabi=lp64d -Ofast -fallow-argument-mismatch -fmax-stack-var-size=65536 -S -std=legacy -w" } + +module a + integer,parameter :: SHR_KIND_R8 = selected_real_kind(12) +end module a +module b + use a, c => shr_kind_r8 +contains + subroutine d(cg , km, i1, i2) + real (c) ch(i2,km) + real (c) cg(4,i1:i2,km) + real dc(i2,km) + real(c) ci(i2,km) + real(c) cj(i2,km) + do k=2,ck + do i=i1,0 + cl = ci(i,k) *ci(i,1) / cj(i,k)+ch(i,1) + cm = cg(1,i,k) - min(e,cg(1,i,co)) + dc(i,k) = sign(cm, cl) + enddo + enddo + if ( cq == 0 ) then + do i=i1,i2 + if( cr <= cs ) then + cg= sign( min(ct, cg), cg) + endif + enddo + endif + end subroutine d +end module b