Committed, thanks!
On Tue, May 24, 2022 at 3:40 AM Philipp Tomsich <philipp.toms...@vrull.eu> wrote: > > Good catch! > > On Mon, 23 May 2022 at 20:12, Vineet Gupta <vine...@rivosinc.com> wrote: > > > Under extreme register pressure, the compiler can use FP <--> int > > moves as a cheap alternative to spilling to memory. > > This was seen with SPEC2017 FP benchmark 507.cactu: > > ML_BSSN_Advect.cc:ML_BSSN_Advect_Body() > > > > | fmv.d.x fa5,s9 # PDupwindNthSymm2Xt1, PDupwindNthSymm2Xt1 > > | .LVL325: > > | ld s9,184(sp) # _12469, %sfp > > | ... > > | .LVL339: > > | fmv.x.d s4,fa5 # PDupwindNthSymm2Xt1, PDupwindNthSymm2Xt1 > > | > > > > The FMV instructions could be costlier (than stack spill) on certain > > micro-architectures, thus this needs to be a per-cpu tunable > > (default being to inhibit on all existing RV cpus). > > > > Testsuite run with new test reports 10 failures without the fix > > corresponding to the build variations of pr105666.c > > > > | === gcc Summary === > > | > > | # of expected passes 123318 (+10) > > | # of unexpected failures 34 (-10) > > | # of unexpected successes 4 > > | # of expected failures 780 > > | # of unresolved testcases 4 > > | # of unsupported tests 2796 > > > > gcc/ChangeLog: > > > > * config/riscv/riscv.cc (struct riscv_tune_param): Add > > fmv_cost. > > (rocket_tune_info): Add default fmv_cost 8. > > (sifive_7_tune_info): Ditto. > > (thead_c906_tune_info): Ditto. > > (optimize_size_tune_info): Ditto. > > (riscv_register_move_cost): Use fmv_cost for int<->fp moves. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/riscv/pr105666.c: New test. 
> > > > Signed-off-by: Vineet Gupta <vine...@rivosinc.com> > > --- > > gcc/config/riscv/riscv.cc | 9 ++++ > > gcc/testsuite/gcc.target/riscv/pr105666.c | 55 +++++++++++++++++++++++ > > 2 files changed, 64 insertions(+) > > create mode 100644 gcc/testsuite/gcc.target/riscv/pr105666.c > > > > diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc > > index ee756aab6940..f3ac0d8865f0 100644 > > --- a/gcc/config/riscv/riscv.cc > > +++ b/gcc/config/riscv/riscv.cc > > @@ -220,6 +220,7 @@ struct riscv_tune_param > > unsigned short issue_rate; > > unsigned short branch_cost; > > unsigned short memory_cost; > > + unsigned short fmv_cost; > > bool slow_unaligned_access; > > }; > > > > @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info > > = { > > 1, /* issue_rate */ > > 3, /* branch_cost */ > > 5, /* memory_cost */ > > + 8, /* fmv_cost */ > > true, /* > > slow_unaligned_access */ > > }; > > > > @@ -298,6 +300,7 @@ static const struct riscv_tune_param > > sifive_7_tune_info = { > > 2, /* issue_rate */ > > 4, /* branch_cost */ > > 3, /* memory_cost */ > > + 8, /* fmv_cost */ > > true, /* > > slow_unaligned_access */ > > }; > > > > @@ -311,6 +314,7 @@ static const struct riscv_tune_param > > thead_c906_tune_info = { > > 1, /* issue_rate */ > > 3, /* branch_cost */ > > 5, /* memory_cost */ > > + 8, /* fmv_cost */ > > false, /* slow_unaligned_access */ > > }; > > > > @@ -324,6 +328,7 @@ static const struct riscv_tune_param > > optimize_size_tune_info = { > > 1, /* issue_rate */ > > 1, /* branch_cost */ > > 2, /* memory_cost */ > > + 8, /* fmv_cost */ > > false, /* slow_unaligned_access */ > > }; > > > > @@ -4737,6 +4742,10 @@ static int > > riscv_register_move_cost (machine_mode mode, > > reg_class_t from, reg_class_t to) > > { > > + if ((from == FP_REGS && to == GR_REGS) || > > + (from == GR_REGS && to == FP_REGS)) > > + return tune_param->fmv_cost; > > + > > return riscv_secondary_memory_needed (mode, from, to) ? 
8 : 2; > > } > > > > diff --git a/gcc/testsuite/gcc.target/riscv/pr105666.c > > b/gcc/testsuite/gcc.target/riscv/pr105666.c > > new file mode 100644 > > index 000000000000..904f3bc0763f > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/riscv/pr105666.c > > @@ -0,0 +1,55 @@ > > +/* Shamelessly plugged off > > gcc/testsuite/gcc.c-torture/execute/pr28982a.c. > > + > > + The idea is to induce high register pressure for both int/fp registers > > + so that they spill. By default FMV instructions would be used to stash > > + int reg to a fp reg (and vice-versa) but that could be costlier than > > + spilling to stack. */ > > + > > +/* { dg-do compile } */ > > +/* { dg-options "-march=rv64g -ffast-math" } */ > > + > > +#define NITER 4 > > +#define NVARS 20 > > +#define MULTI(X) \ > > + X( 0), X( 1), X( 2), X( 3), X( 4), X( 5), X( 6), X( 7), X( 8), X( 9), \ > > + X(10), X(11), X(12), X(13), X(14), X(15), X(16), X(17), X(18), X(19) > > + > > +#define DECLAREI(INDEX) inc##INDEX = incs[INDEX] > > +#define DECLAREF(INDEX) *ptr##INDEX = ptrs[INDEX], result##INDEX = 5 > > +#define LOOP(INDEX) result##INDEX += result##INDEX * (*ptr##INDEX), > > ptr##INDEX += inc##INDEX > > +#define COPYOUT(INDEX) results[INDEX] = result##INDEX > > + > > +double *ptrs[NVARS]; > > +double results[NVARS]; > > +int incs[NVARS]; > > + > > +void __attribute__((noinline)) > > +foo (int n) > > +{ > > + int MULTI (DECLAREI); > > + double MULTI (DECLAREF); > > + while (n--) > > + MULTI (LOOP); > > + MULTI (COPYOUT); > > +} > > + > > +double input[NITER * NVARS]; > > + > > +int > > +main (void) > > +{ > > + int i; > > + > > + for (i = 0; i < NVARS; i++) > > + ptrs[i] = input + i, incs[i] = i; > > + for (i = 0; i < NITER * NVARS; i++) > > + input[i] = i; > > + foo (NITER); > > + for (i = 0; i < NVARS; i++) > > + if (results[i] != i * NITER * (NITER + 1) / 2) > > + return 1; > > + return 0; > > +} > > + > > +/* { dg-final { scan-assembler-not "\tfmv\\.d\\.x\t" } } */ > > +/* { dg-final { scan-assembler-not 
"\tfmv\\.x\\.d\t" } } */ > > -- > > 2.32.0 > > > >