https://gcc.gnu.org/g:095700c4cc6dece45f45ae7285b6523170f08953
commit r16-3480-g095700c4cc6dece45f45ae7285b6523170f08953
Author: Shreya Munnangi <smunnan...@ventanamicro.com>
Date:   Sun Aug 31 07:48:21 2025 -0600

    [RISC-V] Improve initial RTL generation for SImode adds on rv64

    This is the next chunk of Shreya's work to adjust our add expanders.
    In this patch we're adding support for adding a 2*s12 immediate in
    SImode for rv64.

    To recap, the basic idea is to reduce our reliance on the
    define_insn_and_split that was added a year or so ago by synthesizing
    the more efficient sequence at expansion time.  By handling this early
    rather than late, the synthesized sequence participates in the various
    optimizer passes in the natural way.  In contrast, using the
    define_insn_and_split bypasses the cost modeling in combine and hides
    the synthesis until after reload has completed (which in turn leads to
    the problems seen in pr120811).

    This doesn't solve pr120811, but it is the last prerequisite patch
    before directly tackling pr120811.

    This has been bootstrapped & regression tested on the pioneer & bpi
    and has been through the usual testing on riscv32-elf and riscv64-elf.
    Waiting on pre-commit CI before moving forward.

    gcc/
            * config/riscv/riscv-protos.h (synthesize_add_extended): Prototype.
            * config/riscv/riscv.cc (synthesize_add_extended): New function.
            * config/riscv/riscv.md (addsi3): For RV64, try
            synthesize_add_extended.

    gcc/testsuite/
            * gcc.target/riscv/add-synthesis-2.c: New test.
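To make the synthesis concrete: for the SUM_OF_TWO_S12 case the
expansion should produce a pair of sign-extending adds.  A sketch of
the expected rv64 output at -O2 (illustrative only; register choices
are up to the allocator, and "foo" is a hypothetical function, not
part of the patch):

	# int foo (int x) { return x + 4094; }
	# 4094 = 2047 + 2047, so two 12-bit immediates cover it and
	# the split path emits two sign-extending adds.
	foo:
		addiw	a0,a0,2047
		addiw	a0,a0,2047
		ret

When the negated constant is cheaper to materialize, the expansion
instead loads -C into a scratch register and emits a single
sign-extending subtract (gen_subsi3_extended) against it.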
Diff:
---
 gcc/config/riscv/riscv-protos.h                  |  1 +
 gcc/config/riscv/riscv.cc                        | 86 ++++++++++++++++++++++++
 gcc/config/riscv/riscv.md                        | 27 ++++----
 gcc/testsuite/gcc.target/riscv/add-synthesis-2.c | 24 +++++++
 4 files changed, 125 insertions(+), 13 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 2d60a0ad44b3..46b256d63539 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -144,6 +144,7 @@ extern int riscv_register_move_cost (machine_mode, reg_class_t, reg_class_t);
 extern bool synthesize_ior_xor (rtx_code, rtx [3]);
 extern bool synthesize_and (rtx [3]);
 extern bool synthesize_add (rtx [3]);
+extern bool synthesize_add_extended (rtx [3]);
 #ifdef RTX_CODE
 extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 985fe67f8227..591122f91b8c 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -15466,6 +15466,92 @@ synthesize_add (rtx operands[3])
   return true;
 }
 
+/* Synthesize OPERANDS[0] = OPERANDS[1] + OPERANDS[2] for 32-bit
+   objects on a 64-bit target.
+
+   OPERANDS[0] and OPERANDS[1] will be a REG and may be the same
+   REG.
+
+   OPERANDS[2] is a CONST_INT.
+
+   Return TRUE if the operation was fully synthesized and the caller
+   need not generate additional code.  Return FALSE if the operation
+   was not synthesized and the caller is responsible for emitting the
+   proper sequence.  */
+
+bool
+synthesize_add_extended (rtx operands[3])
+{
+  /* If operands[2] is a 12-bit signed immediate,
+     no synthesis needs to be done.  */
+  if (SMALL_OPERAND (INTVAL (operands[2])))
+    return false;
+
+  HOST_WIDE_INT ival = INTVAL (operands[2]);
+  int budget1 = riscv_const_insns (operands[2], true);
+  int budget2 = riscv_const_insns (GEN_INT (-INTVAL (operands[2])), true);
+
+  /* If operands[2] can be split into two 12-bit signed immediates,
+     split the add into two adds.  */
+  if (SUM_OF_TWO_S12 (ival))
+    {
+      HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+      if (ival >= 0)
+	saturated = ~saturated;
+
+      ival -= saturated;
+
+      rtx temp = gen_reg_rtx (DImode);
+      emit_insn (gen_addsi3_extended (temp, operands[1], GEN_INT (saturated)));
+      temp = gen_lowpart (SImode, temp);
+      SUBREG_PROMOTED_VAR_P (temp) = 1;
+      SUBREG_PROMOTED_SET (temp, SRP_SIGNED);
+      emit_insn (gen_rtx_SET (operands[0], temp));
+
+      rtx t = gen_reg_rtx (DImode);
+      emit_insn (gen_addsi3_extended (t, operands[0], GEN_INT (ival)));
+      t = gen_lowpart (SImode, t);
+      SUBREG_PROMOTED_VAR_P (t) = 1;
+      SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+      emit_move_insn (operands[0], t);
+      return true;
+    }
+
+  /* If the negated value is cheaper to synthesize, subtract that from
+     operands[1].  */
+  if (budget2 < budget1)
+    {
+      rtx tmp = gen_reg_rtx (SImode);
+      emit_insn (gen_rtx_SET (tmp, GEN_INT (-INTVAL (operands[2]))));
+
+      rtx t = gen_reg_rtx (DImode);
+      emit_insn (gen_subsi3_extended (t, operands[1], tmp));
+      t = gen_lowpart (SImode, t);
+      SUBREG_PROMOTED_VAR_P (t) = 1;
+      SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+      emit_move_insn (operands[0], t);
+      return true;
+    }
+
+  /* Otherwise force the constant into a register and emit a single
+     sign-extending add.  */
+  rtx tsrc = force_reg (SImode, operands[2]);
+  rtx tdest = gen_reg_rtx (DImode);
+  emit_insn (gen_addsi3_extended (tdest, operands[1], tsrc));
+  tdest = gen_lowpart (SImode, tdest);
+  SUBREG_PROMOTED_VAR_P (tdest) = 1;
+  SUBREG_PROMOTED_SET (tdest, SRP_SIGNED);
+  emit_move_insn (operands[0], tdest);
+  return true;
+}
+
 /* HINT : argument specify the target cache
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 4718a75598a6..d34405cbc3c6 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -717,24 +717,25 @@
 	  (match_operand:SI 2 "reg_or_const_int_operand")))]
   ""
 {
+  /* We may be able to find a faster sequence.  If so, then we are
+     done.  Otherwise let expansion continue normally.  */
+  if (CONST_INT_P (operands[2])
+      && ((!TARGET_64BIT && synthesize_add (operands))
+	  || (TARGET_64BIT && synthesize_add_extended (operands))))
+    DONE;
+
+  /* Constants have already been handled.  */
   if (TARGET_64BIT)
     {
-      rtx t = gen_reg_rtx (DImode);
-
-      if (CONST_INT_P (operands[2]) && !SMALL_OPERAND (operands[2]))
-	operands[2] = force_reg (SImode, operands[2]);
-      emit_insn (gen_addsi3_extended (t, operands[1], operands[2]));
-      t = gen_lowpart (SImode, t);
-      SUBREG_PROMOTED_VAR_P (t) = 1;
-      SUBREG_PROMOTED_SET (t, SRP_SIGNED);
-      emit_move_insn (operands[0], t);
+      rtx tdest = gen_reg_rtx (DImode);
+      emit_insn (gen_addsi3_extended (tdest, operands[1], operands[2]));
+      tdest = gen_lowpart (SImode, tdest);
+      SUBREG_PROMOTED_VAR_P (tdest) = 1;
+      SUBREG_PROMOTED_SET (tdest, SRP_SIGNED);
+      emit_move_insn (operands[0], tdest);
       DONE;
     }
-
-  /* We may be able to find a faster sequence, if so, then we are
-     done.  Otherwise let expansion continue normally.  */
-  if (CONST_INT_P (operands[2]) && synthesize_add (operands))
-    DONE;
 })
 
 (define_expand "adddi3"
diff --git a/gcc/testsuite/gcc.target/riscv/add-synthesis-2.c b/gcc/testsuite/gcc.target/riscv/add-synthesis-2.c
new file mode 100644
index 000000000000..a0476151ba49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/add-synthesis-2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target rv64 } } */
+/* { dg-options "-march=rv64gcb -mabi=lp64d" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+#define T(C) int foo_##C (int x) { return x + C; }
+#define TM(C) int foo_M##C (int x) { return x + -C; }
+
+/* These cases were selected because they can all be synthesized
+   at expansion time without synthesizing the constant directly.
+
+   That makes the assembler scan testing simpler.  I've verified
+   by hand that cases that should synthesize the constant do in
+   fact still generate code that way.  */
+T (2050)
+T (4094)
+
+TM (2049)
+TM (4096)
+
+/* Each of the four tests above should be synthesized as a pair of
+   addiw insns.  */
+/* { dg-final { scan-assembler-times "addiw\t" 8 } } */
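For a quick check outside the DejaGnu harness, the scan can be
reproduced by hand (a sketch only; assumes a riscv64 cross compiler
named riscv64-unknown-elf-gcc is on the PATH):

	$ riscv64-unknown-elf-gcc -march=rv64gcb -mabi=lp64d -O2 -S \
	    gcc/testsuite/gcc.target/riscv/add-synthesis-2.c
	$ grep -c 'addiw' add-synthesis-2.s	# expect 8, per the dg-final scan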