RISC-V's C-extension describes 2-byte instructions with special constraints. One of those constraints is that one of the sources/dest registers are equal (op will clobber one of it's operands). This patch adds support for combining simple sequences:
r1 = r2 + r3 (4 bytes) r2 DEAD r4 = r1 + r5 (4 bytes) r1 DEAD Combine pass now generates: r2 = r2 + r3 (2 bytes) r4 = r2 + r5 (4 bytes) r2 DEAD This change results in a ~150 Byte decrease in the linux kernel's compiled size (text: 5327254 Bytes -> 5327102 Bytes). I added this enforcement during the combine pass since it looks at the cost of certian expressions and can rely on the target to tell the pass that clobber-ops are cheaper than regular ops. The main thing holding this RFC back is the combine pass's behavior for sequences like this: b = a << 5; c = b + 2; Normally the combine pass modifies the RTL to be: c = (a << 5) + 2 before expanding it back to the original statement. With my changes, the RTL is prevented from being combined like that and instead results in RTL like this: c = 2 which is clearly wrong. I think that the next step would be to figure out where this re-expansion takes place and implement the same-register constraint there. However, I'm opening the RFC for any input: 1. Are there better ways to enforce same-register constraints during the combine pass other than declaring the source/dest register to be the same in RTL? Specifically, I'm concerned that this addition may restrict subsequent RTL pass optimizations. 2. Are there other concerns with implementing source-dest constraints within the combine pass? 3. Any other thoughts/input you have is welcome! 2022-03-10 Patrick O'Neill <patr...@rivosinc.com> * combine.cc: Add register equality replacement. * riscv.cc (riscv_insn_cost): Add in order to tell combine pass that clobber-ops are cheaper. * riscv.h: Add c extension argument macros. Signed-off-by: Patrick O'Neill <patr...@rivosinc.com> --- gcc/combine.cc | 71 +++++++++++++++++++++++++++++++++++++++ gcc/config/riscv/riscv.cc | 42 +++++++++++++++++++++++ gcc/config/riscv/riscv.h | 7 ++++ 3 files changed, 120 insertions(+) diff --git a/gcc/combine.cc b/gcc/combine.cc index 8f06ee0e54f..be910f8c734 100644 --- a/gcc/combine.cc +++ b/gcc/combine.cc @@ -3280,6 +3280,77 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0, i2_is_used = n_occurrences; } +/* Attempt to replace ops with clobber-ops. + If the target implements clobber ops (set r1 (plus (r1)(r2))) as cheaper, + this pattern allows the combine pass to optimize with that in mind. + NOTE: This conditional is not triggered in most cases. Ideally we would be + able to move it above the if (i2_is_used == 0), but that breaks the + testsuite. + See RFC blurb for more info. */ + if (!i0 && !i1 && i2 && i3 && GET_CODE(PATTERN(i2)) == SET && GET_CODE(SET_DEST(PATTERN(i2))) == REG + && GET_CODE(PATTERN(i3)) == SET + && (GET_RTX_CLASS(GET_CODE(SET_SRC(PATTERN(i3)))) == RTX_BIN_ARITH || GET_RTX_CLASS(GET_CODE(SET_SRC(PATTERN(i3)))) == RTX_COMM_ARITH || GET_RTX_CLASS(GET_CODE(SET_SRC(PATTERN(i3)))) == RTX_UNARY) + && GET_CODE(SET_DEST(PATTERN(i3))) == REG && REGNO(XEXP(SET_SRC(PATTERN(i3)), 0)) != REGNO(SET_DEST(PATTERN(i3)))) { + + rtx_code insn_class = GET_CODE(SET_SRC (PATTERN(i2))); + + if (GET_RTX_CLASS(insn_class) == RTX_BIN_ARITH || GET_RTX_CLASS(insn_class) == RTX_COMM_ARITH || GET_RTX_CLASS(insn_class) == RTX_UNARY) { + + rtx operand1 = XEXP (SET_SRC (PATTERN (i2)), 0); + rtx prior_reg = SET_DEST (PATTERN (i2)); + + if (GET_CODE(operand1) == REG + && GET_MODE(operand1) == GET_MODE(prior_reg) + && find_reg_note (i2, REG_DEAD, operand1) + && regno_use_in (REGNO(prior_reg), PATTERN(i3)) + && find_reg_note (i3, REG_DEAD, SET_DEST (PATTERN(i2)))) { + + // Now we have a dead operand register, and we know where the dest dies. + + // Remove the note declaring the register as dead + rtx note = find_reg_note (i2, REG_DEAD, operand1); + remove_note (i2, note); + + // Overwrite i2 dest with operand1 + rtx i2_dest = copy_rtx(operand1); + SUBST (SET_DEST (PATTERN (i2)), i2_dest); + + // Replace the previous i2 dest register with operand1 in i3 + rtx op1_copy = copy_rtx(operand1); + rtx new_src = simplify_replace_rtx(SET_SRC (PATTERN (i3)), prior_reg, op1_copy); + SUBST (SET_SRC (PATTERN (i3)), new_src); + + // Move the dest dead note to the new register + note = find_reg_note (i3, REG_DEAD, prior_reg); + if (note) { + remove_note (i3, note); + //add_reg_note (i3, REG_DEAD, op1_copy); + } + + newi2pat = PATTERN (i2); + newpat = PATTERN (i3); + + subst_insn = i3; + subst_low_luid = DF_INSN_LUID (i2); + added_sets_2 = added_sets_1 = added_sets_0 = 0; + i2dest = i2_dest; + i2dest_killed = dead_or_set_p (i2, i2dest); + + changed_i3_dest = false; + + i2_code_number = recog_for_combine (&newi2pat, i2, &new_i2_notes); + + if (i2_code_number >= 0) + insn_code_number = recog_for_combine (&newpat, i3, &new_i3_notes); + + if (insn_code_number >= 0) + swap_i2i3 = true; + + goto validate_replacement; + } + } + } + /* If we already got a failure, don't try to do more. Otherwise, try to substitute I1 if we have it. */ diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 7da9d377120..9f0bd59ac41 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -2160,6 +2160,46 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN } } +/* Helper for INSN_COST. + + Copied from arc.cc + + Per Segher Boessenkool: rtx_costs computes the cost for any rtx (an + insn, a set, a set source, any random piece of one). set_src_cost, + set_rtx_cost, etc. are helper functions that use that. + + Those functions do not work for parallels. Also, costs are not + additive like this simplified model assumes. Also, more complex + backends tend to miss many cases in their rtx_costs function. + + Many passes that want costs want to know the cost of a full insn. Like + combine. That's why I created insn_cost: it solves all of the above + problems. */ + +static int +riscv_insn_cost (rtx_insn *insn, bool speed) +{ + rtx pat = PATTERN (insn); + + if (TARGET_RVC && !speed) { + if (GET_CODE(pat) == SET && GET_CODE(SET_DEST(pat)) == REG) { + rtx src = SET_SRC(pat); + rtx dest = SET_DEST(pat); + if (GET_CODE(src) == PLUS && GET_CODE(XEXP(src, 0)) == REG && REGNO(XEXP(src, 0)) == REGNO(dest)) { + if (GET_CODE(XEXP(src, 1)) == REG) + return 2; + else if (GET_CODE(XEXP(src, 1)) == CONST_INT && CMPRESD_OPERAND(INTVAL(XEXP(src, 1)))) + return 2; + } else if (GET_CODE(src) == ASHIFT && GET_CODE(XEXP(src, 0)) == REG && REGNO(dest) == REGNO(XEXP(src, 0))) { + if (GET_CODE(XEXP(src, 1)) == CONST_INT && UNSIGNED_CMPRESD_OPERAND(INTVAL(XEXP(src, 1)))) + return 2; + } + } + } + + return pattern_cost (pat, speed); +} + /* Implement TARGET_ADDRESS_COST. */ static int @@ -5618,6 +5658,8 @@ riscv_asan_shadow_offset (void) #define TARGET_RTX_COSTS riscv_rtx_costs #undef TARGET_ADDRESS_COST #define TARGET_ADDRESS_COST riscv_address_cost +#undef TARGET_INSN_COST +#define TARGET_INSN_COST riscv_insn_cost #undef TARGET_ASM_FILE_START #define TARGET_ASM_FILE_START riscv_file_start diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 8a4d2cf7f85..be5e77b1394 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -522,6 +522,13 @@ enum reg_class #define SMALL_OPERAND(VALUE) \ ((unsigned HOST_WIDE_INT) (VALUE) + IMM_REACH/2 < IMM_REACH) +#define CMPRESD_OPERAND(VALUE) \ + (VALUE < 32 && VALUE >= -32) + +/* True if VALUE is an unsigned 5-bit number. */ +#define UNSIGNED_CMPRESD_OPERAND(VALUE) \ + (VALUE < 64 && VALUE >= 0) + /* True if VALUE can be loaded into a register using LUI. */ #define LUI_OPERAND(VALUE) \ -- 2.25.1