This patch aims at better support of CPSE instruction in cases where otherwise code like
TST Rn ; *cmpqi BREQ .+2 ; branch RJMP .Lm would be produced. As branch is not a single instruction, it cannot be skipped. peephole seems to be the only feature that can do this optimization. I tried a combine pattern and it works will and is straight forward, but then it's hard to detect the situations where TST is superfluous because some arithmetic instruct already set cc0. I also tried movqicc to emit code and to prefer jumps that please CPSE, but the middle end has it's own understanding of what the code should look like and it's merely impossible to work against that in the backend. Moreover there was changes (like for PR45416 to feature specific target) that turned out to give worse code for AVR. But as so often the "what the hell is AVR?" target is left behind. So here is the patch even though I really don't like peephole[2]. It passes without regressions and has a bit of code cleanup, too. Ok to apply? Johann * config/avr/avr-protos.h (lpm_reg_rtx, lpm_addr_reg_rtx, tmp_reg_rtx, zero_reg_rtx, all_regs_rtx, rampz_rtx): Make global. * config/avr/avr.c: Ditto. (avr_regnames): Remove because unused. * config/avr/avr.md (*cpse.ne): New peephole. (*cpse.eq): New peephole from former cpse peepholes.
Index: config/avr/avr.md =================================================================== --- config/avr/avr.md (revision 183529) +++ config/avr/avr.md (working copy) @@ -5010,27 +5010,61 @@ (define_peephole AS1 (jmp,%1)); }") -(define_peephole + +(define_peephole ; "*cpse.eq" [(set (cc0) - (compare (match_operand:QI 0 "register_operand" "") - (const_int 0))) + (compare (match_operand:QI 1 "register_operand" "r,r") + (match_operand:QI 2 "reg_or_0_operand" "r,L"))) (set (pc) - (if_then_else (eq (cc0) (const_int 0)) - (label_ref (match_operand 1 "" "")) - (pc)))] - "jump_over_one_insn_p (insn, operands[1])" - "cpse %0,__zero_reg__") + (if_then_else (eq (cc0) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "jump_over_one_insn_p (insn, operands[0])" + "@ + cpse %1,%2 + cpse %1,__zero_reg__") -(define_peephole +;; This peephole avoids code like +;; +;; TST Rn ; *cmpqi +;; BREQ .+2 ; branch +;; RJMP .Lm +;; +;; Notice that the peephole is always shorter than cmpqi + branch. +;; The reason to write it as peephole is that sequences like +;; +;; AND Rm, Rn +;; BRNE .La +;; +;; shall not be superseeded. With a respective combine pattern +;; the latter sequence would be +;; +;; AND Rm, Rn +;; CPSE Rm, __zero_reg__ +;; RJMP .La +;; +;; and thus longer and slower and not easy to be rolled back. + +(define_peephole ; "*cpse.ne" [(set (cc0) - (compare (match_operand:QI 0 "register_operand" "") - (match_operand:QI 1 "register_operand" ""))) + (compare (match_operand:QI 1 "register_operand" "") + (match_operand:QI 2 "reg_or_0_operand" ""))) (set (pc) - (if_then_else (eq (cc0) (const_int 0)) - (label_ref (match_operand 2 "" "")) + (if_then_else (ne (cc0) + (const_int 0)) + (label_ref (match_operand 0 "" "")) (pc)))] - "jump_over_one_insn_p (insn, operands[2])" - "cpse %0,%1") + "!AVR_HAVE_JMP_CALL + || !avr_current_device->errata_skip" + { + if (operands[2] == const0_rtx) + operands[2] = zero_reg_rtx; + + return 3 == avr_jump_mode (operands[0], insn) + ? "cpse %1,%2\;jmp %0" + : "cpse %1,%2\;rjmp %0"; + }) ;;pppppppppppppppppppppppppppppppppppppppppppppppppppp ;;prologue/epilogue support instructions Index: config/avr/avr-protos.h =================================================================== --- config/avr/avr-protos.h (revision 183529) +++ config/avr/avr-protos.h (working copy) @@ -124,6 +124,14 @@ extern bool avr_mem_flash_p (rtx); extern bool avr_mem_memx_p (rtx); extern bool avr_load_libgcc_p (rtx); extern bool avr_xload_libgcc_p (enum machine_mode); + +extern rtx lpm_reg_rtx; +extern rtx lpm_addr_reg_rtx; +extern rtx tmp_reg_rtx; +extern rtx zero_reg_rtx; +extern rtx all_regs_rtx[32]; +extern rtx rampz_rtx; + #endif /* RTX_CODE */ #ifdef REAL_VALUE_TYPE Index: config/avr/avr.c =================================================================== --- config/avr/avr.c (revision 183529) +++ config/avr/avr.c (working copy) @@ -140,30 +140,33 @@ static bool avr_rtx_costs (rtx, int, int #define FIRST_CUM_REG 26 /* Implicit target register of LPM instruction (R0) */ -static GTY(()) rtx lpm_reg_rtx; +extern GTY(()) rtx lpm_reg_rtx; +rtx lpm_reg_rtx; /* (Implicit) address register of LPM instruction (R31:R30 = Z) */ -static GTY(()) rtx lpm_addr_reg_rtx; +extern GTY(()) rtx lpm_addr_reg_rtx; +rtx lpm_addr_reg_rtx; -/* Temporary register RTX (gen_rtx_REG (QImode, TMP_REGNO)) */ -static GTY(()) rtx tmp_reg_rtx; +/* Temporary register RTX (reg:QI TMP_REGNO) */ +extern GTY(()) rtx tmp_reg_rtx; +rtx tmp_reg_rtx; + +/* Zeroed register RTX (reg:QI ZERO_REGNO) */ +extern GTY(()) rtx zero_reg_rtx; +rtx zero_reg_rtx; -/* Zeroed register RTX (gen_rtx_REG (QImode, ZERO_REGNO)) */ -static GTY(()) rtx zero_reg_rtx; +/* RTXs for all general purpose registers as QImode */ +extern GTY(()) rtx all_regs_rtx[32]; +rtx all_regs_rtx[32]; /* RAMPZ special function register */ -static GTY(()) rtx rampz_rtx; +extern GTY(()) rtx rampz_rtx; +rtx rampz_rtx; /* RTX containing the strings "" and "e", respectively */ static GTY(()) rtx xstring_empty; static GTY(()) rtx xstring_e; -/* RTXs for all general purpose registers as QImode */ -static GTY(()) rtx all_regs_rtx[32]; - -/* AVR register names {"r0", "r1", ..., "r31"} */ -static const char *const avr_regnames[] = REGISTER_NAMES; - /* Preprocessor macros to define depending on MCU type. */ const char *avr_extra_arch_macro;