https://gcc.gnu.org/g:94355acc2debe03eb3b0a85229e340675a1ff6bd
commit r15-8674-g94355acc2debe03eb3b0a85229e340675a1ff6bd
Author: Georg-Johann Lay <a...@gjlay.de>
Date:   Sat Mar 15 20:53:52 2025 +0100

    AVR: target/119421 Better optimize some bit operations.
    
    There are occasions where knowledge about nonzero bits makes some
    optimizations possible.  For example,
    
        Rd |= Rn << Off
    
    can be implemented as
    
        SBRC Rn, 0
        ORI  Rd, 1 << Off
    
    when Rn in { 0, 1 }, i.e. nonzero_bits (Rn) == 1.  This patch adds some
    patterns that exploit nonzero_bits() in some combiner patterns.  As insn
    conditions are not supposed to contain nonzero_bits(), the patch splits
    such insns right after pass insn combine.
    
            PR target/119421
    gcc/
            * config/avr/avr.opt (-muse-nonzero-bits): New option.
            * config/avr/avr-protos.h (avr_nonzero_bits_lsr_operands_p): New.
            (make_avr_pass_split_nzb): New.
            * config/avr/avr.cc (avr_nonzero_bits_lsr_operands_p): New function.
            (avr_rtx_costs_1): Return costs for the new insns.
            * config/avr/avr.md (nzb): New insn attribute.
            (*nzb=1.<code>...): New insns to better support some bit operations
            for <code> in AND, IOR, XOR.
            * config/avr/avr-passes.def (avr_pass_split_nzb): Insert pass after
            combine.
            * config/avr/avr-passes.cc (avr_pass_data_split_nzb): New pass data.
            (avr_pass_split_nzb): New pass.
            (make_avr_pass_split_nzb): New function.
            * common/config/avr/avr-common.cc (avr_option_optimization_table):
            Enable -muse-nonzero-bits for -O2 and higher.
            * doc/invoke.texi (AVR Options): Document -muse-nonzero-bits.
    gcc/testsuite/
            * gcc.target/avr/torture/pr119421-sreg.c: New test.

Diff:
---
 gcc/common/config/avr/avr-common.cc                |   1 +
 gcc/config/avr/avr-passes.cc                       |  71 +++++
 gcc/config/avr/avr-passes.def                      |  12 +
 gcc/config/avr/avr-protos.h                        |   2 +
 gcc/config/avr/avr.cc                              | 106 ++++++++
 gcc/config/avr/avr.md                              | 215 +++++++++++++++
 gcc/config/avr/avr.opt                             |   4 +
 gcc/doc/invoke.texi                                |   9 +-
 .../gcc.target/avr/torture/pr119421-sreg.c         | 301 +++++++++++++++++++++
 9 files changed, 719 insertions(+), 2 deletions(-)

diff --git a/gcc/common/config/avr/avr-common.cc b/gcc/common/config/avr/avr-common.cc
index 06c6cc856d3c..203a96528186 100644
--- a/gcc/common/config/avr/avr-common.cc
+++ b/gcc/common/config/avr/avr-common.cc
@@ -42,6 +42,7 @@ static const struct default_options avr_option_optimization_table[] =
     { OPT_LEVELS_2_PLUS, OPT_mfuse_move_, NULL, 23 },
     { OPT_LEVELS_2_PLUS, OPT_msplit_bit_shift, NULL, 1 },
     { OPT_LEVELS_2_PLUS, OPT_msplit_ldst, NULL, 1 },
+    { OPT_LEVELS_2_PLUS, OPT_muse_nonzero_bits, NULL, 1 },
     // Stick to the "old" placement of the subreg lowering pass.
     { OPT_LEVELS_1_PLUS, OPT_fsplit_wide_types_early, NULL, 1 },
     /* Allow optimizer to introduce store data races. This used to be the
diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 184619af6cb4..2c21e7be7aba 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -29,6 +29,7 @@
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "diagnostic-core.h"
 #include "cfghooks.h"
 #include "cfganal.h"
 #include "df.h"
@@ -4846,6 +4847,70 @@ avr_pass_fuse_add::execute1 (function *func)
 }
 
 
+
+//////////////////////////////////////////////////////////////////////////////
+// Split insns with nonzero_bits() after combine.
+
+static const pass_data avr_pass_data_split_nzb =
+{
+  RTL_PASS,      // type
+  "",            // name (will be patched)
+  OPTGROUP_NONE, // optinfo_flags
+  TV_DF_SCAN,    // tv_id
+  0,             // properties_required
+  0,             // properties_provided
+  0,             // properties_destroyed
+  0,             // todo_flags_start
+  0              // todo_flags_finish
+};
+
+class avr_pass_split_nzb : public rtl_opt_pass
+{
+public:
+  avr_pass_split_nzb (gcc::context *ctxt, const char *name)
+    : rtl_opt_pass (avr_pass_data_split_nzb, ctxt)
+  {
+    this->name = name;
+  }
+
+  unsigned int execute (function *) final override
+  {
+    if (avropt_use_nonzero_bits)
+      split_nzb_insns ();
+    return 0;
+  }
+
+  void split_nzb_insns ();
+
+}; // avr_pass_split_nzb
+
+
+void
+avr_pass_split_nzb::split_nzb_insns ()
+{
+  rtx_insn *next;
+
+  for (rtx_insn *insn = get_insns (); insn; insn = next)
+    {
+      next = NEXT_INSN (insn);
+
+      if (INSN_P (insn)
+          && single_set (insn)
+          && get_attr_nzb (insn) == NZB_YES)
+        {
+          rtx_insn *last = try_split (PATTERN (insn), insn, 1 /*last*/);
+
+          // The nonzero_bits() insns *must* split.  If not: ICE.
+          if (last == insn)
+            {
+              debug_rtx (insn);
+              internal_error ("failed to split insn");
+            }
+        }
+    }
+}
+
+
 //////////////////////////////////////////////////////////////////////////////
 // Split shift insns after peephole2 / befor avr-fuse-move.
 
@@ -5645,6 +5710,12 @@ make_avr_pass_casesi (gcc::context *ctxt)
   return new avr_pass_casesi (ctxt, "avr-casesi");
 }
 
+rtl_opt_pass *
+make_avr_pass_split_nzb (gcc::context *ctxt)
+{
+  return new avr_pass_split_nzb (ctxt, "avr-split-nzb");
+}
+
 // Try to replace 2 cbranch insns with 1 comparison and 2 branches.
 
 rtl_opt_pass *
diff --git a/gcc/config/avr/avr-passes.def b/gcc/config/avr/avr-passes.def
index 091005e3b948..eb60a93eeeb0 100644
--- a/gcc/config/avr/avr-passes.def
+++ b/gcc/config/avr/avr-passes.def
@@ -74,6 +74,18 @@ INSERT_PASS_BEFORE (pass_free_cfg, 1, avr_pass_recompute_notes);
 
 INSERT_PASS_AFTER (pass_expand, 1, avr_pass_casesi);
 
+/* Some combine insns have nonzero_bits() in their condition, though insns
+   should not use such stuff in their condition.  Therefore, we split such
+   insns into something without nonzero_bits() in their condition right after
+   insn combine.
+
+   Since neither split_all_insns() nor split_all_insns_noflow() work at that
+   point (presumably since there are splits involving branches), we split
+   respective insns (and only such insns) by hand.  Respective insns are
+   tagged with insn attribute nzb = "yes" so that they are easy to spot.  */
+
+INSERT_PASS_AFTER (pass_combine, 1, avr_pass_split_nzb);
+
 /* If-else decision trees generated for switch / case may produce sequences
    like
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index 83137c7f6f63..ca30136797dd 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -136,6 +136,7 @@ extern bool reg_unused_after (rtx_insn *insn, rtx reg);
 extern int avr_jump_mode (rtx x, rtx_insn *insn, int = 0);
 extern bool test_hard_reg_class (enum reg_class rclass, rtx x);
 extern bool jump_over_one_insn_p (rtx_insn *insn, rtx dest);
+extern bool avr_nonzero_bits_lsr_operands_p (rtx_code, rtx *);
 extern void avr_final_prescan_insn (rtx_insn *insn, rtx *operand,
                                     int num_operands);
@@ -205,6 +206,7 @@ extern rtl_opt_pass *make_avr_pass_pre_proep (gcc::context *);
 extern rtl_opt_pass *make_avr_pass_recompute_notes (gcc::context *);
 extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *);
 extern rtl_opt_pass *make_avr_pass_ifelse (gcc::context *);
+extern rtl_opt_pass *make_avr_pass_split_nzb (gcc::context *);
 extern rtl_opt_pass *make_avr_pass_split_after_peephole2 (gcc::context *);
 #ifdef RTX_CODE
 extern bool avr_casei_sequence_check_operands (rtx *xop);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 0ce06a1e580a..d94df84037c6 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -12706,6 +12706,50 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
 	}
     }
 
+  // Insns with nonzero_bits() == 1 in the condition.
+  if (avropt_use_nonzero_bits
+      && mode == QImode
+      && (code == AND || code == IOR || code == XOR)
+      && REG_P (XEXP (x, 1)))
+    {
+      // "*nzb=1.<code>.lsr_split"
+      // "*nzb=1.<code>.lsr.not_split"
+      bool is_nzb = (GET_CODE (XEXP (x, 0)) == LSHIFTRT
+                     && (REG_P (XEXP (XEXP (x, 0), 0))
+                         || GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR)
+                     && const_0_to_7_operand (XEXP (XEXP (x, 0), 1), QImode));
+      // "*nzb=1.<code>.zerox_split"
+      // "*nzb=1.<code>.zerox.not_split"
+      is_nzb |= (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
+                 && (REG_P (XEXP (XEXP (x, 0), 0))
+                     || GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR)
+                 && const1_operand (XEXP (XEXP (x, 0), 1), QImode)
+                 && const_0_to_7_operand (XEXP (XEXP (x, 0), 2), QImode));
+      // "*nzb=1.<code>.ge0_split"
+      is_nzb |= (GET_CODE (XEXP (x, 0)) == GE
+                 && REG_P (XEXP (XEXP (x, 0), 0))
+                 && const0_operand (XEXP (XEXP (x, 0), 1), QImode));
+      if (is_nzb)
+        {
+          *total = COSTS_N_INSNS (code == XOR ? 3 : 2);
+          return true;
+        }
+    }
+
+  // Insn "*nzb=1.ior.ashift_split" with nonzero_bits() == 1 in the condition.
+  if (avropt_use_nonzero_bits
+      && mode == QImode
+      && code == IOR
+      && REG_P (XEXP (x, 1))
+      && GET_CODE (XEXP (x, 0)) == ASHIFT
+      && REG_P (XEXP (XEXP (x, 0), 0))
+      && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
+    {
+      *total = COSTS_N_INSNS (2);
+      return true;
+    }
+
   switch (code)
     {
     case CONST_INT:
@@ -13684,6 +13728,28 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
       *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed);
       return true;
 
+    case GE:
+      if (mode == QImode
+          && REG_P (XEXP (x, 0))
+          && XEXP (x, 1) == const0_rtx)
+        {
+          *total = COSTS_N_INSNS (3);
+          return true;
+        }
+      break;
+
+    case ZERO_EXTRACT:
+      if (mode == QImode
+          && REG_P (XEXP (x, 0))
+          && XEXP (x, 1) == const1_rtx
+          && CONST_INT_P (XEXP (x, 2)))
+        {
+          int bpos = INTVAL (XEXP (x, 2));
+          *total = COSTS_N_INSNS (bpos == 0 ? 1 : bpos == 1 ? 2 : 3);
+          return true;
+        }
+      break;
+
     case COMPARE:
       switch (GET_MODE (XEXP (x, 0)))
 	{
@@ -15171,6 +15237,46 @@ avr_emit3_fix_outputs (rtx (*gen)(rtx,rtx,rtx), rtx *op,
 }
 
 
+/* A helper for the insn condition of "*nzb=1.<code>.lsr[.not]_split"
+   where <code> is AND, IOR or XOR.  Return true when
+
+       OP[0] <code>= OP[1] >> OP[2]
+
+   can be performed by means of the code of "*nzb=1.<code>.zerox", i.e.
+
+       OP[0] <code>= OP[1].OP[2]
+
+   For example, when OP[0] is in { 0, 1 }, then R24 &= R10.4
+   can be performed by means of SBRS R10,4 $ CLR R24.
+   Notice that the constraint of OP[3] is "0".  */
+
+bool
+avr_nonzero_bits_lsr_operands_p (rtx_code code, rtx *op)
+{
+  if (reload_completed)
+    return false;
+
+  const auto offs = INTVAL (op[2]);
+  const auto op1_non0 = nonzero_bits (op[1], QImode);
+  const auto op3_non0 = nonzero_bits (op[3], QImode);
+
+  switch (code)
+    {
+    default:
+      gcc_unreachable ();
+
+    case IOR:
+    case XOR:
+      return op1_non0 >> offs == 1;
+
+    case AND:
+      return op3_non0 == 1;
+    }
+
+  return false;
+}
+
+
 /* Worker function for cpymemhi expander.
    XOP[0]  Destination as MEM:BLK
    XOP[1]  Source      " "
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 06e31aa7d72d..1c4e44dcfe41 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -84,6 +84,7 @@
    [UNSPEC_STRLEN
     UNSPEC_CPYMEM
     UNSPEC_INDEX_JMP
+    UNSPEC_NZB
     UNSPEC_FMUL
     UNSPEC_FMULS
     UNSPEC_FMULSU
@@ -175,6 +176,10 @@
   no"
   (const_string "no"))
 
+(define_attr "nzb"
+  "yes, no"
+  (const_string "no"))
+
 ;; Flavours of instruction set architecture (ISA), used in enabled attribute
 ;; mov  : ISA has no MOVW                movw : ISA has MOVW
@@ -10916,6 +10921,216 @@
     DONE;
   })
 
+;; Patterns for -muse-nonzero-bits use nonzero_bits() in their condition,
+;; which makes possible some more optimizations.
+;; Since combine may add clobber of REG_CC, we must make sure that there are
+;; no other routes to synthesize such patterns.  We use an UNSPEC for that.
+;; As insns are not supposed to use stuff like nonzero_bits() in their
+;; condition, we split the insns right after insn combine.  For CFG reasons we
+;; have to do the splits by hand in avr_pass_split_nzb.  All insns that must
+;; be split by that pass must have insn attribute "nzb" set to "yes".
+;; Moreover, the insns to split must be single_sets and must not touch
+;; control flow.
+
+(define_code_attr nzb_constr_rdr [(and "r") (ior "d") (xor "r")])
+(define_code_attr nzb_use1_nnr   [(and "n") (ior "n") (xor "r")])
+
+(define_insn_and_split "*nzb=1.<code>.zerox_split"
+  [(set (match_operand:QI 0 "register_operand")
+        (bitop:QI (zero_extract:QI (match_operand:QI 1 "register_operand")
+                                   (const_int 1)
+                                   (match_operand:QI 2 "const_0_to_7_operand"))
+                  (match_operand:QI 3 "register_operand")))]
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed
+   && (<CODE> == IOR || <CODE> == XOR
+       || nonzero_bits (operands[3], QImode) == 1)"
+  { gcc_unreachable (); }
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed"
+  [(parallel [(set (match_dup 0)
+                   (bitop:QI (zero_extract:QI (match_dup 1)
+                                              (const_int 1)
+                                              (match_dup 2))
+                             (unspec:QI [(match_dup 3)
+                                        ] UNSPEC_NZB)))
+              (use (const_int 1))
+              (clobber (reg:CC REG_CC))])]
+  ""
+  [(set_attr "nzb" "yes")])
+
+(define_insn "*nzb=1.<code>.zerox"
+  [(set (match_operand:QI 0 "register_operand" "=<nzb_constr_rdr>")
+        (bitop:QI (zero_extract:QI (match_operand:QI 1 "register_operand" "r")
+                                   (const_int 1)
+                                   (match_operand:QI 2 "const_0_to_7_operand" "n"))
+                  (unspec:QI [(match_operand:QI 3 "register_operand" "0")
+                             ] UNSPEC_NZB)))
+   (use (match_operand:QI 4 "nonmemory_operand" "<nzb_use1_nnr>"))
+   (clobber (reg:CC REG_CC))]
+  "optimize && avropt_use_nonzero_bits"
+  {
+    if (<CODE> == AND)
+      return "sbrs %1,%2\;clr %0";
+    else if (<CODE> == IOR)
+      return "sbrc %1,%2\;ori %0,1";
+    else if (<CODE> == XOR)
+      return "sbrc %1,%2\;eor %0,%4";
+    else
+      gcc_unreachable ();
+  }
+  [(set_attr "length" "2")])
+
+(define_insn_and_split "*nzb=1.<code>.lsr_split"
+  [(set (match_operand:QI 0 "register_operand")
+        (bitop:QI (lshiftrt:QI (match_operand:QI 1 "register_operand")
+                               (match_operand:QI 2 "const_0_to_7_operand"))
+                  (match_operand:QI 3 "register_operand")))]
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed
+   && avr_nonzero_bits_lsr_operands_p (<CODE>, operands)"
+  { gcc_unreachable (); }
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed"
+  [(parallel [(set (match_dup 0)
+                   (bitop:QI (zero_extract:QI (match_dup 1)
+                                              (const_int 1)
+                                              (match_dup 2))
+                             (unspec:QI [(match_dup 3)
+                                        ] UNSPEC_NZB)))
+              (use (const_int 1))
+              (clobber (reg:CC REG_CC))])]
+  ""
+  [(set_attr "nzb" "yes")])
+
+(define_insn_and_split "*nzb=1.<code>.zerox.not_split"
+  [(set (match_operand:QI 0 "register_operand")
+        (bitop:QI (zero_extract:QI (xor:QI (match_operand:QI 1 "register_operand")
+                                           (match_operand:QI 4 "const_int_operand"))
+                                   (const_int 1)
+                                   (match_operand:QI 2 "const_0_to_7_operand"))
+                  (match_operand:QI 3 "register_operand")))]
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed
+   && INTVAL (operands[2]) == exact_log2 (0xff & INTVAL (operands[4]))
+   && (<CODE> == IOR
+       || nonzero_bits (operands[3], QImode) == 1)"
+  { gcc_unreachable (); }
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed"
+  ; "*nzb=1.<code>.zerox.not"
+  [(parallel [(set (match_dup 0)
+                   (bitop:QI (zero_extract:QI (not:QI (match_dup 1))
+                                              (const_int 1)
+                                              (match_dup 2))
+                             (unspec:QI [(match_dup 3)
+                                        ] UNSPEC_NZB)))
+              (use (const_int 1))
+              (clobber (reg:CC REG_CC))])]
+  ""
+  [(set_attr "nzb" "yes")])
+
+(define_insn_and_split "*nzb=1.<code>.lsr.not_split"
+  [(set (match_operand:QI 0 "register_operand")
+        (bitop:QI (lshiftrt:QI (xor:QI (match_operand:QI 1 "register_operand")
+                                       (match_operand:QI 4 "const_int_operand"))
+                               (match_operand:QI 2 "const_0_to_7_operand"))
+                  (match_operand:QI 3 "register_operand")))]
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed
+   && INTVAL (operands[2]) == exact_log2 (0xff & INTVAL (operands[4]))
+   && avr_nonzero_bits_lsr_operands_p (<CODE>, operands)"
+  { gcc_unreachable (); }
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed"
+  ; "*nzb=1.<code>.zerox.not"
+  [(parallel [(set (match_dup 0)
+                   (bitop:QI (zero_extract:QI (not:QI (match_dup 1))
+                                              (const_int 1)
+                                              (match_dup 2))
+                             (unspec:QI [(match_dup 3)
+                                        ] UNSPEC_NZB)))
+              (use (const_int 1))
+              (clobber (reg:CC REG_CC))])]
+  ""
+  [(set_attr "nzb" "yes")])
+
+(define_insn_and_split "*nzb=1.<code>.ge0_split"
+  [(set (match_operand:QI 0 "register_operand")
+        (bitop:QI (ge:QI (match_operand:QI 1 "register_operand")
+                         (const_int 0))
+                  (match_operand:QI 2 "register_operand")))]
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed
+   && (<CODE> == IOR || <CODE> == XOR
+       || nonzero_bits (operands[2], QImode) == 1)"
+  { gcc_unreachable (); }
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed"
+  ; "*nzb=1.<code>.zerox.not"
+  [(parallel [(set (match_dup 0)
+                   (bitop:QI (zero_extract:QI (not:QI (match_dup 1))
+                                              (const_int 1)
+                                              (const_int 7))
+                             (unspec:QI [(match_dup 2)
+                                        ] UNSPEC_NZB)))
+              (use (const_int 1))
+              (clobber (reg:CC REG_CC))])]
+  ""
+  [(set_attr "nzb" "yes")])
+
+(define_insn "*nzb=1.<code>.zerox.not"
+  [(set (match_operand:QI 0 "register_operand" "=<nzb_constr_rdr>")
+        (bitop:QI (zero_extract:QI (not:QI (match_operand:QI 1 "register_operand" "r"))
+                                   (const_int 1)
+                                   (match_operand:QI 2 "const_0_to_7_operand" "n"))
+                  (unspec:QI [(match_operand:QI 3 "register_operand" "0")
+                             ] UNSPEC_NZB)))
+   (use (match_operand:QI 4 "nonmemory_operand" "<nzb_use1_nnr>"))
+   (clobber (reg:CC REG_CC))]
+  "optimize && avropt_use_nonzero_bits"
+  {
+    if (<CODE> == AND)
+      return "sbrc %1,%2\;clr %0";
+    else if (<CODE> == IOR)
+      return "sbrs %1,%2\;ori %0,1";
+    else if (<CODE> == XOR)
+      return "sbrs %1,%2\;eor %0,%4";
+    else
+      gcc_unreachable ();
+  }
+  [(set_attr "length" "2")])
+
+(define_insn_and_split "*nzb=1.ior.ashift_split"
+  [(set (match_operand:QI 0 "register_operand"                       "=d")
+        (ior:QI (ashift:QI (match_operand:QI 1 "register_operand"     "r")
+                           (match_operand:QI 2 "const_0_to_7_operand" "n"))
+                (match_operand:QI 3 "register_operand"                "0")))]
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed
+   && nonzero_bits (operands[1], QImode) == 1"
+  { gcc_unreachable (); }
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed"
+  [(parallel [(set (match_dup 0)
+                   (unspec:QI [(ior:QI (ashift:QI (match_dup 1)
+                                                  (match_dup 2))
+                                       (match_dup 3))
+                              ] UNSPEC_NZB))
+              (clobber (reg:CC REG_CC))])]
+  ""
+  [(set_attr "nzb" "yes")])
+
+(define_insn "*nzb=1.ior.ashift"
+  [(set (match_operand:QI 0 "register_operand"                           "=d")
+        (unspec:QI [(ior:QI (ashift:QI (match_operand:QI 1 "register_operand" "r")
+                                       (match_operand:QI 2 "const_0_to_7_operand" "n"))
+                            (match_operand:QI 3 "register_operand"        "0"))
+                   ] UNSPEC_NZB))
+   (clobber (reg:CC REG_CC))]
+  "optimize && avropt_use_nonzero_bits"
+  "sbrc %1,0\;ori %0,1<<%2"
+  [(set_attr "length" "2")])
+
 ;; Work around PR115307: Early passes expand isinf/f/l to a bloat.
 ;; These passes do not consider costs, and there is no way to
diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt
index d22a118ad9e5..fcd2bf68f2a8 100644
--- a/gcc/config/avr/avr.opt
+++ b/gcc/config/avr/avr.opt
@@ -65,6 +65,10 @@ mpr118012
 Target Var(avropt_pr118012) UInteger Init(1) Undocumented
 This option is on per default in order to work around PR118012.
 
+muse-nonzero-bits
+Target Var(avropt_use_nonzero_bits) UInteger Init(0) Optimization
+Optimization.  Allow to use nonzero_bits() in some insn conditions.
+
 mshort-calls
 Target RejectNegative Mask(SHORT_CALLS)
 This option is used internally for multilib generation and selection.  Assume RJMP / RCALL can target all program memory.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 515d91ac2e3a..81bfacfc35d4 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -912,8 +912,8 @@ Objective-C and Objective-C++ Dialects}.
 -mdouble=@var{bits} -mlong-double=@var{bits}
 -mno-call-main -mn_flash=@var{size} -mfract-convert-truncate
 -mno-interrupts -mmain-is-OS_task -mrelax -mrmw -mstrict-X -mtiny-stack
--mrodata-in-ram -msplit-bit-shift -msplit-ldst
--mshort-calls -mskip-bug -nodevicelib -nodevicespecs
+-mrodata-in-ram -msplit-bit-shift -msplit-ldst -mshort-calls
+-mskip-bug -muse-nonzero-bits -nodevicelib -nodevicespecs
 -Waddr-space-convert -Wmisspelled-isr}
 
 @emph{Blackfin Options} (@ref{Blackfin Options})
@@ -24630,6 +24630,11 @@ a multiple of 8 is controlled by @option{-mfuse-move}.
 Split multi-byte loads and stores into several byte loads and stores.
 This optimization is turned on per default for @option{-O2} and higher.
 
+@opindex muse-nonzero-bits
+@item -muse-nonzero-bits
+Enable some patterns for bit optimizations that depend on specific values.
+This optimization is turned on per default for @option{-O2} and higher.
+
 @end table
 
 @anchor{eind}
diff --git a/gcc/testsuite/gcc.target/avr/torture/pr119421-sreg.c b/gcc/testsuite/gcc.target/avr/torture/pr119421-sreg.c
new file mode 100644
index 000000000000..3752d4fe6956
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/pr119421-sreg.c
@@ -0,0 +1,301 @@
+/* { dg-do run } */
+/* { dg-additional-options "-std=gnu99 -Wno-pedantic" } */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#define BITNO_I 7
+#define BITNO_T 6
+#define BITNO_H 5
+#define BITNO_S 4
+#define BITNO_V 3
+#define BITNO_N 2
+#define BITNO_Z 1
+#define BITNO_C 0
+
+#define I (1u << BITNO_I)
+#define T (1u << BITNO_T)
+#define H (1u << BITNO_H)
+#define S (1u << BITNO_S)
+#define V (1u << BITNO_V)
+#define N (1u << BITNO_N)
+#define Z (1u << BITNO_Z)
+#define C (1u << BITNO_C)
+
+#define bit(a, x) ((bool) ((a) & (1u << (x))))
+
+typedef union
+{
+  uint8_t val;
+  struct
+  {
+    bool c:1;
+    bool z:1;
+    bool n:1;
+    bool v:1;
+    bool s:1;
+    bool h:1;
+    bool t:1;
+    bool i:1;
+  };
+} sreg_t;
+
+
+typedef struct
+{
+  sreg_t sreg;
+  uint8_t mask;
+  uint16_t result;
+} flags_t;
+
+flags_t flags_sub (uint8_t d, uint8_t r)
+{
+  uint8_t res = d - r;
+  bool R7 = bit (res, 7);
+
+  bool Rd7 = bit (d, 7);
+  bool Rd3 = bit (d, 3);
+
+  bool R3 = bit (res, 3);
+  bool Rr7 = bit (r, 7);
+  bool Rr3 = bit (r, 3);
+
+  sreg_t s = { 0 };
+
+  s.v = (Rd7 & !Rr7 & !R7) | (!Rd7 & Rr7 & R7);
+  s.n = R7;
+  s.z = res == 0;
+  s.c = (!Rd7 & Rr7) | (Rr7 & R7) | (R7 & !Rd7);
+  s.h = (!Rd3 & Rr3) | (Rr3 & R3) | (R3 & !Rd3);
+  s.s = s.n ^ s.v;
+
+  return (flags_t) { s, H | S | V | N | Z | C, res };
+}
+
+flags_t flags_sbc (uint8_t d, uint8_t r, sreg_t sreg)
+{
+  uint8_t res = d - r - sreg.c;
+  bool R7 = bit (res, 7);
+
+  bool Rd7 = bit (d, 7);
+  bool Rd3 = bit (d, 3);
+
+  bool R3 = bit (res, 3);
+  bool Rr7 = bit (r, 7);
+  bool Rr3 = bit (r, 3);
+
+  sreg_t s = { 0 };
+
+  s.v = (Rd7 & !Rr7 & !R7) | (!Rd7 & Rr7 & R7);
+  s.n = R7;
+  s.z = (res == 0) & sreg.z;
+  s.c = (!Rd7 & Rr7) | (Rr7 & R7) | (R7 & !Rd7);
+  s.h = (!Rd3 & Rr3) | (Rr3 & R3) | (R3 & !Rd3);
+  s.s = s.n ^ s.v;
+
+  return (flags_t) { s, H | S | V | N | Z | C, res };
+}
+
+flags_t flags_neg (uint8_t d)
+{
+  uint8_t res = -d;
+  bool R7 = bit (res, 7);
+  bool R6 = bit (res, 6);
+  bool R5 = bit (res, 5);
+  bool R4 = bit (res, 4);
+  bool R3 = bit (res, 3);
+  bool R2 = bit (res, 2);
+  bool R1 = bit (res, 1);
+  bool R0 = bit (res, 0);
+
+  bool Rd3 = bit (d, 3);
+
+  sreg_t s = { 0 };
+
+  s.v = R7 & !R6 & !R5 & !R4 & !R3 & !R2 & !R1 & !R0;
+  s.n = R7;
+  s.z = res == 0;
+  s.c = R7 | R6 | R5 | R4 | R3 | R2 | R1 | R0;
+  s.h = R3 | Rd3;
+  s.s = s.n ^ s.v;
+
+  return (flags_t) { s, H | S | V | N | Z | C, res };
+}
+
+flags_t flags_ror (uint8_t d, sreg_t sreg)
+{
+  uint8_t res = (d + 0x100 * sreg.c) >> 1;
+
+  sreg_t s = { 0 };
+
+  s.c = bit (d, 0);
+  s.z = res == 0;
+  s.n = bit (res, 7);
+  s.v = s.n ^ s.c;
+  s.s = s.n ^ s.v;
+
+  return (flags_t) { s, S | V | N | Z | C, res };
+}
+
+flags_t flags_add (uint8_t d, uint8_t r)
+{
+  uint8_t res = d + r;
+  bool R7 = bit (res, 7);
+
+  bool Rd7 = bit (d, 7);
+  bool Rd3 = bit (d, 3);
+
+  bool R3 = bit (res, 3);
+  bool Rr7 = bit (r, 7);
+  bool Rr3 = bit (r, 3);
+
+  sreg_t s = { 0 };
+
+  s.v = (Rd7 & Rr7 & !R7) | (!Rd7 & !Rr7 & R7);
+  s.n = R7;
+  s.z = res == 0;
+  s.c = (Rd7 & Rr7) | (Rr7 & !R7) | (!R7 & Rd7);
+  s.h = (Rd3 & Rr3) | (Rr3 & !R3) | (!R3 & Rd3);
+  s.s = s.n ^ s.v;
+
+  return (flags_t) { s, H | S | V | N | Z | C, res };
+}
+
+static inline
+sreg_t sreg_sub (uint8_t d, uint8_t r, uint8_t sreg, uint8_t result)
+{
+  __asm ("out __SREG__,%[sreg]" "\n\t"
+         "sub %[d],%[r]"        "\n\t"
+         "in %[sreg],__SREG__"
+         : [sreg] "+r" (sreg), [d] "+r" (d)
+         : [r] "r" (r));
+  if (d != result)
+    exit (__LINE__);
+  return (sreg_t) sreg;
+}
+
+static inline
+sreg_t sreg_sbc (uint8_t d, uint8_t r, uint8_t sreg, uint8_t result)
+{
+  __asm ("out __SREG__,%[sreg]" "\n\t"
+         "sbc %[d],%[r]"        "\n\t"
+         "in %[sreg],__SREG__"
+         : [sreg] "+r" (sreg), [d] "+r" (d)
+         : [r] "r" (r));
+  if (d != result)
+    exit (__LINE__);
+  return (sreg_t) sreg;
+}
+
+static inline
+sreg_t sreg_neg (uint8_t d, uint8_t sreg, uint8_t result)
+{
+  __asm ("out __SREG__,%[sreg]" "\n\t"
+         "neg %[d]"             "\n\t"
+         "in %[sreg],__SREG__"
+         : [sreg] "+r" (sreg), [d] "+r" (d));
+  if (d != result)
+    exit (__LINE__);
+  return (sreg_t) sreg;
+}
+
+static inline
+sreg_t sreg_ror (uint8_t d, uint8_t sreg, uint8_t result)
+{
+  __asm ("out __SREG__,%[sreg]" "\n\t"
+         "ror %[d]"             "\n\t"
+         "in %[sreg],__SREG__"
+         : [sreg] "+r" (sreg), [d] "+r" (d));
+  if (d != result)
+    exit (__LINE__);
+  return (sreg_t) sreg;
+}
+
+static inline
+sreg_t sreg_add (uint8_t d, uint8_t r, uint8_t sreg, uint8_t result)
+{
+  __asm ("out __SREG__,%[sreg]" "\n\t"
+         "add %[d],%[r]"        "\n\t"
+         "in %[sreg],__SREG__"
+         : [sreg] "+r" (sreg), [d] "+r" (d)
+         : [r] "r" (r));
+  if (d != result)
+    exit (__LINE__);
+  return (sreg_t) sreg;
+}
+
+void test_sub (uint8_t d, uint8_t r, sreg_t sreg)
+{
+  sreg_t s0 = sreg_sub (d, r, sreg.val, d - r);
+  flags_t f = flags_sub (d, r);
+  if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+    exit (__LINE__);
+}
+
+void test_sbc (uint8_t d, uint8_t r, sreg_t sreg)
+{
+  sreg_t s0 = sreg_sbc (d, r, sreg.val, d - r - sreg.c);
+  flags_t f = flags_sbc (d, r, sreg);
+  if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+    exit (__LINE__);
+}
+
+void test_neg (uint8_t d, sreg_t sreg)
+{
+  sreg_t s0 = sreg_neg (d, sreg.val, -d);
+  flags_t f = flags_neg (d);
+  if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+    exit (__LINE__);
+}
+
+void test_add (uint8_t d, uint8_t r, sreg_t sreg)
+{
+  sreg_t s0 = sreg_add (d, r, sreg.val, d + r);
+  flags_t f = flags_add (d, r);
+  if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+    exit (__LINE__);
+}
+
+void test_ror (uint8_t d, sreg_t sreg)
+{
+  sreg_t s0 = sreg_ror (d, sreg.val, (d + 0x100 * sreg.c) >> 1);
+  flags_t f = flags_ror (d, sreg);
+  if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+    exit (__LINE__);
+}
+
+void test_sreg (void)
+{
+  uint8_t d = 0;
+
+  do
+    {
+      uint8_t r = 0;
+
+      test_neg (d, (sreg_t) { 0x00 });
+      test_neg (d, (sreg_t) { 0xff });
+
+      test_ror (d, (sreg_t) { 0 });
+      test_ror (d, (sreg_t) { C });
+
+      do
+        {
+          test_add (d, r, (sreg_t) { 0x00 });
+          test_add (d, r, (sreg_t) { 0xff });
+
+          test_sub (d, r, (sreg_t) { 0x00 });
+          test_sub (d, r, (sreg_t) { 0xff });
+
+          test_sbc (d, r, (sreg_t) { 0 });
+          test_sbc (d, r, (sreg_t) { C });
+          test_sbc (d, r, (sreg_t) { Z });
+          test_sbc (d, r, (sreg_t) { C | Z });
+        } while (++r);
+    } while (++d);
+}
+
+int main (void)
+{
+  test_sreg();
+  return 0;
+}
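
---

As an illustration of the transformation described in the commit message (not
part of the patch): the function name set_bit3 below is made up, and whether
combine actually forms the RTL matched by "*nzb=1.ior.ashift" depends on the
surrounding code.  Assuming avr-gcc with this patch at -O2, which now enables
-muse-nonzero-bits per the avr-common.cc hunk above, code like this is a
candidate:

    #include <stdint.h>
    #include <stdbool.h>

    /* b is a bool, so nonzero_bits (b) == 1, and d | (b << 3) has the
       Rd |= Rn << Off shape from the commit message.  Instead of moving
       and shifting b, the backend may emit the two-insn sequence
           SBRC Rn, 0       ; skip next insn if bit 0 of Rn is clear
           ORI  Rd, 1 << 3
       as implemented by the "*nzb=1.ior.ashift" pattern.  */
    uint8_t set_bit3 (uint8_t d, bool b)
    {
      return d | (uint8_t) (b << 3);
    }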