There are occasions where knowledge about nonzero bits makes some optimizations possible. For example,
Rd |= Rn << Off can be implemented as SBRC Rn, 0 ORI Rd, 1 << Off when Rn in { 0, 1 }, i.e. nonzero_bits (Rn) == 1. This patch adds some patterns that exploit nonzero_bits() in some combiner patterns. As insn conditions are not supposed to contain nonzero_bits(), the patch splits such insns right after pass insn combine. The idea to split the patterns right after combine is from Jeff. The patch passes without new regressions. Ok for trunk? Johann -- AVR: target/119421 Better optimize some bit operations. There are occasions where knowledge about nonzero bits makes some optimizations possible. For example, Rd |= Rn << Off can be implemented as SBRC Rn, 0 ORI Rd, 1 << Off when Rn in { 0, 1 }, i.e. nonzero_bits (Rn) == 1. This patch adds some patterns that exploit nonzero_bits() in some combiner patterns. As insn conditions are not supposed to contain nonzero_bits(), the patch splits such insns right after pass insn combine. PR target/119421 gcc/ * config/avr/avr.opt (-muse-nonzero-bits): New option. * config/avr/avr-protos.h (avr_nonzero_bits_lsr_operands_p): New. (make_avr_pass_split_nzb): New. * config/avr/avr.cc (avr_nonzero_bits_lsr_operands_p): New function. (avr_rtx_costs_1): Return costs for the new insns. * config/avr/avr.md (nzb): New insn attribute. (*nzb=1.<code>...): New insns to better support some bit operations for <code> in AND, IOR, XOR. * common/config/avr/avr-common.cc (avr_option_optimization_table): Enable -muse-nonzero-bits for -O2 and higher. * avr-passes.def (avr_pass_split_nzb): Insert pass after combine. * avr-passes.cc (avr_pass_data_split_nzb): New pass data. (avr_pass_split_nzb): New pass. (make_avr_pass_split_nzb): New function. * doc/invoke.texi (AVR Options): Document -muse-nonzero-bits. gcc/testsuite/ * gcc.target/avr/torture/pr119421-sreg.c: New test.
AVR: target/119421 Better optimize some bit operations. There are occasions where knowledge about nonzero bits makes some optimizations possible. For example, Rd |= Rn << Off can be implemented as SBRC Rn, 0 ORI Rd, 1 << Off when Rn in { 0, 1 }, i.e. nonzero_bits (Rn) == 1. This patch adds some patterns that exploit nonzero_bits() in some combiner patterns. As insn conditions are not supposed to contain nonzero_bits(), the patch splits such insns right after pass insn combine. PR target/119421 gcc/ * config/avr/avr.opt (-muse-nonzero-bits): New option. * config/avr/avr-protos.h (avr_nonzero_bits_lsr_operands_p): New. (make_avr_pass_split_nzb): New. * config/avr/avr.cc (avr_nonzero_bits_lsr_operands_p): New function. (avr_rtx_costs_1): Return costs for the new insns. * config/avr/avr.md (nzb): New insn attribute. (*nzb=1.<code>...): New insns to better support some bit operations for <code> in AND, IOR, XOR. * common/config/avr/avr-common.cc (avr_option_optimization_table): Enable -muse-nonzero-bits for -O2 and higher. * avr-passes.def (avr_pass_split_nzb): Insert pass after combine. * avr-passes.cc (avr_pass_data_split_nzb): New pass data. (avr_pass_split_nzb): New pass. (make_avr_pass_split_nzb): New function. * doc/invoke.texi (AVR Options): Document -muse-nonzero-bits. gcc/testsuite/ * gcc.target/avr/torture/pr119421-sreg.c: New test. diff --git a/gcc/common/config/avr/avr-common.cc b/gcc/common/config/avr/avr-common.cc index 06c6cc856d3..203a9652818 100644 --- a/gcc/common/config/avr/avr-common.cc +++ b/gcc/common/config/avr/avr-common.cc @@ -42,6 +42,7 @@ static const struct default_options avr_option_optimization_table[] = { OPT_LEVELS_2_PLUS, OPT_mfuse_move_, NULL, 23 }, { OPT_LEVELS_2_PLUS, OPT_msplit_bit_shift, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_msplit_ldst, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_muse_nonzero_bits, NULL, 1 }, // Stick to the "old" placement of the subreg lowering pass. 
{ OPT_LEVELS_1_PLUS, OPT_fsplit_wide_types_early, NULL, 1 }, /* Allow optimizer to introduce store data races. This used to be the diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc index e32c46738d8..91a0f839349 100644 --- a/gcc/config/avr/avr-passes.cc +++ b/gcc/config/avr/avr-passes.cc @@ -29,6 +29,7 @@ #include "target.h" #include "rtl.h" #include "tree.h" +#include "diagnostic-core.h" #include "cfghooks.h" #include "cfganal.h" #include "df.h" @@ -4836,6 +4837,70 @@ avr_pass_fuse_add::execute1 (function *func) } + +////////////////////////////////////////////////////////////////////////////// +// Split insns with nonzero_bits() after combine. + +static const pass_data avr_pass_data_split_nzb = +{ + RTL_PASS, // type + "", // name (will be patched) + OPTGROUP_NONE, // optinfo_flags + TV_DF_SCAN, // tv_id + 0, // properties_required + 0, // properties_provided + 0, // properties_destroyed + 0, // todo_flags_start + 0 // todo_flags_finish +}; + +class avr_pass_split_nzb : public rtl_opt_pass +{ +public: + avr_pass_split_nzb (gcc::context *ctxt, const char *name) + : rtl_opt_pass (avr_pass_data_split_nzb, ctxt) + { + this->name = name; + } + + unsigned int execute (function *) final override + { + if (avropt_use_nonzero_bits) + split_nzb_insns (); + return 0; + } + + void split_nzb_insns (); + +}; // avr_pass_split_nzb + + +void +avr_pass_split_nzb::split_nzb_insns () +{ + rtx_insn *next; + + for (rtx_insn *insn = get_insns (); insn; insn = next) + { + next = NEXT_INSN (insn); + + if (INSN_P (insn) + && single_set (insn) + && get_attr_nzb (insn) == NZB_YES) + { + rtx_insn *last = try_split (PATTERN (insn), insn, 1 /*last*/); + + // The nonzero_bits() insns *must* split. If not: ICE. + if (last == insn) + { + debug_rtx (insn); + internal_error ("failed to split insn"); + } + } + } +} + + ////////////////////////////////////////////////////////////////////////////// // Split shift insns after peephole2 / befor avr-fuse-move. 
@@ -5635,6 +5700,12 @@ make_avr_pass_casesi (gcc::context *ctxt) return new avr_pass_casesi (ctxt, "avr-casesi"); } +rtl_opt_pass * +make_avr_pass_split_nzb (gcc::context *ctxt) +{ + return new avr_pass_split_nzb (ctxt, "avr-split-nzb"); +} + // Try to replace 2 cbranch insns with 1 comparison and 2 branches. rtl_opt_pass * diff --git a/gcc/config/avr/avr-passes.def b/gcc/config/avr/avr-passes.def index 091005e3b94..eb60a93eeeb 100644 --- a/gcc/config/avr/avr-passes.def +++ b/gcc/config/avr/avr-passes.def @@ -74,6 +74,18 @@ INSERT_PASS_BEFORE (pass_free_cfg, 1, avr_pass_recompute_notes); INSERT_PASS_AFTER (pass_expand, 1, avr_pass_casesi); +/* Some combine insns have nonzero_bits() in their condition, though insns + should not use such stuff in their condition. Therefore, we split such + insn into something without nonzero_bits() in their condition right after + insn combine. + + Since neither split_all_insns() nor split_all_insns_noflow() work at that + point (presumably since there are splits involving branches), we split + respective insns (and only such insns) by hand. Respective insns are + tagged with insn attribute nzb = "yes" so that they are easy to spot. 
*/ + +INSERT_PASS_AFTER (pass_combine, 1, avr_pass_split_nzb); + /* If-else decision trees generated for switch / case may produce sequences like diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h index 83137c7f6f6..ca30136797d 100644 --- a/gcc/config/avr/avr-protos.h +++ b/gcc/config/avr/avr-protos.h @@ -136,6 +136,7 @@ extern bool reg_unused_after (rtx_insn *insn, rtx reg); extern int avr_jump_mode (rtx x, rtx_insn *insn, int = 0); extern bool test_hard_reg_class (enum reg_class rclass, rtx x); extern bool jump_over_one_insn_p (rtx_insn *insn, rtx dest); +extern bool avr_nonzero_bits_lsr_operands_p (rtx_code, rtx *); extern void avr_final_prescan_insn (rtx_insn *insn, rtx *operand, int num_operands); @@ -205,6 +206,7 @@ extern rtl_opt_pass *make_avr_pass_pre_proep (gcc::context *); extern rtl_opt_pass *make_avr_pass_recompute_notes (gcc::context *); extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *); extern rtl_opt_pass *make_avr_pass_ifelse (gcc::context *); +extern rtl_opt_pass *make_avr_pass_split_nzb (gcc::context *); extern rtl_opt_pass *make_avr_pass_split_after_peephole2 (gcc::context *); #ifdef RTX_CODE extern bool avr_casei_sequence_check_operands (rtx *xop); diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 71c03b42148..7fcd3180f7e 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -12683,6 +12683,50 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, } } + // Insns with nonzero_bits() == 1 in the condition. 
+ if (avropt_use_nonzero_bits + && mode == QImode + && (code == AND || code == IOR || code == XOR) + && REG_P (XEXP (x, 1))) + { + // "*nzb=1.<code>.lsr_split" + // "*nzb=1.<code>.lsr.not_split" + bool is_nzb = (GET_CODE (XEXP (x, 0)) == LSHIFTRT + && (REG_P (XEXP (XEXP (x, 0), 0)) + || GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR) + && const_0_to_7_operand (XEXP (XEXP (x, 0), 1), QImode)); + // "*nzb=1.<code>.zerox_split" + // "*nzb=1.<code>.zerox.not_split" + is_nzb |= (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT + && (REG_P (XEXP (XEXP (x, 0), 0)) + || GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR) + && const1_operand (XEXP (XEXP (x, 0), 1), QImode) + && const_0_to_7_operand (XEXP (XEXP (x, 0), 2), QImode)); + // "*nzb=1.<code>.ge0_split" + is_nzb |= (GET_CODE (XEXP (x, 0)) == GE + && REG_P (XEXP (XEXP (x, 0), 0)) + && const0_operand (XEXP (XEXP (x, 0), 1), QImode)); + if (is_nzb) + { + *total = COSTS_N_INSNS (code == XOR ? 3 : 2); + return true; + } + } + + // Insn "*nzb=1.ior.ashift_split" with nonzero_bits() == 1 in the condition. + if (avropt_use_nonzero_bits + && mode == QImode + && code == IOR + && REG_P (XEXP (x, 1)) + && GET_CODE (XEXP (x, 0)) == ASHIFT + && REG_P (XEXP (XEXP (x, 0), 0)) + && CONST_INT_P (XEXP (XEXP (x, 0), 1))) + { + *total = COSTS_N_INSNS (2); + return true; + } + + switch (code) { case CONST_INT: @@ -13661,6 +13705,28 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); return true; + case GE: + if (mode == QImode + && REG_P (XEXP (x, 0)) + && XEXP (x, 1) == const0_rtx) + { + *total = COSTS_N_INSNS (3); + return true; + } + break; + + case ZERO_EXTRACT: + if (mode == QImode + && REG_P (XEXP (x, 0)) + && XEXP (x, 1) == const1_rtx + && CONST_INT_P (XEXP (x, 2))) + { + int bpos = INTVAL (XEXP (x, 2)); + *total = COSTS_N_INSNS (bpos == 0 ? 1 : bpos == 1 ? 
2 : 3); + return true; + } + break; + case COMPARE: switch (GET_MODE (XEXP (x, 0))) { @@ -15148,6 +15214,46 @@ avr_emit3_fix_outputs (rtx (*gen)(rtx,rtx,rtx), rtx *op, } +/* A helper for the insn condition of "*nzb=1.<code>.lsr[.not]_split" + where <code> is AND, IOR or XOR. Return true when + + OP[0] <code>= OP[1] >> OP[2] + + can be performed by means of the code of "*nzb=1.<code>.zerox", i.e. + + OP[0] <code>= OP[1].OP[2] + + For example, when OP[0] is in { 0, 1 }, then R24 &= R10.4 + can be performed by means of SBRS R10,4 $ CLR R24. + Notice that the constraint of OP[3] is "0". */ + +bool +avr_nonzero_bits_lsr_operands_p (rtx_code code, rtx *op) +{ + if (reload_completed) + return false; + + const auto offs = INTVAL (op[2]); + const auto op1_non0 = nonzero_bits (op[1], QImode); + const auto op3_non0 = nonzero_bits (op[3], QImode); + + switch (code) + { + default: + gcc_unreachable (); + + case IOR: + case XOR: + return op1_non0 >> offs == 1; + + case AND: + return op3_non0 == 1; + } + + return false; +} + + /* Worker function for cpymemhi expander. XOP[0] Destination as MEM:BLK XOP[1] Source " " diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index 06e31aa7d72..1c4e44dcfe4 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -84,6 +84,7 @@ (define_c_enum "unspec" [UNSPEC_STRLEN UNSPEC_CPYMEM UNSPEC_INDEX_JMP + UNSPEC_NZB UNSPEC_FMUL UNSPEC_FMULS UNSPEC_FMULSU @@ -175,6 +176,10 @@ (define_attr "adjust_len" no" (const_string "no")) +(define_attr "nzb" + "yes, no" + (const_string "no")) + ;; Flavours of instruction set architecture (ISA), used in enabled attribute ;; mov : ISA has no MOVW movw : ISA has MOVW @@ -10916,6 +10921,216 @@ (define_insn_and_split "*map.and1-to-skip.<QISI:mode>" DONE; }) +;; Patterns for -muse-nonzero-bits use nonzero_bits() in their condition, +;; which makes possible some more optimizations. 
+;; Since combine may add clobber of REG_CC, we must make sure that there are +;; no other routes to synthesize such patterns. We use an UNSPEC for that. +;; As insns are not supposed to use stuff like nonzero_bits() in their +;; condition, we split the insns right after reload. For CFG reasons we have +;; to do the splits by hand in avr_pass_split_nzb. All insns that must be +;; split by that pass must have insn attribute "nzb" set to "yes". Moreover, +;; the insns to split must be single_sets and must not touch control flow. + +(define_code_attr nzb_constr_rdr [(and "r") (ior "d") (xor "r")]) +(define_code_attr nzb_use1_nnr [(and "n") (ior "n") (xor "r")]) + +(define_insn_and_split "*nzb=1.<code>.zerox_split" + [(set (match_operand:QI 0 "register_operand") + (bitop:QI (zero_extract:QI (match_operand:QI 1 "register_operand") + (const_int 1) + (match_operand:QI 2 "const_0_to_7_operand")) + (match_operand:QI 3 "register_operand")))] + "optimize && avropt_use_nonzero_bits + && !reload_completed + && (<CODE> == IOR || <CODE> == XOR + || nonzero_bits (operands[3], QImode) == 1)" + { gcc_unreachable (); } + "optimize && avropt_use_nonzero_bits + && !reload_completed" + [(parallel [(set (match_dup 0) + (bitop:QI (zero_extract:QI (match_dup 1) + (const_int 1) + (match_dup 2)) + (unspec:QI [(match_dup 3) + ] UNSPEC_NZB))) + (use (const_int 1)) + (clobber (reg:CC REG_CC))])] + "" + [(set_attr "nzb" "yes")]) + +(define_insn "*nzb=1.<code>.zerox" + [(set (match_operand:QI 0 "register_operand" "=<nzb_constr_rdr>") + (bitop:QI (zero_extract:QI (match_operand:QI 1 "register_operand" "r") + (const_int 1) + (match_operand:QI 2 "const_0_to_7_operand" "n")) + (unspec:QI [(match_operand:QI 3 "register_operand" "0") + ] UNSPEC_NZB))) + (use (match_operand:QI 4 "nonmemory_operand" "<nzb_use1_nnr>")) + (clobber (reg:CC REG_CC))] + "optimize && avropt_use_nonzero_bits" + { + if (<CODE> == AND) + return "sbrs %1,%2\;clr %0"; + else if (<CODE> == IOR) + return "sbrc %1,%2\;ori %0,1"; + 
else if (<CODE> == XOR) + return "sbrc %1,%2\;eor %0,%4"; + else + gcc_unreachable (); + } + [(set_attr "length" "2")]) + +(define_insn_and_split "*nzb=1.<code>.lsr_split" + [(set (match_operand:QI 0 "register_operand") + (bitop:QI (lshiftrt:QI (match_operand:QI 1 "register_operand") + (match_operand:QI 2 "const_0_to_7_operand")) + (match_operand:QI 3 "register_operand")))] + "optimize && avropt_use_nonzero_bits + && !reload_completed + && avr_nonzero_bits_lsr_operands_p (<CODE>, operands)" + { gcc_unreachable (); } + "optimize && avropt_use_nonzero_bits + && !reload_completed" + [(parallel [(set (match_dup 0) + (bitop:QI (zero_extract:QI (match_dup 1) + (const_int 1) + (match_dup 2)) + (unspec:QI [(match_dup 3) + ] UNSPEC_NZB))) + (use (const_int 1)) + (clobber (reg:CC REG_CC))])] + "" + [(set_attr "nzb" "yes")]) + +(define_insn_and_split "*nzb=1.<code>.zerox.not_split" + [(set (match_operand:QI 0 "register_operand") + (bitop:QI (zero_extract:QI (xor:QI (match_operand:QI 1 "register_operand") + (match_operand:QI 4 "const_int_operand")) + (const_int 1) + (match_operand:QI 2 "const_0_to_7_operand")) + (match_operand:QI 3 "register_operand")))] + "optimize && avropt_use_nonzero_bits + && !reload_completed + && INTVAL (operands[2]) == exact_log2 (0xff & INTVAL (operands[4])) + && (<CODE> == IOR + || nonzero_bits (operands[3], QImode) == 1)" + { gcc_unreachable (); } + "optimize && avropt_use_nonzero_bits + && !reload_completed" + ; "*nzb=1.<code>.zerox.not" + [(parallel [(set (match_dup 0) + (bitop:QI (zero_extract:QI (not:QI (match_dup 1)) + (const_int 1) + (match_dup 2)) + (unspec:QI [(match_dup 3) + ] UNSPEC_NZB))) + (use (const_int 1)) + (clobber (reg:CC REG_CC))])] + "" + [(set_attr "nzb" "yes")]) + +(define_insn_and_split "*nzb=1.<code>.lsr.not_split" + [(set (match_operand:QI 0 "register_operand") + (bitop:QI (lshiftrt:QI (xor:QI (match_operand:QI 1 "register_operand") + (match_operand:QI 4 "const_int_operand")) + (match_operand:QI 2 "const_0_to_7_operand")) + 
(match_operand:QI 3 "register_operand")))] + "optimize && avropt_use_nonzero_bits + && !reload_completed + && INTVAL (operands[2]) == exact_log2 (0xff & INTVAL (operands[4])) + && avr_nonzero_bits_lsr_operands_p (<CODE>, operands)" + { gcc_unreachable (); } + "optimize && avropt_use_nonzero_bits + && !reload_completed" + ; "*nzb=1.<code>.zerox.not" + [(parallel [(set (match_dup 0) + (bitop:QI (zero_extract:QI (not:QI (match_dup 1)) + (const_int 1) + (match_dup 2)) + (unspec:QI [(match_dup 3) + ] UNSPEC_NZB))) + (use (const_int 1)) + (clobber (reg:CC REG_CC))])] + "" + [(set_attr "nzb" "yes")]) + +(define_insn_and_split "*nzb=1.<code>.ge0_split" + [(set (match_operand:QI 0 "register_operand") + (bitop:QI (ge:QI (match_operand:QI 1 "register_operand") + (const_int 0)) + (match_operand:QI 2 "register_operand")))] + "optimize && avropt_use_nonzero_bits + && !reload_completed + && (<CODE> == IOR || <CODE> == XOR + || nonzero_bits (operands[2], QImode) == 1)" + { gcc_unreachable (); } + "optimize && avropt_use_nonzero_bits + && !reload_completed" + ; "*nzb=1.<code>.zerox.not" + [(parallel [(set (match_dup 0) + (bitop:QI (zero_extract:QI (not:QI (match_dup 1)) + (const_int 1) + (const_int 7)) + (unspec:QI [(match_dup 2) + ] UNSPEC_NZB))) + (use (const_int 1)) + (clobber (reg:CC REG_CC))])] + "" + [(set_attr "nzb" "yes")]) + +(define_insn "*nzb=1.<code>.zerox.not" + [(set (match_operand:QI 0 "register_operand" "=<nzb_constr_rdr>") + (bitop:QI (zero_extract:QI (not:QI (match_operand:QI 1 "register_operand" "r")) + (const_int 1) + (match_operand:QI 2 "const_0_to_7_operand" "n")) + (unspec:QI [(match_operand:QI 3 "register_operand" "0") + ] UNSPEC_NZB))) + (use (match_operand:QI 4 "nonmemory_operand" "<nzb_use1_nnr>")) + (clobber (reg:CC REG_CC))] + "optimize && avropt_use_nonzero_bits" + { + if (<CODE> == AND) + return "sbrc %1,%2\;clr %0"; + else if (<CODE> == IOR) + return "sbrs %1,%2\;ori %0,1"; + else if (<CODE> == XOR) + return "sbrs %1,%2\;eor %0,%4"; + else + 
gcc_unreachable (); + } + [(set_attr "length" "2")]) + +(define_insn_and_split "*nzb=1.ior.ashift_split" + [(set (match_operand:QI 0 "register_operand" "=d") + (ior:QI (ashift:QI (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "const_0_to_7_operand" "n")) + (match_operand:QI 3 "register_operand" "0")))] + "optimize && avropt_use_nonzero_bits + && !reload_completed + && nonzero_bits (operands[1], QImode) == 1" + { gcc_unreachable (); } + "optimize && avropt_use_nonzero_bits + && !reload_completed" + [(parallel [(set (match_dup 0) + (unspec:QI [(ior:QI (ashift:QI (match_dup 1) + (match_dup 2)) + (match_dup 3)) + ] UNSPEC_NZB)) + (clobber (reg:CC REG_CC))])] + "" + [(set_attr "nzb" "yes")]) + +(define_insn "*nzb=1.ior.ashift" + [(set (match_operand:QI 0 "register_operand" "=d") + (unspec:QI [(ior:QI (ashift:QI (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "const_0_to_7_operand" "n")) + (match_operand:QI 3 "register_operand" "0")) + ] UNSPEC_NZB)) + (clobber (reg:CC REG_CC))] + "optimize && avropt_use_nonzero_bits" + "sbrc %1,0\;ori %0,1<<%2" + [(set_attr "length" "2")]) + ;; Work around PR115307: Early passes expand isinf/f/l to a bloat. ;; These passes do not consider costs, and there is no way to diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt index d22a118ad9e..fcd2bf68f2a 100644 --- a/gcc/config/avr/avr.opt +++ b/gcc/config/avr/avr.opt @@ -65,6 +65,10 @@ mpr118012 Target Var(avropt_pr118012) UInteger Init(1) Undocumented This option is on per default in order to work around PR118012. +muse-nonzero-bits +Target Var(avropt_use_nonzero_bits) UInteger Init(0) Optimization +Optimization. Allow to use nonzero_bits() in some insn conditions. + mshort-calls Target RejectNegative Mask(SHORT_CALLS) This option is used internally for multilib generation and selection. Assume RJMP / RCALL can target all program memory. 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 6f0779b900c..2c2e831c531 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -911,8 +911,8 @@ Objective-C and Objective-C++ Dialects}. -mdouble=@var{bits} -mlong-double=@var{bits} -mno-call-main -mn_flash=@var{size} -mfract-convert-truncate -mno-interrupts -mmain-is-OS_task -mrelax -mrmw -mstrict-X -mtiny-stack --mrodata-in-ram -msplit-bit-shift -msplit-ldst --mshort-calls -mskip-bug -nodevicelib -nodevicespecs +-mrodata-in-ram -msplit-bit-shift -msplit-ldst -mshort-calls +-mskip-bug -muse-nonzero-bits -nodevicelib -nodevicespecs -Waddr-space-convert -Wmisspelled-isr} @emph{Blackfin Options} (@ref{Blackfin Options}) @@ -24599,6 +24599,11 @@ a multiple of 8 is controlled by @option{-mfuse-move}. Split multi-byte loads and stores into several byte loads and stores. This optimization is turned on per default for @option{-O2} and higher. +@opindex muse-nonzero-bits +@item -muse-nonzero-bits +Enable some patterns for bit optimizations that depend on specific values. +This optimization is turned on per default for @option{-O2} and higher. 
+ @end table @anchor{eind} diff --git a/gcc/testsuite/gcc.target/avr/torture/pr119421-sreg.c b/gcc/testsuite/gcc.target/avr/torture/pr119421-sreg.c new file mode 100644 index 00000000000..3752d4fe695 --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/torture/pr119421-sreg.c @@ -0,0 +1,301 @@ +/* { dg-do run } */ +/* { dg-additional-options "-std=gnu99 -Wno-pedantic" } */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdbool.h> + +#define BITNO_I 7 +#define BITNO_T 6 +#define BITNO_H 5 +#define BITNO_S 4 +#define BITNO_V 3 +#define BITNO_N 2 +#define BITNO_Z 1 +#define BITNO_C 0 + +#define I (1u << BITNO_I) +#define T (1u << BITNO_T) +#define H (1u << BITNO_H) +#define S (1u << BITNO_S) +#define V (1u << BITNO_V) +#define N (1u << BITNO_N) +#define Z (1u << BITNO_Z) +#define C (1u << BITNO_C) + +#define bit(a, x) ((bool) ((a) & (1u << (x)))) + +typedef union +{ + uint8_t val; + struct + { + bool c:1; + bool z:1; + bool n:1; + bool v:1; + bool s:1; + bool h:1; + bool t:1; + bool i:1; + }; +} sreg_t; + + +typedef struct +{ + sreg_t sreg; + uint8_t mask; + uint16_t result; +} flags_t; + +flags_t flags_sub (uint8_t d, uint8_t r) +{ + uint8_t res = d - r; + bool R7 = bit (res, 7); + + bool Rd7 = bit (d, 7); + bool Rd3 = bit (d, 3); + + bool R3 = bit (res, 3); + bool Rr7 = bit (r, 7); + bool Rr3 = bit (r, 3); + + sreg_t s = { 0 }; + + s.v = (Rd7 & !Rr7 & !R7) | (!Rd7 & Rr7 & R7); + s.n = R7; + s.z = res == 0; + s.c = (!Rd7 & Rr7) | (Rr7 & R7) | (R7 & !Rd7); + s.h = (!Rd3 & Rr3) | (Rr3 & R3) | (R3 & !Rd3); + s.s = s.n ^ s.v; + + return (flags_t) { s, H | S | V | N | Z | C, res }; +} + +flags_t flags_sbc (uint8_t d, uint8_t r, sreg_t sreg) +{ + uint8_t res = d - r - sreg.c; + bool R7 = bit (res, 7); + + bool Rd7 = bit (d, 7); + bool Rd3 = bit (d, 3); + + bool R3 = bit (res, 3); + bool Rr7 = bit (r, 7); + bool Rr3 = bit (r, 3); + + sreg_t s = { 0 }; + + s.v = (Rd7 & !Rr7 & !R7) | (!Rd7 & Rr7 & R7); + s.n = R7; + s.z = (res == 0) & sreg.z; + s.c = (!Rd7 & Rr7) | (Rr7 & 
R7) | (R7 & !Rd7); + s.h = (!Rd3 & Rr3) | (Rr3 & R3) | (R3 & !Rd3); + s.s = s.n ^ s.v; + + return (flags_t) { s, H | S | V | N | Z | C, res }; +} + +flags_t flags_neg (uint8_t d) +{ + uint8_t res = -d; + bool R7 = bit (res, 7); + bool R6 = bit (res, 6); + bool R5 = bit (res, 5); + bool R4 = bit (res, 4); + bool R3 = bit (res, 3); + bool R2 = bit (res, 2); + bool R1 = bit (res, 1); + bool R0 = bit (res, 0); + + bool Rd3 = bit (d, 3); + + sreg_t s = { 0 }; + + s.v = R7 & !R6 & !R5 & !R4 & !R3 & !R2 & !R1 & !R0; + s.n = R7; + s.z = res == 0; + s.c = R7 | R6 | R5 | R4 | R3 | R2 | R1 | R0; + s.h = R3 | Rd3; + s.s = s.n ^ s.v; + + return (flags_t) { s, H | S | V | N | Z | C, res }; +} + +flags_t flags_ror (uint8_t d, sreg_t sreg) +{ + uint8_t res = (d + 0x100 * sreg.c) >> 1; + + sreg_t s = { 0 }; + + s.c = bit (d, 0); + s.z = res == 0; + s.n = bit (res, 7); + s.v = s.n ^ s.c; + s.s = s.n ^ s.v; + + return (flags_t) { s, S | V | N | Z | C, res }; +} + +flags_t flags_add (uint8_t d, uint8_t r) +{ + uint8_t res = d + r; + bool R7 = bit (res, 7); + + bool Rd7 = bit (d, 7); + bool Rd3 = bit (d, 3); + + bool R3 = bit (res, 3); + bool Rr7 = bit (r, 7); + bool Rr3 = bit (r, 3); + + sreg_t s = { 0 }; + + s.v = (Rd7 & Rr7 & !R7) | (!Rd7 & !Rr7 & R7); + s.n = R7; + s.z = res == 0; + s.c = (Rd7 & Rr7) | (Rr7 & !R7) | (!R7 & Rd7); + s.h = (Rd3 & Rr3) | (Rr3 & !R3) | (!R3 & Rd3); + s.s = s.n ^ s.v; + + return (flags_t) { s, H | S | V | N | Z | C, res }; +} + +static inline +sreg_t sreg_sub (uint8_t d, uint8_t r, uint8_t sreg, uint8_t result) +{ + __asm ("out __SREG__,%[sreg]" "\n\t" + "sub %[d],%[r]" "\n\t" + "in %[sreg],__SREG__" + : [sreg] "+r" (sreg), [d] "+r" (d) + : [r] "r" (r)); + if (d != result) + exit (__LINE__); + return (sreg_t) sreg; +} + +static inline +sreg_t sreg_sbc (uint8_t d, uint8_t r, uint8_t sreg, uint8_t result) +{ + __asm ("out __SREG__,%[sreg]" "\n\t" + "sbc %[d],%[r]" "\n\t" + "in %[sreg],__SREG__" + : [sreg] "+r" (sreg), [d] "+r" (d) + : [r] "r" (r)); + if (d 
!= result) + exit (__LINE__); + return (sreg_t) sreg; +} + +static inline +sreg_t sreg_neg (uint8_t d, uint8_t sreg, uint8_t result) +{ + __asm ("out __SREG__,%[sreg]" "\n\t" + "neg %[d]" "\n\t" + "in %[sreg],__SREG__" + : [sreg] "+r" (sreg), [d] "+r" (d)); + if (d != result) + exit (__LINE__); + return (sreg_t) sreg; +} + +static inline +sreg_t sreg_ror (uint8_t d, uint8_t sreg, uint8_t result) +{ + __asm ("out __SREG__,%[sreg]" "\n\t" + "ror %[d]" "\n\t" + "in %[sreg],__SREG__" + : [sreg] "+r" (sreg), [d] "+r" (d)); + if (d != result) + exit (__LINE__); + return (sreg_t) sreg; +} + +static inline +sreg_t sreg_add (uint8_t d, uint8_t r, uint8_t sreg, uint8_t result) +{ + __asm ("out __SREG__,%[sreg]" "\n\t" + "add %[d],%[r]" "\n\t" + "in %[sreg],__SREG__" + : [sreg] "+r" (sreg), [d] "+r" (d) + : [r] "r" (r)); + if (d != result) + exit (__LINE__); + return (sreg_t) sreg; +} + +void test_sub (uint8_t d, uint8_t r, sreg_t sreg) +{ + sreg_t s0 = sreg_sub (d, r, sreg.val, d - r); + flags_t f = flags_sub (d, r); + if ((f.sreg.val & f.mask) != (s0.val & f.mask)) + exit (__LINE__); +} + +void test_sbc (uint8_t d, uint8_t r, sreg_t sreg) +{ + sreg_t s0 = sreg_sbc (d, r, sreg.val, d - r - sreg.c); + flags_t f = flags_sbc (d, r, sreg); + if ((f.sreg.val & f.mask) != (s0.val & f.mask)) + exit (__LINE__); +} + +void test_neg (uint8_t d, sreg_t sreg) +{ + sreg_t s0 = sreg_neg (d, sreg.val, -d); + flags_t f = flags_neg (d); + if ((f.sreg.val & f.mask) != (s0.val & f.mask)) + exit (__LINE__); +} + +void test_add (uint8_t d, uint8_t r, sreg_t sreg) +{ + sreg_t s0 = sreg_add (d, r, sreg.val, d + r); + flags_t f = flags_add (d, r); + if ((f.sreg.val & f.mask) != (s0.val & f.mask)) + exit (__LINE__); +} + +void test_ror (uint8_t d, sreg_t sreg) +{ + sreg_t s0 = sreg_ror (d, sreg.val, (d + 0x100 * sreg.c) >> 1); + flags_t f = flags_ror (d, sreg); + if ((f.sreg.val & f.mask) != (s0.val & f.mask)) + exit (__LINE__); +} + +void test_sreg (void) +{ + uint8_t d = 0; + + do + { + uint8_t r 
= 0; + test_neg (d, (sreg_t) { 0x00 }); + test_neg (d, (sreg_t) { 0xff }); + + test_ror (d, (sreg_t) { 0 }); + test_ror (d, (sreg_t) { C }); + + do + { + test_add (d, r, (sreg_t) { 0x00 }); + test_add (d, r, (sreg_t) { 0xff }); + + test_sub (d, r, (sreg_t) { 0x00 }); + test_sub (d, r, (sreg_t) { 0xff }); + + test_sbc (d, r, (sreg_t) { 0 }); + test_sbc (d, r, (sreg_t) { C }); + test_sbc (d, r, (sreg_t) { Z }); + test_sbc (d, r, (sreg_t) { C | Z }); + } while (++r); + } while (++d); +} + +int main (void) +{ + test_sreg(); + return 0; +}