https://gcc.gnu.org/g:94355acc2debe03eb3b0a85229e340675a1ff6bd

commit r15-8674-g94355acc2debe03eb3b0a85229e340675a1ff6bd
Author: Georg-Johann Lay <a...@gjlay.de>
Date:   Sat Mar 15 20:53:52 2025 +0100

    AVR: target/119421 Better optimize some bit operations.
    
    There are occasions where knowledge about nonzero bits makes some
    optimizations possible.  For example,
    
       Rd |= Rn << Off
    
    can be implemented as
    
       SBRC Rn, 0
       ORI  Rd, 1 << Off
    
    when Rn in { 0, 1 }, i.e. nonzero_bits (Rn) == 1.  This patch adds some
    patterns that exploit nonzero_bits() in some combiner patterns.
    As insn conditions are not supposed to contain nonzero_bits(), the patch
    splits such insns right after pass insn combine.
    
            PR target/119421
    gcc/
            * config/avr/avr.opt (-muse-nonzero-bits): New option.
            * config/avr/avr-protos.h (avr_nonzero_bits_lsr_operands_p): New.
            (make_avr_pass_split_nzb): New.
            * config/avr/avr.cc (avr_nonzero_bits_lsr_operands_p): New function.
            (avr_rtx_costs_1): Return costs for the new insns.
            * config/avr/avr.md (nzb): New insn attribute.
            (*nzb=1.<code>...): New insns to better support some bit
            operations for <code> in AND, IOR, XOR.
        * config/avr/avr-passes.def (avr_pass_split_nzb): Insert pass
        after combine.
        * config/avr/avr-passes.cc (avr_pass_data_split_nzb): New pass data.
            (avr_pass_split_nzb): New pass.
            (make_avr_pass_split_nzb): New function.
            * common/config/avr/avr-common.cc (avr_option_optimization_table):
            Enable -muse-nonzero-bits for -O2 and higher.
            * doc/invoke.texi (AVR Options): Document -muse-nonzero-bits.
    gcc/testsuite/
            * gcc.target/avr/torture/pr119421-sreg.c: New test.

Diff:
---
 gcc/common/config/avr/avr-common.cc                |   1 +
 gcc/config/avr/avr-passes.cc                       |  71 +++++
 gcc/config/avr/avr-passes.def                      |  12 +
 gcc/config/avr/avr-protos.h                        |   2 +
 gcc/config/avr/avr.cc                              | 106 ++++++++
 gcc/config/avr/avr.md                              | 215 +++++++++++++++
 gcc/config/avr/avr.opt                             |   4 +
 gcc/doc/invoke.texi                                |   9 +-
 .../gcc.target/avr/torture/pr119421-sreg.c         | 301 +++++++++++++++++++++
 9 files changed, 719 insertions(+), 2 deletions(-)

diff --git a/gcc/common/config/avr/avr-common.cc 
b/gcc/common/config/avr/avr-common.cc
index 06c6cc856d3c..203a96528186 100644
--- a/gcc/common/config/avr/avr-common.cc
+++ b/gcc/common/config/avr/avr-common.cc
@@ -42,6 +42,7 @@ static const struct default_options 
avr_option_optimization_table[] =
     { OPT_LEVELS_2_PLUS, OPT_mfuse_move_, NULL, 23 },
     { OPT_LEVELS_2_PLUS, OPT_msplit_bit_shift, NULL, 1 },
     { OPT_LEVELS_2_PLUS, OPT_msplit_ldst, NULL, 1 },
+    { OPT_LEVELS_2_PLUS, OPT_muse_nonzero_bits, NULL, 1 },
     // Stick to the "old" placement of the subreg lowering pass.
     { OPT_LEVELS_1_PLUS, OPT_fsplit_wide_types_early, NULL, 1 },
     /* Allow optimizer to introduce store data races. This used to be the
diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 184619af6cb4..2c21e7be7aba 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -29,6 +29,7 @@
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "diagnostic-core.h"
 #include "cfghooks.h"
 #include "cfganal.h"
 #include "df.h"
@@ -4846,6 +4847,70 @@ avr_pass_fuse_add::execute1 (function *func)
 }
 
 
+
+//////////////////////////////////////////////////////////////////////////////
+// Split insns with nonzero_bits() after combine.
+
+static const pass_data avr_pass_data_split_nzb =
+{
+  RTL_PASS,        // type
+  "",              // name (will be patched)
+  OPTGROUP_NONE,    // optinfo_flags
+  TV_DF_SCAN,      // tv_id
+  0,               // properties_required
+  0,               // properties_provided
+  0,               // properties_destroyed
+  0,               // todo_flags_start
+  0                // todo_flags_finish
+};
+
+class avr_pass_split_nzb : public rtl_opt_pass
+{
+public:
+  avr_pass_split_nzb (gcc::context *ctxt, const char *name)
+    : rtl_opt_pass (avr_pass_data_split_nzb, ctxt)
+  {
+    this->name = name;
+  }
+
+  unsigned int execute (function *) final override
+  {
+    if (avropt_use_nonzero_bits)
+      split_nzb_insns ();
+    return 0;
+  }
+
+  void split_nzb_insns ();
+
+}; // avr_pass_split_nzb
+
+
+void
+avr_pass_split_nzb::split_nzb_insns ()
+{
+  rtx_insn *next;
+
+  for (rtx_insn *insn = get_insns (); insn; insn = next)
+    {
+      next = NEXT_INSN (insn);
+
+      if (INSN_P (insn)
+         && single_set (insn)
+         && get_attr_nzb (insn) == NZB_YES)
+       {
+         rtx_insn *last = try_split (PATTERN (insn), insn, 1 /*last*/);
+
+         // The nonzero_bits() insns *must* split.  If not: ICE.
+         if (last == insn)
+           {
+             debug_rtx (insn);
+             internal_error ("failed to split insn");
+           }
+       }
+    }
+}
+
+
 
 //////////////////////////////////////////////////////////////////////////////
 // Split shift insns after peephole2 / befor avr-fuse-move.
@@ -5645,6 +5710,12 @@ make_avr_pass_casesi (gcc::context *ctxt)
   return new avr_pass_casesi (ctxt, "avr-casesi");
 }
 
+rtl_opt_pass *
+make_avr_pass_split_nzb (gcc::context *ctxt)
+{
+  return new avr_pass_split_nzb (ctxt, "avr-split-nzb");
+}
+
 // Try to replace 2 cbranch insns with 1 comparison and 2 branches.
 
 rtl_opt_pass *
diff --git a/gcc/config/avr/avr-passes.def b/gcc/config/avr/avr-passes.def
index 091005e3b948..eb60a93eeeb0 100644
--- a/gcc/config/avr/avr-passes.def
+++ b/gcc/config/avr/avr-passes.def
@@ -74,6 +74,18 @@ INSERT_PASS_BEFORE (pass_free_cfg, 1, 
avr_pass_recompute_notes);
 
 INSERT_PASS_AFTER (pass_expand, 1, avr_pass_casesi);
 
+/* Some combine insns have nonzero_bits() in their condition, though insns
+   should not use such stuff in their condition.  Therefore, we split such
+   insn into something without nonzero_bits() in their condition right after
+   insn combine.
+
+   Since neither split_all_insns() nor split_all_insns_noflow() work at that
+   point (presumably since there are splits involving branches), we split
+   respective insns (and only such insns) by hand.  Respective insns are
+   tagged with insn attribute nzb = "yes" so that they are easy to spot.  */
+
+INSERT_PASS_AFTER (pass_combine, 1, avr_pass_split_nzb);
+
 /* If-else decision trees generated for switch / case may produce sequences
    like
 
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index 83137c7f6f63..ca30136797dd 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -136,6 +136,7 @@ extern bool reg_unused_after (rtx_insn *insn, rtx reg);
 extern int avr_jump_mode (rtx x, rtx_insn *insn, int = 0);
 extern bool test_hard_reg_class (enum reg_class rclass, rtx x);
 extern bool jump_over_one_insn_p (rtx_insn *insn, rtx dest);
+extern bool avr_nonzero_bits_lsr_operands_p (rtx_code, rtx *);
 
 extern void avr_final_prescan_insn (rtx_insn *insn, rtx *operand,
                                    int num_operands);
@@ -205,6 +206,7 @@ extern rtl_opt_pass *make_avr_pass_pre_proep (gcc::context 
*);
 extern rtl_opt_pass *make_avr_pass_recompute_notes (gcc::context *);
 extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *);
 extern rtl_opt_pass *make_avr_pass_ifelse (gcc::context *);
+extern rtl_opt_pass *make_avr_pass_split_nzb (gcc::context *);
 extern rtl_opt_pass *make_avr_pass_split_after_peephole2 (gcc::context *);
 #ifdef RTX_CODE
 extern bool avr_casei_sequence_check_operands (rtx *xop);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 0ce06a1e580a..d94df84037c6 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -12706,6 +12706,50 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int 
outer_code,
        }
     }
 
+  // Insns with nonzero_bits() == 1 in the condition.
+  if (avropt_use_nonzero_bits
+      && mode == QImode
+      && (code == AND || code == IOR || code == XOR)
+      && REG_P (XEXP (x, 1)))
+    {
+      // "*nzb=1.<code>.lsr_split"
+      // "*nzb=1.<code>.lsr.not_split"
+      bool is_nzb = (GET_CODE (XEXP (x, 0)) == LSHIFTRT
+                    && (REG_P (XEXP (XEXP (x, 0), 0))
+                        || GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR)
+                    && const_0_to_7_operand (XEXP (XEXP (x, 0), 1), QImode));
+      // "*nzb=1.<code>.zerox_split"
+      // "*nzb=1.<code>.zerox.not_split"
+      is_nzb |= (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
+                && (REG_P (XEXP (XEXP (x, 0), 0))
+                    || GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR)
+                && const1_operand (XEXP (XEXP (x, 0), 1), QImode)
+                && const_0_to_7_operand (XEXP (XEXP (x, 0), 2), QImode));
+      // "*nzb=1.<code>.ge0_split"
+      is_nzb |= (GET_CODE (XEXP (x, 0)) == GE
+                && REG_P (XEXP (XEXP (x, 0), 0))
+                && const0_operand (XEXP (XEXP (x, 0), 1), QImode));
+      if (is_nzb)
+       {
+         *total = COSTS_N_INSNS (code == XOR ? 3 : 2);
+         return true;
+       }
+    }
+
+  // Insn "*nzb=1.ior.ashift_split" with nonzero_bits() == 1 in the condition.
+  if (avropt_use_nonzero_bits
+      && mode == QImode
+      && code == IOR
+      && REG_P (XEXP (x, 1))
+      && GET_CODE (XEXP (x, 0)) == ASHIFT
+      && REG_P (XEXP (XEXP (x, 0), 0))
+      && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
+    {
+      *total = COSTS_N_INSNS (2);
+      return true;
+    }
+
+
   switch (code)
     {
     case CONST_INT:
@@ -13684,6 +13728,28 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int 
outer_code,
       *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed);
       return true;
 
+    case GE:
+      if (mode == QImode
+         && REG_P (XEXP (x, 0))
+         && XEXP (x, 1) == const0_rtx)
+       {
+         *total = COSTS_N_INSNS (3);
+         return true;
+       }
+      break;
+
+    case ZERO_EXTRACT:
+      if (mode == QImode
+         && REG_P (XEXP (x, 0))
+         && XEXP (x, 1) == const1_rtx
+         && CONST_INT_P (XEXP (x, 2)))
+       {
+         int bpos = INTVAL (XEXP (x, 2));
+         *total = COSTS_N_INSNS (bpos == 0 ? 1 : bpos == 1 ? 2 : 3);
+         return true;
+       }
+      break;
+
     case COMPARE:
       switch (GET_MODE (XEXP (x, 0)))
        {
@@ -15171,6 +15237,46 @@ avr_emit3_fix_outputs (rtx (*gen)(rtx,rtx,rtx), rtx 
*op,
 }
 
 
+/* A helper for the insn condition of "*nzb=1.<code>.lsr[.not]_split"
+   where <code> is AND, IOR or XOR.  Return true when
+
+      OP[0] <code>= OP[1] >> OP[2]
+
+   can be performed by means of the code of "*nzb=1.<code>.zerox", i.e.
+
+      OP[0] <code>= OP[1].OP[2]
+
+   For example, when OP[0] is in { 0, 1 }, then  R24 &= R10.4
+   can be performed by means of  SBRS R10,4  $  CLR R24.
+   Notice that the constraint of OP[3] is "0".  */
+
+bool
+avr_nonzero_bits_lsr_operands_p (rtx_code code, rtx *op)
+{
+  if (reload_completed)
+    return false;
+
+  const auto offs = INTVAL (op[2]);
+  const auto op1_non0 = nonzero_bits (op[1], QImode);
+  const auto op3_non0 = nonzero_bits (op[3], QImode);
+
+  switch (code)
+    {
+    default:
+      gcc_unreachable ();
+
+    case IOR:
+    case XOR:
+      return op1_non0 >> offs == 1;
+
+    case AND:
+      return op3_non0 == 1;
+    }
+
+  return false;
+}
+
+
 /* Worker function for cpymemhi expander.
    XOP[0]  Destination as MEM:BLK
    XOP[1]  Source      "     "
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 06e31aa7d72d..1c4e44dcfe41 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -84,6 +84,7 @@
   [UNSPEC_STRLEN
    UNSPEC_CPYMEM
    UNSPEC_INDEX_JMP
+   UNSPEC_NZB
    UNSPEC_FMUL
    UNSPEC_FMULS
    UNSPEC_FMULSU
@@ -175,6 +176,10 @@
    no"
   (const_string "no"))
 
+(define_attr "nzb"
+  "yes, no"
+  (const_string "no"))
+
 ;; Flavours of instruction set architecture (ISA), used in enabled attribute
 
 ;; mov  : ISA has no MOVW                movw  : ISA has MOVW
@@ -10916,6 +10921,216 @@
     DONE;
   })
 
+;; Patterns for -muse-nonzero-bits use nonzero_bits() in their condition,
+;; which makes possible some more optimizations.
+;;    Since combine may add clobber of REG_CC, we must make sure that there are
+;; no other routes to synthesize such patterns.  We use an UNSPEC for that.
+;;    As insns are not supposed to use stuff like nonzero_bits() in their
+;; condition, we split the insns right after reload.  For CFG reasons we have
+;; to do the splits by hand in avr_pass_split_nzb.  All insns that must be
+;; split by that pass must have insn attribute "nzb" set to "yes".  Moreover,
+;; the insns to split must be single_sets and must not touch control flow.
+
+(define_code_attr nzb_constr_rdr [(and "r") (ior "d") (xor "r")])
+(define_code_attr nzb_use1_nnr   [(and "n") (ior "n") (xor "r")])
+
+(define_insn_and_split "*nzb=1.<code>.zerox_split"
+  [(set (match_operand:QI 0 "register_operand")
+        (bitop:QI (zero_extract:QI (match_operand:QI 1 "register_operand")
+                                   (const_int 1)
+                                   (match_operand:QI 2 "const_0_to_7_operand"))
+                  (match_operand:QI 3 "register_operand")))]
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed
+   && (<CODE> == IOR || <CODE> == XOR
+       || nonzero_bits (operands[3], QImode) == 1)"
+  { gcc_unreachable (); }
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed"
+  [(parallel [(set (match_dup 0)
+                   (bitop:QI (zero_extract:QI (match_dup 1)
+                                              (const_int 1)
+                                              (match_dup 2))
+                             (unspec:QI [(match_dup 3)
+                                         ] UNSPEC_NZB)))
+              (use (const_int 1))
+              (clobber (reg:CC REG_CC))])]
+  ""
+  [(set_attr "nzb" "yes")])
+
+(define_insn "*nzb=1.<code>.zerox"
+  [(set (match_operand:QI 0 "register_operand"                
"=<nzb_constr_rdr>")
+        (bitop:QI (zero_extract:QI (match_operand:QI 1 "register_operand"     
"r")
+                                   (const_int 1)
+                                   (match_operand:QI 2 "const_0_to_7_operand" 
"n"))
+                  (unspec:QI [(match_operand:QI 3 "register_operand"          
"0")
+                              ] UNSPEC_NZB)))
+   (use (match_operand:QI 4 "nonmemory_operand" "<nzb_use1_nnr>"))
+   (clobber (reg:CC REG_CC))]
+  "optimize && avropt_use_nonzero_bits"
+  {
+    if (<CODE> == AND)
+      return "sbrs %1,%2\;clr %0";
+    else if (<CODE> == IOR)
+      return "sbrc %1,%2\;ori %0,1";
+    else if (<CODE> == XOR)
+      return "sbrc %1,%2\;eor %0,%4";
+    else
+      gcc_unreachable ();
+  }
+  [(set_attr "length" "2")])
+
+(define_insn_and_split "*nzb=1.<code>.lsr_split"
+  [(set (match_operand:QI 0 "register_operand")
+        (bitop:QI (lshiftrt:QI (match_operand:QI 1 "register_operand")
+                               (match_operand:QI 2 "const_0_to_7_operand"))
+                  (match_operand:QI 3 "register_operand")))]
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed
+   && avr_nonzero_bits_lsr_operands_p (<CODE>, operands)"
+  { gcc_unreachable (); }
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed"
+  [(parallel [(set (match_dup 0)
+                   (bitop:QI (zero_extract:QI (match_dup 1)
+                                              (const_int 1)
+                                              (match_dup 2))
+                             (unspec:QI [(match_dup 3)
+                                        ] UNSPEC_NZB)))
+              (use (const_int 1))
+              (clobber (reg:CC REG_CC))])]
+  ""
+  [(set_attr "nzb" "yes")])
+
+(define_insn_and_split "*nzb=1.<code>.zerox.not_split"
+  [(set (match_operand:QI 0 "register_operand")
+        (bitop:QI (zero_extract:QI (xor:QI (match_operand:QI 1 
"register_operand")
+                                           (match_operand:QI 4 
"const_int_operand"))
+                                   (const_int 1)
+                                   (match_operand:QI 2 "const_0_to_7_operand"))
+                  (match_operand:QI 3 "register_operand")))]
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed
+   && INTVAL (operands[2]) == exact_log2 (0xff & INTVAL (operands[4]))
+   && (<CODE> == IOR
+       || nonzero_bits (operands[3], QImode) == 1)"
+  { gcc_unreachable (); }
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed"
+  ; "*nzb=1.<code>.zerox.not"
+  [(parallel [(set (match_dup 0)
+                   (bitop:QI (zero_extract:QI (not:QI (match_dup 1))
+                                              (const_int 1)
+                                              (match_dup 2))
+                             (unspec:QI [(match_dup 3)
+                                         ] UNSPEC_NZB)))
+              (use (const_int 1))
+              (clobber (reg:CC REG_CC))])]
+  ""
+  [(set_attr "nzb" "yes")])
+
+(define_insn_and_split "*nzb=1.<code>.lsr.not_split"
+  [(set (match_operand:QI 0 "register_operand")
+        (bitop:QI (lshiftrt:QI (xor:QI (match_operand:QI 1 "register_operand")
+                                       (match_operand:QI 4 
"const_int_operand"))
+                               (match_operand:QI 2 "const_0_to_7_operand"))
+                  (match_operand:QI 3 "register_operand")))]
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed
+   && INTVAL (operands[2]) == exact_log2 (0xff & INTVAL (operands[4]))
+   && avr_nonzero_bits_lsr_operands_p (<CODE>, operands)"
+  { gcc_unreachable (); }
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed"
+  ; "*nzb=1.<code>.zerox.not"
+  [(parallel [(set (match_dup 0)
+                   (bitop:QI (zero_extract:QI (not:QI (match_dup 1))
+                                              (const_int 1)
+                                              (match_dup 2))
+                             (unspec:QI [(match_dup 3)
+                                         ] UNSPEC_NZB)))
+              (use (const_int 1))
+              (clobber (reg:CC REG_CC))])]
+  ""
+  [(set_attr "nzb" "yes")])
+
+(define_insn_and_split "*nzb=1.<code>.ge0_split"
+  [(set (match_operand:QI 0 "register_operand")
+        (bitop:QI (ge:QI (match_operand:QI 1 "register_operand")
+                         (const_int 0))
+                  (match_operand:QI 2 "register_operand")))]
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed
+   && (<CODE> == IOR || <CODE> == XOR
+       || nonzero_bits (operands[2], QImode) == 1)"
+  { gcc_unreachable (); }
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed"
+  ; "*nzb=1.<code>.zerox.not"
+  [(parallel [(set (match_dup 0)
+                   (bitop:QI (zero_extract:QI (not:QI (match_dup 1))
+                                              (const_int 1)
+                                              (const_int 7))
+                             (unspec:QI [(match_dup 2)
+                                         ] UNSPEC_NZB)))
+              (use (const_int 1))
+              (clobber (reg:CC REG_CC))])]
+  ""
+  [(set_attr "nzb" "yes")])
+
+(define_insn "*nzb=1.<code>.zerox.not"
+  [(set (match_operand:QI 0 "register_operand"                    
"=<nzb_constr_rdr>")
+        (bitop:QI (zero_extract:QI (not:QI (match_operand:QI 1 
"register_operand" "r"))
+                                   (const_int 1)
+                                   (match_operand:QI 2 "const_0_to_7_operand"  
   "n"))
+                  (unspec:QI [(match_operand:QI 3 "register_operand"           
   "0")
+                              ] UNSPEC_NZB)))
+   (use (match_operand:QI 4 "nonmemory_operand" "<nzb_use1_nnr>"))
+   (clobber (reg:CC REG_CC))]
+  "optimize && avropt_use_nonzero_bits"
+  {
+    if (<CODE> == AND)
+      return "sbrc %1,%2\;clr %0";
+    else if (<CODE> == IOR)
+      return "sbrs %1,%2\;ori %0,1";
+    else if (<CODE> == XOR)
+      return "sbrs %1,%2\;eor %0,%4";
+    else
+      gcc_unreachable ();
+  }
+  [(set_attr "length" "2")])
+
+(define_insn_and_split "*nzb=1.ior.ashift_split"
+  [(set (match_operand:QI 0 "register_operand"                       "=d")
+        (ior:QI (ashift:QI (match_operand:QI 1 "register_operand"     "r")
+                           (match_operand:QI 2 "const_0_to_7_operand" "n"))
+                (match_operand:QI 3 "register_operand"                "0")))]
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed
+   && nonzero_bits (operands[1], QImode) == 1"
+  { gcc_unreachable (); }
+  "optimize && avropt_use_nonzero_bits
+   && !reload_completed"
+  [(parallel [(set (match_dup 0)
+                   (unspec:QI [(ior:QI (ashift:QI (match_dup 1)
+                                                  (match_dup 2))
+                                       (match_dup 3))
+                               ] UNSPEC_NZB))
+              (clobber (reg:CC REG_CC))])]
+  ""
+  [(set_attr "nzb" "yes")])
+
+(define_insn "*nzb=1.ior.ashift"
+  [(set (match_operand:QI 0 "register_operand"                                 
  "=d")
+        (unspec:QI [(ior:QI (ashift:QI (match_operand:QI 1 "register_operand"  
   "r")
+                                       (match_operand:QI 2 
"const_0_to_7_operand" "n"))
+                            (match_operand:QI 3 "register_operand"             
   "0"))
+                    ] UNSPEC_NZB))
+   (clobber (reg:CC REG_CC))]
+  "optimize && avropt_use_nonzero_bits"
+  "sbrc %1,0\;ori %0,1<<%2"
+  [(set_attr "length" "2")])
+
 
 ;; Work around PR115307: Early passes expand isinf/f/l to a bloat.
 ;; These passes do not consider costs, and there is no way to
diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt
index d22a118ad9e5..fcd2bf68f2a8 100644
--- a/gcc/config/avr/avr.opt
+++ b/gcc/config/avr/avr.opt
@@ -65,6 +65,10 @@ mpr118012
 Target Var(avropt_pr118012) UInteger Init(1) Undocumented
 This option is on per default in order to work around PR118012.
 
+muse-nonzero-bits
+Target Var(avropt_use_nonzero_bits) UInteger Init(0) Optimization
+Optimization. Allow to use nonzero_bits() in some insn conditions.
+
 mshort-calls
 Target RejectNegative Mask(SHORT_CALLS)
 This option is used internally for multilib generation and selection.  Assume 
RJMP / RCALL can target all program memory.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 515d91ac2e3a..81bfacfc35d4 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -912,8 +912,8 @@ Objective-C and Objective-C++ Dialects}.
 -mdouble=@var{bits}  -mlong-double=@var{bits}  -mno-call-main
 -mn_flash=@var{size}  -mfract-convert-truncate  -mno-interrupts
 -mmain-is-OS_task  -mrelax  -mrmw  -mstrict-X  -mtiny-stack
--mrodata-in-ram  -msplit-bit-shift  -msplit-ldst
--mshort-calls  -mskip-bug  -nodevicelib  -nodevicespecs
+-mrodata-in-ram  -msplit-bit-shift  -msplit-ldst  -mshort-calls
+-mskip-bug  -muse-nonzero-bits  -nodevicelib  -nodevicespecs
 -Waddr-space-convert  -Wmisspelled-isr}
 
 @emph{Blackfin Options} (@ref{Blackfin Options})
@@ -24630,6 +24630,11 @@ a multiple of 8 is controlled by @option{-mfuse-move}.
 Split multi-byte loads and stores into several byte loads and stores.
 This optimization is turned on per default for @option{-O2} and higher.
 
+@opindex muse-nonzero-bits
+@item -muse-nonzero-bits
+Enable some patterns for bit optimizations that depend on specific values.
+This optimization is turned on per default for @option{-O2} and higher.
+
 @end table
 
 @anchor{eind}
diff --git a/gcc/testsuite/gcc.target/avr/torture/pr119421-sreg.c 
b/gcc/testsuite/gcc.target/avr/torture/pr119421-sreg.c
new file mode 100644
index 000000000000..3752d4fe6956
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/pr119421-sreg.c
@@ -0,0 +1,301 @@
+/* { dg-do run } */
+/* { dg-additional-options "-std=gnu99 -Wno-pedantic" } */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#define BITNO_I         7
+#define BITNO_T         6
+#define BITNO_H         5
+#define BITNO_S         4
+#define BITNO_V         3
+#define BITNO_N         2
+#define BITNO_Z         1
+#define BITNO_C         0
+
+#define I (1u << BITNO_I)
+#define T (1u << BITNO_T)
+#define H (1u << BITNO_H)
+#define S (1u << BITNO_S)
+#define V (1u << BITNO_V)
+#define N (1u << BITNO_N)
+#define Z (1u << BITNO_Z)
+#define C (1u << BITNO_C)
+
+#define bit(a, x) ((bool) ((a) & (1u << (x))))
+
+typedef union
+{
+  uint8_t val;
+  struct
+  {
+    bool c:1;
+    bool z:1;
+    bool n:1;
+    bool v:1;
+    bool s:1;
+    bool h:1;
+    bool t:1;
+    bool i:1;
+  };
+} sreg_t;
+
+
+typedef struct
+{
+  sreg_t sreg;
+  uint8_t mask;
+  uint16_t result;
+} flags_t;
+
+flags_t flags_sub (uint8_t d, uint8_t r)
+{
+  uint8_t res = d - r;
+  bool R7 = bit (res, 7);
+
+  bool Rd7 = bit (d, 7);
+  bool Rd3 = bit (d, 3);
+
+  bool R3 = bit (res, 3);
+  bool Rr7 = bit (r, 7);
+  bool Rr3 = bit (r, 3);
+
+  sreg_t s = { 0 };
+
+  s.v = (Rd7 & !Rr7 & !R7) | (!Rd7 & Rr7 & R7);
+  s.n = R7;
+  s.z = res == 0;
+  s.c = (!Rd7 & Rr7) | (Rr7 & R7) | (R7 & !Rd7);
+  s.h = (!Rd3 & Rr3) | (Rr3 & R3) | (R3 & !Rd3);
+  s.s = s.n ^ s.v;
+  
+  return (flags_t) { s, H | S | V | N | Z | C, res };
+}
+
+flags_t flags_sbc (uint8_t d, uint8_t r, sreg_t sreg)
+{
+  uint8_t res = d - r - sreg.c;
+  bool R7 = bit (res, 7);
+
+  bool Rd7 = bit (d, 7);
+  bool Rd3 = bit (d, 3);
+
+  bool R3 = bit (res, 3);
+  bool Rr7 = bit (r, 7);
+  bool Rr3 = bit (r, 3);
+
+  sreg_t s = { 0 };
+
+  s.v = (Rd7 & !Rr7 & !R7) | (!Rd7 & Rr7 & R7);
+  s.n = R7;
+  s.z = (res == 0) & sreg.z;
+  s.c = (!Rd7 & Rr7) | (Rr7 & R7) | (R7 & !Rd7);
+  s.h = (!Rd3 & Rr3) | (Rr3 & R3) | (R3 & !Rd3);
+  s.s = s.n ^ s.v;
+  
+  return (flags_t) { s, H | S | V | N | Z | C, res };
+}
+
+flags_t flags_neg (uint8_t d)
+{
+  uint8_t res = -d;
+  bool R7 = bit (res, 7);
+  bool R6 = bit (res, 6);
+  bool R5 = bit (res, 5);
+  bool R4 = bit (res, 4);
+  bool R3 = bit (res, 3);
+  bool R2 = bit (res, 2);
+  bool R1 = bit (res, 1);
+  bool R0 = bit (res, 0);
+
+  bool Rd3 = bit (d, 3);
+
+  sreg_t s = { 0 };
+
+  s.v = R7 & !R6  & !R5         & !R4  & !R3  & !R2  & !R1  & !R0; 
+  s.n = R7;
+  s.z = res == 0;
+  s.c = R7 | R6 | R5 | R4 | R3 | R2 | R1 | R0;
+  s.h = R3 | Rd3;
+  s.s = s.n ^ s.v;
+  
+  return (flags_t) { s, H | S | V | N | Z | C, res };
+}
+
+flags_t flags_ror (uint8_t d, sreg_t sreg)
+{
+  uint8_t res = (d + 0x100 * sreg.c) >> 1;
+
+  sreg_t s = { 0 };
+
+  s.c = bit (d, 0);
+  s.z = res == 0;
+  s.n = bit (res, 7);
+  s.v = s.n ^ s.c;
+  s.s = s.n ^ s.v;
+
+  return (flags_t) { s, S | V | N | Z | C, res };
+}
+
+flags_t flags_add (uint8_t d, uint8_t r)
+{
+  uint8_t res = d + r;
+  bool R7 = bit (res, 7);
+
+  bool Rd7 = bit (d, 7);
+  bool Rd3 = bit (d, 3);
+
+  bool R3 = bit (res, 3);
+  bool Rr7 = bit (r, 7);
+  bool Rr3 = bit (r, 3);
+
+  sreg_t s = { 0 };
+
+  s.v = (Rd7 & Rr7 & !R7) | (!Rd7 & !Rr7 & R7);
+  s.n = R7;
+  s.z = res == 0;
+  s.c = (Rd7 & Rr7) | (Rr7 & !R7) | (!R7 & Rd7);
+  s.h = (Rd3 & Rr3) | (Rr3 & !R3) | (!R3 & Rd3);
+  s.s = s.n ^ s.v;
+  
+  return (flags_t) { s, H | S | V | N | Z | C, res };
+}
+
+static inline
+sreg_t sreg_sub (uint8_t d, uint8_t r, uint8_t sreg, uint8_t result)
+{
+  __asm ("out __SREG__,%[sreg]"         "\n\t"
+        "sub %[d],%[r]"         "\n\t"
+        "in %[sreg],__SREG__"
+        : [sreg] "+r" (sreg), [d] "+r" (d)
+        : [r] "r" (r));
+  if (d != result)
+    exit (__LINE__);
+  return (sreg_t) sreg;
+}
+
+static inline
+sreg_t sreg_sbc (uint8_t d, uint8_t r, uint8_t sreg, uint8_t result)
+{
+  __asm ("out __SREG__,%[sreg]"         "\n\t"
+        "sbc %[d],%[r]"         "\n\t"
+        "in %[sreg],__SREG__"
+        : [sreg] "+r" (sreg), [d] "+r" (d)
+        : [r] "r" (r));
+  if (d != result)
+    exit (__LINE__);
+  return (sreg_t) sreg;
+}
+
+static inline
+sreg_t sreg_neg (uint8_t d, uint8_t sreg, uint8_t result)
+{
+  __asm ("out __SREG__,%[sreg]"         "\n\t"
+        "neg %[d]"              "\n\t"
+        "in %[sreg],__SREG__"
+        : [sreg] "+r" (sreg), [d] "+r" (d));
+  if (d != result)
+    exit (__LINE__);
+  return (sreg_t) sreg;
+}
+
+static inline
+sreg_t sreg_ror (uint8_t d, uint8_t sreg, uint8_t result)
+{
+  __asm ("out __SREG__,%[sreg]"         "\n\t"
+        "ror %[d]"              "\n\t"
+        "in %[sreg],__SREG__"
+        : [sreg] "+r" (sreg), [d] "+r" (d));
+  if (d != result)
+    exit (__LINE__);
+  return (sreg_t) sreg;
+}
+
+static inline
+sreg_t sreg_add (uint8_t d, uint8_t r, uint8_t sreg, uint8_t result)
+{
+  __asm ("out __SREG__,%[sreg]"         "\n\t"
+        "add %[d],%[r]"         "\n\t"
+        "in %[sreg],__SREG__"
+        : [sreg] "+r" (sreg), [d] "+r" (d)
+        : [r] "r" (r));
+  if (d != result)
+    exit (__LINE__);
+  return (sreg_t) sreg;
+}
+
+void test_sub (uint8_t d, uint8_t r, sreg_t sreg)
+{
+  sreg_t s0 = sreg_sub (d, r, sreg.val, d - r);
+  flags_t f = flags_sub (d, r);
+  if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+    exit (__LINE__);
+}
+
+void test_sbc (uint8_t d, uint8_t r, sreg_t sreg)
+{
+  sreg_t s0 = sreg_sbc (d, r, sreg.val, d - r - sreg.c);
+  flags_t f = flags_sbc (d, r, sreg);
+  if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+    exit (__LINE__);
+}
+
+void test_neg (uint8_t d, sreg_t sreg)
+{
+  sreg_t s0 = sreg_neg (d, sreg.val, -d);
+  flags_t f = flags_neg (d);
+  if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+    exit (__LINE__);
+}
+
+void test_add (uint8_t d, uint8_t r, sreg_t sreg)
+{
+  sreg_t s0 = sreg_add (d, r, sreg.val, d + r);
+  flags_t f = flags_add (d, r);
+  if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+    exit (__LINE__);
+}
+
+void test_ror (uint8_t d, sreg_t sreg)
+{
+  sreg_t s0 = sreg_ror (d, sreg.val, (d + 0x100 * sreg.c) >> 1);
+  flags_t f = flags_ror (d, sreg);
+  if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+    exit (__LINE__);
+}
+
+void test_sreg (void)
+{
+  uint8_t d = 0;
+
+  do
+    {
+      uint8_t r = 0;
+      test_neg (d, (sreg_t) { 0x00 });
+      test_neg (d, (sreg_t) { 0xff });
+
+      test_ror (d, (sreg_t) { 0 });
+      test_ror (d, (sreg_t) { C });
+
+      do
+       {
+         test_add (d, r, (sreg_t) { 0x00 });
+         test_add (d, r, (sreg_t) { 0xff });
+
+         test_sub (d, r, (sreg_t) { 0x00 });
+         test_sub (d, r, (sreg_t) { 0xff });
+
+         test_sbc (d, r, (sreg_t) { 0 });
+         test_sbc (d, r, (sreg_t) { C });
+         test_sbc (d, r, (sreg_t) { Z });
+         test_sbc (d, r, (sreg_t) { C | Z });
+       } while (++r);
+    } while (++d);
+}
+
+int main (void)
+{
+  test_sreg();
+  return 0;
+}

Reply via email to