https://gcc.gnu.org/g:873cffc79209119a65aa657b0d427345e52b75c3

commit r15-5569-g873cffc79209119a65aa657b0d427345e52b75c3
Author: Georg-Johann Lay <a...@gjlay.de>
Date:   Wed Nov 20 12:25:18 2024 +0100

    AVR: target/117726 - Better optimizations of ASHIFT:SI insns.
    
    This patch improves the 4-byte ASHIFT insns.
    1) It adds a "r,r,C15" alternative for improved long << 15.
    2) It adds 3-operand alternatives (depending on options) and
       splits them after peephole2 / before avr-fuse-move into
       a 3-operand byte shift and a 2-operand residual bit shift.
    For better control, it introduces new option -msplit-bit-shift
    that's activated at -O2 and higher per default.  2) is even
    performed with -Os, but not with -Oz.
    
            PR target/117726
    gcc/
            * config/avr/avr.opt (-msplit-bit-shift): Add new optimization 
option.
            * common/config/avr/avr-common.cc (avr_option_optimization_table)
            [OPT_LEVELS_2_PLUS]: Turn on -msplit-bit-shift.
            * config/avr/avr.h (machine_function.n_avr_fuse_add_executed):
            New bool component.
            * config/avr/avr.md (attr "isa") <2op, 3op>: Add new values.
            (attr "enabled"): Handle them.
            (ashlsi3, *ashlsi3, *ashlsi3_const): Add "r,r,C15" alternative.
            Add "r,0,C4l" and "r,r,C4l" alternatives (depending on 2op / 3op).
            (define_split) [avr_split_bit_shift]: Add 2 new ashift:ALL4 
splitters.
            (define_peephole2) [ashift:ALL4]: Add (match_dup 3) so that the 
scratch
            won't overlap with the output operand of the matched insn.
            (*ashl<mode>3_const_split): Remove unused ashift:ALL4 splitter.
            * config/avr/avr-passes.cc (emit_valid_insn)
            (emit_valid_move_clobbercc): Move out of anonymous namespace.
            (make_avr_pass_fuse_add) <gate>: Don't override.
            <execute>: Set n_avr_fuse_add_executed according to
            func->machine->n_avr_fuse_add_executed.
            (pass_data avr_pass_data_split_after_peephole2): New object.
            (avr_pass_split_after_peephole2): New rtl_opt_pass.
            (avr_emit_shift): New static function.
            (avr_shift_is_3op, avr_split_shift_p, avr_split_shift)
            (make_avr_pass_split_after_peephole2): New functions.
            * config/avr/avr-passes.def (avr_pass_split_after_peephole2):
            Insert new pass after pass_peephole2.
            * config/avr/avr-protos.h
            (n_avr_fuse_add_executed, avr_shift_is_3op, avr_split_shift_p)
            (avr_split_shift, avr_optimize_size_level)
            (make_avr_pass_split_after_peephole2): New prototypes.
            * config/avr/avr.cc (n_avr_fuse_add_executed): New global variable.
            (avr_optimize_size_level): New function.
            (avr_set_current_function): Set n_avr_fuse_add_executed
            according to cfun->machine->n_avr_fuse_add_executed.
            (ashlsi3_out) [case 15]: Output optimized code for this offset.
        (avr_rtx_costs_1) [ASHIFT, SImode]: Adjust costs of offsets 15, 16.
        * config/avr/constraints.md (C4a, C4l, C4r): New constraints.
            * pass_manager.h (pass_manager): Adjust comments.

Diff:
---
 gcc/common/config/avr/avr-common.cc |   1 +
 gcc/config/avr/avr-passes.cc        | 214 +++++++++++++++++++++++++++++++-----
 gcc/config/avr/avr-passes.def       |   7 ++
 gcc/config/avr/avr-protos.h         |   8 ++
 gcc/config/avr/avr.cc               |  56 +++++++++-
 gcc/config/avr/avr.h                |   6 +
 gcc/config/avr/avr.md               | 204 +++++++++++++++++++++-------------
 gcc/config/avr/avr.opt              |   4 +
 gcc/config/avr/constraints.md       |  16 +++
 gcc/pass_manager.h                  |   2 +-
 10 files changed, 407 insertions(+), 111 deletions(-)

diff --git a/gcc/common/config/avr/avr-common.cc 
b/gcc/common/config/avr/avr-common.cc
index 333f950c80e3..54c99bd0b4af 100644
--- a/gcc/common/config/avr/avr-common.cc
+++ b/gcc/common/config/avr/avr-common.cc
@@ -39,6 +39,7 @@ static const struct default_options 
avr_option_optimization_table[] =
     { OPT_LEVELS_2_PLUS, OPT_mfuse_add_, NULL, 2 },
     { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_mfuse_move_, NULL, 3 },
     { OPT_LEVELS_2_PLUS, OPT_mfuse_move_, NULL, 23 },
+    { OPT_LEVELS_2_PLUS, OPT_msplit_bit_shift, NULL, 1 },
     // Stick to the "old" placement of the subreg lowering pass.
     { OPT_LEVELS_1_PLUS, OPT_fsplit_wide_types_early, NULL, 1 },
     /* Allow optimizer to introduce store data races. This used to be the
diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 49473efbb0d3..b854f186a7ac 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -49,6 +49,34 @@
 
 #define FIRST_GPR (AVR_TINY ? REG_18 : REG_2)
 
+
+// Emit pattern PAT, and ICE when the insn is not valid / not recognized.
+
+static rtx_insn *
+emit_valid_insn (rtx pat)
+{
+  rtx_insn *insn = emit_insn (pat);
+
+  if (! valid_insn_p (insn))  // Also runs recog().
+    fatal_insn ("emit unrecognizable insn", insn);
+
+  return insn;
+}
+
+// Emit a single_set with an optional scratch operand.  This function
+// asserts that the new insn is valid and recognized.
+
+static rtx_insn *
+emit_valid_move_clobbercc (rtx dest, rtx src, rtx scratch = NULL_RTX)
+{
+  rtx pat = scratch
+    ? gen_gen_move_clobbercc_scratch (dest, src, scratch)
+    : gen_gen_move_clobbercc (dest, src);
+
+  return emit_valid_insn (pat);
+}
+
+
 namespace
 {
 
@@ -116,31 +144,6 @@ single_set_with_scratch (rtx_insn *insn, int 
&regno_scratch)
   return single_set (insn);
 }
 
-// Emit pattern PAT, and ICE when the insn is not valid / not recognized.
-
-static rtx_insn *
-emit_valid_insn (rtx pat)
-{
-  rtx_insn *insn = emit_insn (pat);
-
-  if (! valid_insn_p (insn))  // Also runs recog().
-    fatal_insn ("emit unrecognizable insn", insn);
-
-  return insn;
-}
-
-// Emit a single_set with an optional scratch operand.  This function
-// asserts that the new insn is valid and recognized.
-
-static rtx_insn *
-emit_valid_move_clobbercc (rtx dest, rtx src, rtx scratch = NULL_RTX)
-{
-  rtx pat = scratch
-    ? gen_gen_move_clobbercc_scratch (dest, src, scratch)
-    : gen_gen_move_clobbercc (dest, src);
-
-  return emit_valid_insn (pat);
-}
 
 // One bit for each GRP in REG_0 ... REG_31.
 using gprmask_t = uint32_t;
@@ -4213,12 +4216,17 @@ public:
     return make_avr_pass_fuse_add (m_ctxt);
   }
 
-  bool gate (function *) final override
+  unsigned int execute (function *func) final override
   {
-    return optimize && avr_fuse_add > 0;
+    func->machine->n_avr_fuse_add_executed += 1;
+    n_avr_fuse_add_executed = func->machine->n_avr_fuse_add_executed;
+
+    if (optimize && avr_fuse_add > 0)
+      return execute1 (func);
+    return 0;
   }
 
-  unsigned int execute (function *) final override;
+  unsigned int execute1 (function *);
 
   struct Some_Insn
   {
@@ -4697,7 +4705,7 @@ avr_pass_fuse_add::fuse_mem_add (Mem_Insn &mem, Add_Insn 
&add)
    as  PRE_DEC + PRE_DEC  for two adjacent locations.  */
 
 unsigned int
-avr_pass_fuse_add::execute (function *func)
+avr_pass_fuse_add::execute1 (function *func)
 {
   df_note_add_problem ();
   df_analyze ();
@@ -4769,6 +4777,146 @@ avr_pass_fuse_add::execute (function *func)
 }
 
 
+
+//////////////////////////////////////////////////////////////////////////////
+// Split insns after peephole2 / befor avr-fuse-move.
+static const pass_data avr_pass_data_split_after_peephole2 =
+{
+  RTL_PASS,        // type
+  "",              // name (will be patched)
+  OPTGROUP_NONE,    // optinfo_flags
+  TV_DF_SCAN,      // tv_id
+  0,               // properties_required
+  0,               // properties_provided
+  0,               // properties_destroyed
+  0,               // todo_flags_start
+  0                // todo_flags_finish
+};
+
+class avr_pass_split_after_peephole2 : public rtl_opt_pass
+{
+public:
+  avr_pass_split_after_peephole2 (gcc::context *ctxt, const char *name)
+    : rtl_opt_pass (avr_pass_data_split_after_peephole2, ctxt)
+  {
+    this->name = name;
+  }
+
+  unsigned int execute (function *) final override
+  {
+    if (avr_shift_is_3op ())
+      split_all_insns ();
+    return 0;
+  }
+
+}; // avr_pass_split_after_peephole2
+
+} // anonymous namespace
+
+
+/* Whether some shift insn alternatives are a 3-operand insn or a
+   2-operand insn.  This 3op alternatives allow the source and the
+   destination register of the shift to be different right from the
+   start, because the splitter will split the 3op shift into a 3op byte
+   shift and a 2op residual bit shift.
+   (When the residual shift has an offset of one less than the bitsize,
+   then the residual shift is also a 3op insn.  */
+
+bool
+avr_shift_is_3op ()
+{
+  // Don't split for OPTIMIZE_SIZE_MAX (-Oz).
+  // For OPTIMIZE_SIZE_BALANCED (-Os), we still split because
+  // the size overhead (if exists at all) is marginal.
+
+  return (avr_split_bit_shift
+         && optimize > 0
+         && avr_optimize_size_level () < OPTIMIZE_SIZE_MAX);
+}
+
+
+/* Implement constraints `C4a', `C4l' and `C4r'.
+   Whether we split an N_BYTES shift of code CODE in { ASHIFTRT,
+   LSHIFTRT, ASHIFT } into a byte shift and a residual bit shift.  */
+
+bool
+avr_split_shift_p (int n_bytes, int offset, rtx_code)
+{
+  gcc_assert (n_bytes == 4);
+
+  return (avr_shift_is_3op ()
+         && offset % 8 != 0 && IN_RANGE (offset, 17, 30));
+}
+
+
+static void
+avr_emit_shift (rtx_code code, rtx dest, rtx src, int off, rtx scratch)
+{
+  machine_mode mode = GET_MODE (dest);
+  rtx shift;
+
+  if (off == GET_MODE_BITSIZE (mode) - 1)
+    {
+      shift = gen_rtx_fmt_ee (code, mode, src, GEN_INT (off));
+    }
+  else
+    {
+      if (REGNO (dest) != REGNO (src))
+       emit_valid_move_clobbercc (dest, src);
+      shift = gen_rtx_fmt_ee (code, mode, dest, GEN_INT (off));
+    }
+
+  emit_valid_move_clobbercc (dest, shift, scratch);
+}
+
+
+/* Worker for define_split that run when -msplit-bit-shift is on.
+   Split a shift of code CODE into a 3op byte shift and a residual bit shift.
+   Return 'true' when a split has been performed and insns have been emitted.
+   Otherwise, return 'false'.  */
+
+bool
+avr_split_shift (rtx xop[], rtx scratch, rtx_code code)
+{
+  scratch = scratch && REG_P (scratch) ? scratch : NULL_RTX;
+  rtx dest = xop[0];
+  rtx src = xop[1];
+  int ioff = INTVAL (xop[2]);
+
+  gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 4);
+
+  if (code == ASHIFT)
+    {
+      if (ioff >= 25)
+       {
+         rtx dst8 = avr_byte (dest, 3);
+         rtx src8 = avr_byte (src, 0);
+         avr_emit_shift (code, dst8, src8, ioff % 8, NULL_RTX);
+         emit_valid_move_clobbercc (avr_byte (dest, 2), const0_rtx);
+         emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx);
+         return true;
+       }
+      else if (ioff >= 17)
+       {
+         rtx dst16 = avr_word (dest, 2);
+         rtx src16 = avr_word (src, 0);
+         avr_emit_shift (code, dst16, src16, ioff % 16, scratch);
+         emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx);
+         return true;
+       }
+      else
+       gcc_unreachable ();
+    }
+  else
+    gcc_unreachable ();
+
+  return false;
+}
+
+
+namespace
+{
+
 
 //////////////////////////////////////////////////////////////////////////////
 // Determine whether an ISR may use the __gcc_isr pseudo-instruction.
@@ -5125,3 +5273,11 @@ make_avr_pass_fuse_move (gcc::context *ctxt)
 {
   return new avr_pass_fuse_move (ctxt, "avr-fuse-move");
 }
+
+// Split insns after peephole2 / befor avr-fuse-move.
+
+rtl_opt_pass *
+make_avr_pass_split_after_peephole2 (gcc::context *ctxt)
+{
+  return new avr_pass_split_after_peephole2 (ctxt, 
"avr-split-after-peephole2");
+}
diff --git a/gcc/config/avr/avr-passes.def b/gcc/config/avr/avr-passes.def
index 857e6b521238..be8278370b01 100644
--- a/gcc/config/avr/avr-passes.def
+++ b/gcc/config/avr/avr-passes.def
@@ -104,3 +104,10 @@ INSERT_PASS_BEFORE (pass_split_after_reload, 1, 
avr_pass_ifelse);
    -  The RTL peepholer may optimize insns involving lower registers.  */
 
 INSERT_PASS_AFTER (pass_peephole2, 1, avr_pass_fuse_move);
+
+  /* Run an instance of post-reload split prior to avr-fuse-move.
+     Purpose is to split 3-operand shift insns into a 3-operand shift
+     with a byte offset, and a 2-operand residual shift after
+        RTL peepholes but prior to the avr-fuse-move pass.  */
+
+INSERT_PASS_AFTER (pass_peephole2, 1, avr_pass_split_after_peephole2);
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index d316e0182a23..c9bb5bc5139a 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -169,6 +169,13 @@ extern rtx cc_reg_rtx;
 extern rtx ccn_reg_rtx;
 extern rtx cczn_reg_rtx;
 
+extern int n_avr_fuse_add_executed;
+extern bool avr_shift_is_3op ();
+extern bool avr_split_shift_p (int n_bytes, int offset, rtx_code);
+extern bool avr_split_shift (rtx xop[], rtx xscratch, rtx_code);
+
+extern int avr_optimize_size_level ();
+
 #endif /* RTX_CODE */
 
 #ifdef REAL_VALUE_TYPE
@@ -188,6 +195,7 @@ extern rtl_opt_pass *make_avr_pass_pre_proep (gcc::context 
*);
 extern rtl_opt_pass *make_avr_pass_recompute_notes (gcc::context *);
 extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *);
 extern rtl_opt_pass *make_avr_pass_ifelse (gcc::context *);
+extern rtl_opt_pass *make_avr_pass_split_after_peephole2 (gcc::context *);
 #ifdef RTX_CODE
 extern bool avr_casei_sequence_check_operands (rtx *xop);
 extern bool avr_split_fake_addressing_move (rtx_insn *insn, rtx *operands);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 508e2d147bff..d74b20e798e5 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -229,6 +229,12 @@ bool avr_need_clear_bss_p = false;
 bool avr_need_copy_data_p = false;
 bool avr_has_rodata_p = false;
 
+/* Counts how often pass avr-fuse-add has been executed.  Is is kept in
+   sync with cfun->machine->n_avr_fuse_add_executed and serves as an
+   insn condition for shift insn splitters.  */
+int n_avr_fuse_add_executed = 0;
+
+
 
 /* Transform UP into lowercase and write the result to LO.
    You must provide enough space for LO.  Return LO.  */
@@ -526,6 +532,14 @@ avr_option_override (void)
 }
 
 
+int avr_optimize_size_level ()
+{
+  return cfun && cfun->decl
+    ? opt_for_fn (cfun->decl, optimize_size)
+    : optimize_size;
+}
+
+
 /* Implement `INIT_EXPANDERS'.  */
 /* The function works like a singleton.  */
 
@@ -823,8 +837,12 @@ avr_set_current_function (tree decl)
   if (decl == NULL_TREE
       || current_function_decl == NULL_TREE
       || current_function_decl == error_mark_node
-      || ! cfun->machine
-      || cfun->machine->attributes_checked_p)
+      || ! cfun->machine)
+    return;
+
+  n_avr_fuse_add_executed = cfun->machine->n_avr_fuse_add_executed;
+
+  if (cfun->machine->attributes_checked_p)
     return;
 
   location_t loc = DECL_SOURCE_LOCATION (decl);
@@ -6590,7 +6608,7 @@ avr_out_cmp_ext (rtx xop[], rtx_code code, int *plen)
 
 
 /* Generate asm equivalent for various shifts.  This only handles cases
-   that are not already carefully hand-optimized in ?sh??i3_out.
+   that are not already carefully hand-optimized in ?sh<mode>3_out.
 
    OPERANDS[0] resp. %0 in TEMPL is the operand to be shifted.
    OPERANDS[2] is the shift count as CONST_INT, MEM or REG.
@@ -7042,6 +7060,7 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen)
     {
       int reg0 = true_regnum (operands[0]);
       int reg1 = true_regnum (operands[1]);
+      bool reg1_unused_after_p = reg_unused_after (insn, operands[1]);
 
       if (plen)
        *plen = 0;
@@ -7070,6 +7089,30 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen)
                           "mov %B0,%A1"  CR_TAB
                           "mov %C0,%B1"  CR_TAB
                           "mov %D0,%C1", operands, plen, 4);
+       case 15:
+         avr_asm_len (reg1_unused_after_p
+                      ? "lsr %C1"
+                      : "bst %C1,0", operands, plen, 1);
+         if (reg0 + 2 != reg1)
+           {
+             if (AVR_HAVE_MOVW)
+               avr_asm_len ("movw %C0,%A1", operands, plen, 1);
+             else
+               avr_asm_len ("mov %C0,%A1"  CR_TAB
+                            "mov %D0,%B1", operands, plen, 2);
+           }
+         return reg1_unused_after_p
+           ? avr_asm_len ("clr %A0"  CR_TAB
+                          "clr %B0"  CR_TAB
+                          "ror %D0"  CR_TAB
+                          "ror %C0"  CR_TAB
+                          "ror %B0", operands, plen, 5)
+           : avr_asm_len ("clr %A0"  CR_TAB
+                          "clr %B0"  CR_TAB
+                          "lsr %D0"  CR_TAB
+                          "ror %C0"  CR_TAB
+                          "ror %B0"  CR_TAB
+                          "bld %D0,7", operands, plen, 6);
        case 16:
          if (reg0 + 2 == reg1)
            return avr_asm_len ("clr %B0"  CR_TAB
@@ -12392,9 +12435,14 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int 
outer_code,
                break;
              case 1:
              case 8:
-             case 16:
                *total = COSTS_N_INSNS (4);
                break;
+             case 15:
+               *total = COSTS_N_INSNS (8 - AVR_HAVE_MOVW);
+               break;
+             case 16:
+               *total = COSTS_N_INSNS (4 - AVR_HAVE_MOVW);
+               break;
              case 31:
                *total = COSTS_N_INSNS (6);
                break;
diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h
index df0462259db5..7d887a6579ce 100644
--- a/gcc/config/avr/avr.h
+++ b/gcc/config/avr/avr.h
@@ -610,6 +610,12 @@ struct GTY(()) machine_function
   /* 'true' if this function references .L__stack_usage like with
      __builtin_return_address.  */
   bool use_L__stack_usage;
+
+  /* Counts how many times the execute() method of the avr-fuse-add
+     has been invoked.  The count is even increased when the optimization
+     itself is not run.  This purpose of this variable is to provide
+     information about where in the pass sequence we are.  */
+  int n_avr_fuse_add_executed;
 };
 
 /* AVR does not round pushes, but the existence of this macro is
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 04d838ef8a72..8c1326fdf167 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -184,73 +184,75 @@
 ;; no_xmega: non-XMEGA core              xmega : XMEGA core
 ;; no_adiw:  ISA has no ADIW, SBIW       adiw  : ISA has ADIW, SBIW
 
+;; The following ISA attributes are actually not architecture specific,
+;; but depend on (optimization) options.  This is because the "enabled"
+;; attribut can't depend on more than one other attribute.  This means
+;; that 2op and 3op must work for all ISAs, and hence a 'flat' attribue
+;; scheme can be used (as opposed to a true cartesian product).
+
+;; 2op  : insn is a 2-operand insn       3op   : insn is a 3-operand insn
+
 (define_attr "isa"
   "mov,movw, rjmp,jmp, ijmp,eijmp, lpm,lpmx, elpm,elpmx, no_xmega,xmega,
    no_adiw,adiw,
+   2op,3op,
    standard"
   (const_string "standard"))
 
 (define_attr "enabled" ""
-  (cond [(eq_attr "isa" "standard")
-         (const_int 1)
+  (if_then_else
+   (ior (eq_attr "isa" "standard")
+
+        (and (eq_attr "isa" "mov")
+             (match_test "!AVR_HAVE_MOVW"))
 
-         (and (eq_attr "isa" "mov")
-              (match_test "!AVR_HAVE_MOVW"))
-         (const_int 1)
+        (and (eq_attr "isa" "movw")
+             (match_test "AVR_HAVE_MOVW"))
 
-         (and (eq_attr "isa" "movw")
-              (match_test "AVR_HAVE_MOVW"))
-         (const_int 1)
+        (and (eq_attr "isa" "rjmp")
+             (match_test "!AVR_HAVE_JMP_CALL"))
 
-         (and (eq_attr "isa" "rjmp")
-              (match_test "!AVR_HAVE_JMP_CALL"))
-         (const_int 1)
+        (and (eq_attr "isa" "jmp")
+             (match_test "AVR_HAVE_JMP_CALL"))
 
-         (and (eq_attr "isa" "jmp")
-              (match_test "AVR_HAVE_JMP_CALL"))
-         (const_int 1)
+        (and (eq_attr "isa" "ijmp")
+             (match_test "!AVR_HAVE_EIJMP_EICALL"))
 
-         (and (eq_attr "isa" "ijmp")
-              (match_test "!AVR_HAVE_EIJMP_EICALL"))
-         (const_int 1)
+        (and (eq_attr "isa" "eijmp")
+             (match_test "AVR_HAVE_EIJMP_EICALL"))
 
-         (and (eq_attr "isa" "eijmp")
-              (match_test "AVR_HAVE_EIJMP_EICALL"))
-         (const_int 1)
+        (and (eq_attr "isa" "lpm")
+             (match_test "!AVR_HAVE_LPMX"))
 
-         (and (eq_attr "isa" "lpm")
-              (match_test "!AVR_HAVE_LPMX"))
-         (const_int 1)
+        (and (eq_attr "isa" "lpmx")
+             (match_test "AVR_HAVE_LPMX"))
 
-         (and (eq_attr "isa" "lpmx")
-              (match_test "AVR_HAVE_LPMX"))
-         (const_int 1)
+        (and (eq_attr "isa" "elpm")
+             (match_test "AVR_HAVE_ELPM && !AVR_HAVE_ELPMX"))
 
-         (and (eq_attr "isa" "elpm")
-              (match_test "AVR_HAVE_ELPM && !AVR_HAVE_ELPMX"))
-         (const_int 1)
+        (and (eq_attr "isa" "elpmx")
+             (match_test "AVR_HAVE_ELPMX"))
 
-         (and (eq_attr "isa" "elpmx")
-              (match_test "AVR_HAVE_ELPMX"))
-         (const_int 1)
+        (and (eq_attr "isa" "xmega")
+             (match_test "AVR_XMEGA"))
 
-         (and (eq_attr "isa" "xmega")
-              (match_test "AVR_XMEGA"))
-         (const_int 1)
+        (and (eq_attr "isa" "no_xmega")
+             (match_test "!AVR_XMEGA"))
 
-         (and (eq_attr "isa" "no_xmega")
-              (match_test "!AVR_XMEGA"))
-         (const_int 1)
+        (and (eq_attr "isa" "adiw")
+             (match_test "AVR_HAVE_ADIW"))
 
-         (and (eq_attr "isa" "adiw")
-              (match_test "AVR_HAVE_ADIW"))
-         (const_int 1)
+        (and (eq_attr "isa" "no_adiw")
+             (match_test "!AVR_HAVE_ADIW"))
 
-         (and (eq_attr "isa" "no_adiw")
-              (match_test "!AVR_HAVE_ADIW"))
-         (const_int 1)
+        (and (eq_attr "isa" "2op")
+             (match_test "!avr_shift_is_3op ()"))
 
-         ] (const_int 0)))
+        (and (eq_attr "isa" "3op")
+             (match_test "avr_shift_is_3op ()"))
+        )
+   (const_int 1)
+   (const_int 0)))
 
 
 ;; Define mode iterators
@@ -5257,28 +5259,31 @@
 ;; "ashlsq3"  "ashlusq3"
 ;; "ashlsa3"  "ashlusa3"
 (define_insn_and_split "ashl<mode>3"
-  [(set (match_operand:ALL4 0 "register_operand"                "=r,r,r,r    
,r,r,r")
-        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0,0,r    
,0,0,0")
-                     (match_operand:QI 2 "nop_general_operand"   "r,L,P,O 
C31,K,n,Qm")))]
+  [(set (match_operand:ALL4 0 "register_operand"                "=r,r  ,r      
  ,r  ,r  ,r,r")
+        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r      
  ,0  ,r  ,0,0")
+                     (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C15 
C31,C4l,C4l,n,Qm")))]
   ""
   "#"
   "&& reload_completed"
   [(parallel [(set (match_dup 0)
                    (ashift:ALL4 (match_dup 1)
                                 (match_dup 2)))
-              (clobber (reg:CC REG_CC))])])
+              (clobber (reg:CC REG_CC))])]
+  ""
+  [(set_attr "isa" "*,*,*,2op,3op,*,*")])
 
 (define_insn "*ashl<mode>3"
-  [(set (match_operand:ALL4 0 "register_operand"                "=r,r,r,r    
,r,r,r")
-        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0,0,r    
,0,0,0")
-                     (match_operand:QI 2 "nop_general_operand"   "r,L,P,O 
C31,K,n,Qm")))
+  [(set (match_operand:ALL4 0 "register_operand"                "=r,r  ,r      
  ,r  ,r  ,r,r")
+        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r      
  ,0  ,r  ,0,0")
+                     (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C15 
C31,C4l,C4l,n,Qm")))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
     return ashlsi3_out (insn, operands, NULL);
   }
-  [(set_attr "length" "8,0,4,5,8,10,12")
-   (set_attr "adjust_len" "ashlsi")])
+  [(set_attr "length" "12")
+   (set_attr "adjust_len" "ashlsi")
+   (set_attr "isa" "*,*,*,2op,3op,*,*")])
 
 ;; Optimize if a scratch register from LD_REGS happens to be available.
 
@@ -5380,12 +5385,72 @@
   [(set_attr "length" "0,2,2,4,10")
    (set_attr "adjust_len" "ashlhi")])
 
+
+;; Split shift into a byte shift and a residual bit shift (without scratch)
+(define_split
+  [(parallel [(set (match_operand:ALL4 0 "register_operand")
+                   (ashift:ALL4 (match_operand:ALL4 1 "register_operand")
+                                (match_operand:QI 2 "const_int_operand")))
+              (clobber (reg:CC REG_CC))])]
+  "avr_split_bit_shift
+   && n_avr_fuse_add_executed >= 1
+   && satisfies_constraint_C4l (operands[2])"
+  [(parallel [(set (match_dup 0)
+                   (ashift:ALL4 (match_dup 1)
+                                (match_dup 3)))
+              (clobber (reg:CC REG_CC))])
+   (parallel [(set (match_dup 0)
+                   (ashift:ALL4 (match_dup 0)
+                                (match_dup 4)))
+              (clobber (reg:CC REG_CC))])]
+  {
+    if (avr_split_shift (operands, NULL_RTX, ASHIFT))
+      DONE;
+    else if (REGNO (operands[0]) == REGNO (operands[1]))
+      FAIL;
+    int offset = INTVAL (operands[2]);
+    operands[3] = GEN_INT (offset & ~7);
+    operands[4] = GEN_INT (offset & 7);
+  })
+
+;; Split shift into a byte shift and a residual bit shift (with scratch)
+(define_split
+  [(parallel [(set (match_operand:ALL4 0 "register_operand")
+                   (ashift:ALL4 (match_operand:ALL4 1 "register_operand")
+                                (match_operand:QI 2 "const_int_operand")))
+              (clobber (match_operand:QI 3 "scratch_or_d_register_operand"))
+              (clobber (reg:CC REG_CC))])]
+  "avr_split_bit_shift
+   && n_avr_fuse_add_executed >= 1
+   && satisfies_constraint_C4l (operands[2])"
+  [(parallel [(set (match_dup 0)
+                   (ashift:ALL4 (match_dup 1)
+                                (match_dup 4)))
+              (clobber (reg:CC REG_CC))])
+   (parallel [(set (match_dup 0)
+                   (ashift:ALL4 (match_dup 0)
+                                (match_dup 5)))
+              (clobber (match_dup 3))
+              (clobber (reg:CC REG_CC))])]
+  {
+    if (avr_split_shift (operands, operands[3], ASHIFT))
+      DONE;
+    else if (REGNO (operands[0]) == REGNO (operands[1]))
+      FAIL;
+    int offset = INTVAL (operands[2]);
+    operands[4] = GEN_INT (offset & ~7);
+    operands[5] = GEN_INT (offset & 7);
+  })
+
+
 (define_peephole2
   [(match_scratch:QI 3 "d")
    (parallel [(set (match_operand:ALL4 0 "register_operand" "")
                    (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "")
                                 (match_operand:QI 2 "const_int_operand" "")))
-              (clobber (reg:CC REG_CC))])]
+              (clobber (reg:CC REG_CC))])
+   ;; $3 must not overlap with the output of the insn above.
+   (match_dup 3)]
   ""
   [(parallel [(set (match_dup 0)
                    (ashift:ALL4 (match_dup 1)
@@ -5393,35 +5458,20 @@
               (clobber (match_dup 3))
               (clobber (reg:CC REG_CC))])])
 
-;; "*ashlsi3_const"
-;; "*ashlsq3_const"  "*ashlusq3_const"
-;; "*ashlsa3_const"  "*ashlusa3_const"
-(define_insn_and_split "*ashl<mode>3_const_split"
-  [(set (match_operand:ALL4 0 "register_operand"              "=r,r,r    ,r")
-        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"  "0,0,r    ,0")
-                     (match_operand:QI 2 "const_int_operand"   "L,P,O C31,n")))
-   (clobber (match_scratch:QI 3                               "=X,X,X    
,&d"))]
-  "reload_completed"
-  "#"
-  "&& reload_completed"
-  [(parallel [(set (match_dup 0)
-                   (ashift:ALL4 (match_dup 1)
-                                (match_dup 2)))
-              (clobber (match_dup 3))
-              (clobber (reg:CC REG_CC))])])
 
 (define_insn "*ashl<mode>3_const"
-  [(set (match_operand:ALL4 0 "register_operand"              "=r,r,r    ,r")
-        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"  "0,0,r    ,0")
-                     (match_operand:QI 2 "const_int_operand"   "L,P,O C31,n")))
-   (clobber (match_scratch:QI 3                               "=X,X,X    ,&d"))
+  [(set (match_operand:ALL4 0 "register_operand"                "=r ,r        
,r  ,r  ,r")
+        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0 ,r        
,0  ,r  ,0")
+                     (match_operand:QI 2 "const_int_operand"     "LP,O C15 
C31,C4l,C4l,n")))
+   (clobber (match_operand:QI 3 "scratch_or_d_register_operand" "=X ,X        
,&d ,&d ,&d"))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
     return ashlsi3_out (insn, operands, NULL);
   }
-  [(set_attr "length" "0,4,5,10")
-   (set_attr "adjust_len" "ashlsi")])
+  [(set_attr "length" "10")
+   (set_attr "adjust_len" "ashlsi")
+   (set_attr "isa" "*,*,2op,3op,*")])
 
 (define_expand "ashlpsi3"
   [(parallel [(set (match_operand:PSI 0 "register_operand"             "")
diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt
index 5f2e52ccfc79..1b7e967dfc8e 100644
--- a/gcc/config/avr/avr.opt
+++ b/gcc/config/avr/avr.opt
@@ -94,6 +94,10 @@ maccumulate-args
 Target Mask(ACCUMULATE_OUTGOING_ARGS) Optimization
 Optimization. Accumulate outgoing function arguments and acquire/release the 
needed stack space for outgoing function arguments in function 
prologue/epilogue.  Without this option, outgoing arguments are pushed before 
calling a function and popped afterwards.  This option can lead to reduced code 
size for functions that call many functions that get their arguments on the 
stack like, for example printf.
 
+msplit-bit-shift
+Target Var(avr_split_bit_shift) Init(0) Optimization
+Optimization. Split shifts of 4-byte values into a byte shift and a residual 
bit shift.
+
 mstrict-X
 Target Var(avr_strict_X) Init(0) Optimization
 Optimization. When accessing RAM, use X as imposed by the hardware, i.e. just 
use pre-decrement, post-increment and indirect addressing with the X register.  
Without this option, the compiler may assume that there is an addressing mode 
X+const similar to Y+const and Z+const and emit instructions to emulate such an 
addressing mode for X.
diff --git a/gcc/config/avr/constraints.md b/gcc/config/avr/constraints.md
index ac64009b3c03..a362f31e30b8 100644
--- a/gcc/config/avr/constraints.md
+++ b/gcc/config/avr/constraints.md
@@ -263,6 +263,22 @@
   (and (match_code "const_int,symbol_ref,const")
        (match_test "const_0mod256_operand (op, HImode)")))
 
+(define_constraint "C4a"
+  "A constant integer shift offset for a 4-byte ASHIFTRT that's opt to being 
split."
+  (and (match_code "const_int")
+       (match_test "avr_split_shift_p (4, ival, ASHIFTRT)")))
+
+(define_constraint "C4r"
+  "A constant integer shift offset for a 4-byte LSHIFTRT that's opt to being 
split."
+  (and (match_code "const_int")
+       (match_test "avr_split_shift_p (4, ival, LSHIFTRT)")))
+
+(define_constraint "C4l"
+  "A constant integer shift offset for a 4-byte ASHIFT that's opt to being 
split."
+  (and (match_code "const_int")
+       (match_test "avr_split_shift_p (4, ival, ASHIFT)")))
+
+
 ;; CONST_FIXED is no element of 'n' so cook our own.
 ;; "i" or "s" would match but because the insn uses iterators that cover
 ;; INT_MODE, "i" or "s" is not always possible.
diff --git a/gcc/pass_manager.h b/gcc/pass_manager.h
index 294cdd0b1f7f..1a1c83fc6565 100644
--- a/gcc/pass_manager.h
+++ b/gcc/pass_manager.h
@@ -65,7 +65,7 @@ public:
   void execute_early_local_passes ();
   unsigned int execute_pass_mode_switching ();
 
-  /* Various passes are manually cloned by epiphany. */
+  /* Various passes are manually cloned by avr and epiphany. */
   opt_pass *get_pass_split_all_insns () const {
     return pass_split_all_insns_1;
   }

Reply via email to