This patch splits multi-byte loads and stores into single-byte
ones provided:

-  New option -msplit-ldst is on (e.g. -O2 and higher), and
-  The memory is non-volatile, and
-  The address space is generic, and
-  The split addresses are natively supported by the hardware.

Passes without regressions.  Ok for trunk?

Johann

--

AVR: target/107957 - Split multi-byte loads and stores.

This patch splits multi-byte loads and stores into single-byte
ones provided:

-  New option -msplit-ldst is on (e.g. -O2 and higher), and
-  The memory is non-volatile, and
-  The address space is generic, and
-  The split addresses are natively supported by the hardware.

gcc/
        PR target/107957
        * config/avr/avr.opt (-msplit-ldst, avropt_split_ldst):
        New option and associated var.
        * common/config/avr/avr-common.cc (avr_option_optimization_table)
        [OPT_LEVELS_2_PLUS]: Turn on -msplit-ldst.
        * config/avr/avr-passes.cc (splittable_address_p)
        (avr_byte_maybe_mem, avr_split_ldst): New functions.
        * config/avr/avr-protos.h (avr_split_ldst): New proto.
        * config/avr/avr.md (define_split) [avropt_split_ldst]: Run
        avr_split_ldst().
diff --git a/gcc/common/config/avr/avr-common.cc b/gcc/common/config/avr/avr-common.cc
index 7473429fa36..9059e7d2b48 100644
--- a/gcc/common/config/avr/avr-common.cc
+++ b/gcc/common/config/avr/avr-common.cc
@@ -39,6 +39,7 @@ static const struct default_options avr_option_optimization_table[] =
     { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_mfuse_move_, NULL, 3 },
     { OPT_LEVELS_2_PLUS, OPT_mfuse_move_, NULL, 23 },
     { OPT_LEVELS_2_PLUS, OPT_msplit_bit_shift, NULL, 1 },
+    { OPT_LEVELS_2_PLUS, OPT_msplit_ldst, NULL, 1 },
     // Stick to the "old" placement of the subreg lowering pass.
     { OPT_LEVELS_1_PLUS, OPT_fsplit_wide_types_early, NULL, 1 },
     /* Allow optimizer to introduce store data races. This used to be the
diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 7be5ec25fbc..5ad20c46238 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -5462,6 +5462,112 @@ avr_split_fake_addressing_move (rtx_insn * /*insn*/, rtx *xop)
 }
 
 
+/* Given an N_BYTES wide memory reference mem(ADDR), return true when it can
+   be split into single-byte moves, and all resulting addresses are natively
+   supported.  ADDR is in addr-space generic.  */
+
+static bool
+splittable_address_p (rtx addr, int n_bytes)
+{
+  if (CONSTANT_ADDRESS_P (addr)
+      || GET_CODE (addr) == PRE_DEC
+      || GET_CODE (addr) == POST_INC)
+    return true;
+
+  if (! AVR_TINY)
+    {
+      rtx base = select<rtx>()
+	: REG_P (addr) ? addr
+	: GET_CODE (addr) == PLUS ? XEXP (addr, 0)
+	: NULL_RTX;
+
+      int off = select<int>()
+	: REG_P (addr) ? 0
+	: GET_CODE (addr) == PLUS ? (int) INTVAL (XEXP (addr, 1))
+	: -1;
+
+      return (base && REG_P (base)
+	      && (REGNO (base) == REG_Y || REGNO (base) == REG_Z)
+	      && IN_RANGE (off, 0, 64 - n_bytes));
+    }
+
+  return false;
+}
+
+
+/* Like avr_byte(), but also handles POST_INC and PRE_DEC memory references:
+   For these, byte N is a QImode MEM with a copy of the very same address.  */
+
+static rtx
+avr_byte_maybe_mem (rtx x, int n)
+{
+  rtx addr, b;
+  if (MEM_P (x)
+      && (GET_CODE (addr = XEXP (x, 0)) == POST_INC
+	  || GET_CODE (addr) == PRE_DEC))
+    b = gen_rtx_MEM (QImode, copy_rtx (addr));
+  else
+    b = avr_byte (x, n);
+
+  if (MEM_P (x))
+    gcc_assert (MEM_P (b));
+
+  return b;
+}
+
+
+/* Split the multi-byte load / store  XOP[0] = XOP[1]  into 1-byte moves
+   provided: non-volatile, addr-space = generic, 2 to 4 bytes wide, the
+   register is not mentioned in the address, and all resulting addresses
+   are natively supported.  Bytes go low to high, except PRE_DEC: high to low.
+   Returns true when the insn has been split, and false otherwise.  */
+
+bool
+avr_split_ldst (rtx *xop)
+{
+  rtx dest = xop[0];
+  rtx src = xop[1];
+  machine_mode mode = GET_MODE (dest);
+  int n_bytes = GET_MODE_SIZE (mode);
+  rtx mem, reg_or_0;
+
+  if (MEM_P (dest) && reg_or_0_operand (src, mode))
+    {
+      mem = dest;
+      reg_or_0 = src;
+    }
+  else if (register_operand (dest, mode) && MEM_P (src))
+    {
+      reg_or_0 = dest;
+      mem = src;
+    }
+  else
+    return false;
+
+  rtx addr = XEXP (mem, 0);
+
+  if (MEM_VOLATILE_P (mem)
+      || ! ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem))
+      || ! IN_RANGE (n_bytes, 2, 4)
+      || ! splittable_address_p (addr, n_bytes)
+      || reg_overlap_mentioned_p (reg_or_0, addr))
+    return false;
+
+  const int step = GET_CODE (addr) == PRE_DEC ? -1 : 1;
+  const int istart = step > 0 ? 0 : n_bytes - 1;
+  const int iend = istart + step * n_bytes;
+
+  for (int i = istart; i != iend; i += step)
+    {
+      rtx di = avr_byte_maybe_mem (dest, i);
+      rtx si = avr_byte_maybe_mem (src, i);
+      emit_move_ccc (di, si);
+    }
+
+  return true;
+}
+
+
 
 // Functions  make_<pass-name> (gcc::context*)  where <pass-name> is
 // according to the pass declaration in avr-passes.def.  GCC's pass
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index 4aa8554000b..5329d29702e 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -173,6 +173,7 @@ extern int n_avr_fuse_add_executed;
 extern bool avr_shift_is_3op ();
 extern bool avr_split_shift_p (int n_bytes, int offset, rtx_code);
 extern bool avr_split_shift (rtx xop[], rtx xscratch, rtx_code);
+extern bool avr_split_ldst (rtx xop[]);
 
 extern int avr_optimize_size_level ();
 
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index e343fb23d07..42f41891a90 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -1000,15 +1000,24 @@ (define_split
                    (match_operand:MOVMODE 1 "general_operand"))
               (clobber (reg:CC REG_CC))])]
   "reload_completed
-   && avropt_fuse_add > 0
-   // Only split this for .split2 when we are before
-   // pass .avr-fuse-add (which runs after proep).
-   && ! epilogue_completed
    && (MEM_P (operands[0]) || MEM_P (operands[1]))"
   [(scratch)]
   {
-    if (avr_split_fake_addressing_move (curr_insn, operands))
+    if (avropt_fuse_add > 0
+        // Only split fake addressing for .split2 when we are before
+        // pass .avr-fuse-add (which runs after proep).
+        && ! epilogue_completed
+        && avr_split_fake_addressing_move (curr_insn, operands))
       DONE;
+
+    // Splitting multi-byte load / stores into 1-byte such insns
+    // provided non-volatile, addr-space = generic, no reg-overlap
+    // and the resulting addressings are natively supported.
+    if (avropt_split_ldst
+        && GET_MODE_SIZE (<MODE>mode) > 1
+        && avr_split_ldst (operands))
+      DONE;
+
     FAIL;
   })
 
diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt
index 7770c278d40..6c86d2bb42a 100644
--- a/gcc/config/avr/avr.opt
+++ b/gcc/config/avr/avr.opt
@@ -98,6 +98,10 @@ msplit-bit-shift
 Target Var(avropt_split_bit_shift) Init(0) Optimization
 Optimization. Split shifts of 4-byte values into a byte shift and a residual bit shift.
 
+msplit-ldst
+Target Var(avropt_split_ldst) Init(0) Optimization
+Optimization. Split most of the load and store instructions into byte loads and stores.
+
 mstrict-X
 Target Var(avropt_strict_X) Init(0) Optimization
 Optimization. When accessing RAM, use X as imposed by the hardware, i.e. just use pre-decrement, post-increment and indirect addressing with the X register.  Without this option, the compiler may assume that there is an addressing mode X+const similar to Y+const and Z+const and emit instructions to emulate such an addressing mode for X.

Reply via email to