This patch splits multi-byte loads and stores into single-byte
ones provided:
- New option -msplit-ldst is on (e.g. -O2 and higher), and
- The memory is non-volatile, and
- The address space is generic, and
- The split addresses are natively supported by the hardware.
Passes without regressions. Ok for trunk?
Johann
--
AVR: target/107957 - Split multi-byte loads and stores.
This patch splits multi-byte loads and stores into single-byte
ones provided:
- New option -msplit-ldst is on (e.g. -O2 and higher), and
- The memory is non-volatile, and
- The address space is generic, and
- The split addresses are natively supported by the hardware.
gcc/
PR target/107957
* config/avr/avr.opt (-msplit-ldst, avropt_split_ldst):
New option and associated var.
* common/config/avr/avr-common.cc (avr_option_optimization_table)
[OPT_LEVELS_2_PLUS]: Turn on -msplit-ldst.
* config/avr/avr-passes.cc (splittable_address_p)
(avr_byte_maybe_mem, avr_split_ldst): New functions.
* config/avr/avr-protos.h (avr_split_ldst): New proto.
* config/avr/avr.md (define_split) [avropt_split_ldst]: Run
avr_split_ldst().
diff --git a/gcc/common/config/avr/avr-common.cc b/gcc/common/config/avr/avr-common.cc
index 7473429fa36..9059e7d2b48 100644
--- a/gcc/common/config/avr/avr-common.cc
+++ b/gcc/common/config/avr/avr-common.cc
@@ -39,6 +39,7 @@ static const struct default_options avr_option_optimization_table[] =
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_mfuse_move_, NULL, 3 },
{ OPT_LEVELS_2_PLUS, OPT_mfuse_move_, NULL, 23 },
{ OPT_LEVELS_2_PLUS, OPT_msplit_bit_shift, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_msplit_ldst, NULL, 1 },
// Stick to the "old" placement of the subreg lowering pass.
{ OPT_LEVELS_1_PLUS, OPT_fsplit_wide_types_early, NULL, 1 },
/* Allow optimizer to introduce store data races. This used to be the
diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 7be5ec25fbc..5ad20c46238 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -5462,6 +5462,112 @@ avr_split_fake_addressing_move (rtx_insn * /*insn*/, rtx *xop)
}
+/* Given memory reference mem(ADDR) that is N_BYTES wide, return true when it
+ can be split into single-byte moves such that all resulting addresses are
+ natively supported. ADDR is in addr-space generic. */
+
+static bool
+splittable_address_p (rtx addr, int n_bytes)
+{
+ if (CONSTANT_ADDRESS_P (addr)
+ || GET_CODE (addr) == PRE_DEC
+ || GET_CODE (addr) == POST_INC)
+ return true; // These forms are accepted without any further checks.
+
+ if (! AVR_TINY) // REG and REG+CONST are only considered when not AVR_TINY.
+ {
+ rtx base = select<rtx>()
+ : REG_P (addr) ? addr
+ : GET_CODE (addr) == PLUS ? XEXP (addr, 0)
+ : NULL_RTX; // Neither REG nor PLUS: no base, rejected below.
+
+ int off = select<int>()
+ : REG_P (addr) ? 0
+ : GET_CODE (addr) == PLUS ? (int) INTVAL (XEXP (addr, 1)) // NOTE(review): assumes a CONST_INT offset -- confirm.
+ : -1; // -1 is outside of the accepted offset range.
+
+ return (base && REG_P (base)
+ && (REGNO (base) == REG_Y || REGNO (base) == REG_Z)
+ && IN_RANGE (off, 0, 64 - n_bytes)); // Last byte's offset is <= 63.
+ }
+
+ return false;
+}
+
+
+/* Like avr_byte (X, N), but also knows how to split POST_INC and PRE_DEC
+ memory references. Returns the rtx for the selected byte. */
+
+static rtx
+avr_byte_maybe_mem (rtx x, int n)
+{
+ rtx addr, b;
+ if (MEM_P (x)
+ && (GET_CODE (addr = XEXP (x, 0)) == POST_INC // ADDR is set here.
+ || GET_CODE (addr) == PRE_DEC))
+ b = gen_rtx_MEM (QImode, copy_rtx (addr)); // Same address; N is unused.
+ else
+ b = avr_byte (x, n);
+
+ if (MEM_P (x))
+ gcc_assert (MEM_P (b)); // Sanity check: a byte of a MEM must be a MEM.
+
+ return b;
+}
+
+
+/* Split a multi-byte load or store into single-byte moves provided the
+ memory is non-volatile, in addr-space generic, there is no reg-overlap,
+ and the resulting addresses are all natively supported.
+ Returns true when the XOP[0] = XOP[1] insn has been split, and
+ false otherwise. */
+
+bool
+avr_split_ldst (rtx *xop)
+{
+ rtx dest = xop[0];
+ rtx src = xop[1];
+ machine_mode mode = GET_MODE (dest);
+ int n_bytes = GET_MODE_SIZE (mode);
+ rtx mem, reg_or_0;
+
+ if (MEM_P (dest) && reg_or_0_operand (src, mode)) // Store: MEM = reg or 0.
+ {
+ mem = dest;
+ reg_or_0 = src;
+ }
+ else if (register_operand (dest, mode) && MEM_P (src)) // Load: reg = MEM.
+ {
+ reg_or_0 = dest;
+ mem = src;
+ }
+ else
+ return false; // Neither of the two supported shapes.
+
+ rtx addr = XEXP (mem, 0);
+
+ if (MEM_VOLATILE_P (mem)
+ || ! ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem))
+ || ! IN_RANGE (n_bytes, 2, 4) // Only 2-, 3- and 4-byte modes.
+ || ! splittable_address_p (addr, n_bytes)
+ || reg_overlap_mentioned_p (reg_or_0, addr)) // Reg must not occur in ADDR.
+ return false;
+
+ const int step = GET_CODE (addr) == PRE_DEC ? -1 : 1; // PRE_DEC: count down.
+ const int istart = step > 0 ? 0 : n_bytes - 1; // PRE_DEC starts at the top byte.
+ const int iend = istart + step * n_bytes; // One step past the last byte index.
+
+ for (int i = istart; i != iend; i += step)
+ {
+ rtx di = avr_byte_maybe_mem (dest, i);
+ rtx si = avr_byte_maybe_mem (src, i);
+ emit_move_ccc (di, si); // One single-byte move per iteration.
+ }
+
+ return true;
+}
+
+
// Functions make_<pass-name> (gcc::context*) where <pass-name> is
// according to the pass declaration in avr-passes.def. GCC's pass
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index 4aa8554000b..5329d29702e 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -173,6 +173,7 @@ extern int n_avr_fuse_add_executed;
extern bool avr_shift_is_3op ();
extern bool avr_split_shift_p (int n_bytes, int offset, rtx_code);
extern bool avr_split_shift (rtx xop[], rtx xscratch, rtx_code);
+extern bool avr_split_ldst (rtx xop[]);
extern int avr_optimize_size_level ();
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index e343fb23d07..42f41891a90 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -1000,15 +1000,24 @@ (define_split
(match_operand:MOVMODE 1 "general_operand"))
(clobber (reg:CC REG_CC))])]
"reload_completed
- && avropt_fuse_add > 0
- // Only split this for .split2 when we are before
- // pass .avr-fuse-add (which runs after proep).
- && ! epilogue_completed
&& (MEM_P (operands[0]) || MEM_P (operands[1]))"
[(scratch)]
{
- if (avr_split_fake_addressing_move (curr_insn, operands))
+ if (avropt_fuse_add > 0
+ // Only split fake addressing for .split2 when we are before
+ // pass .avr-fuse-add (which runs after proep).
+ && ! epilogue_completed
+ && avr_split_fake_addressing_move (curr_insn, operands))
DONE;
+
+ // Split multi-byte loads / stores into single-byte ones provided
+ // the memory is non-volatile, addr-space = generic, no reg-overlap,
+ // and the resulting addresses are natively supported.
+ if (avropt_split_ldst
+ && GET_MODE_SIZE (<MODE>mode) > 1
+ && avr_split_ldst (operands))
+ DONE;
+
FAIL;
})
diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt
index 7770c278d40..6c86d2bb42a 100644
--- a/gcc/config/avr/avr.opt
+++ b/gcc/config/avr/avr.opt
@@ -98,6 +98,10 @@ msplit-bit-shift
Target Var(avropt_split_bit_shift) Init(0) Optimization
Optimization. Split shifts of 4-byte values into a byte shift and a residual bit shift.
+msplit-ldst
+Target Var(avropt_split_ldst) Init(0) Optimization
+Optimization. Split most of the load and store instructions into byte load and stores.
+
mstrict-X
Target Var(avropt_strict_X) Init(0) Optimization
Optimization. When accessing RAM, use X as imposed by the hardware, i.e. just use pre-decrement, post-increment and indirect addressing with the X register. Without this option, the compiler may assume that there is an addressing mode X+const similar to Y+const and Z+const and emit instructions to emulate such an addressing mode for X.