Hi Ramana,
Please find attached the reworked patch. The patch has been tested with check-gcc,
check-gdb and a bootstrap, with no regressions.
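To illustrate the intended code generation (just a sketch, assuming the only saved
core registers are r4-r7 and ignoring any other prologue stores), the prologue save
sequence is now expected to look roughly like

        sub     sp, sp, #16        @ sketch only: registers/offsets depend on the frame
        strd    r6, r7, [sp, #8]
        strd    r4, r5, [sp]

instead of a single push {r4, r5, r6, r7}. With an odd number of saved registers,
the lowest-numbered one is stored with a plain str and the rest are paired into strd.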
Ok?
- Thanks and regards,
Sameera D.
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 05c9368..334a25f 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15438,6 +15438,125 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
}
}
+/* Generate and emit a pattern that will be recognized as a STRD pattern.  If
+ an even number of registers is being pushed, multiple STRD patterns are
+ created for all register pairs.  If an odd number of registers is pushed,
+ emit a combination of STRDs and an STR for the prologue saves.  */
+static void
+thumb2_emit_strd_push (unsigned long saved_regs_mask)
+{
+ int num_regs = 0;
+ int i, j;
+ rtx par = NULL_RTX;
+ rtx insn = NULL_RTX;
+ rtx dwarf = NULL_RTX;
+ rtx tmp, reg, tmp1;
+
+ for (i = 0; i <= LAST_ARM_REGNUM; i++)
+ if (saved_regs_mask & (1 << i))
+ num_regs++;
+
+ gcc_assert (num_regs && num_regs <= 16);
+
+ /* Decrement the stack pointer up front to make room for the num_regs
+ 4-byte registers being pushed.  */
+ tmp = gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -4 * num_regs));
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ insn = emit_insn (tmp);
+
+ /* Create sequence for DWARF info. */
+ dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
+
+ /* RTL objects cannot be shared, hence create a new copy for the dwarf note.  */
+ tmp1 = gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -4 * num_regs));
+ RTX_FRAME_RELATED_P (tmp1) = 1;
+ XVECEXP (dwarf, 0, 0) = tmp1;
+
+ /* Var j iterates over all the registers set in saved_regs_mask.  Var i
+ gives the index of register R_j in the stack frame.  A PARALLEL RTX
+ containing a register pair is created here so that the STRD pattern can
+ be matched.  If num_regs is odd, the lowest-numbered register will be
+ pushed with STR and the remaining registers will be pushed with STRD in
+ pairs.  If num_regs is even, all registers are pushed with STRD in pairs.
+ Hence, skip the lowest stack slot here for odd num_regs.  */
+ for (i = num_regs - 1, j = LAST_ARM_REGNUM; i >= (num_regs % 2); j--)
+ if (saved_regs_mask & (1 << j))
+ {
+ gcc_assert (j != SP_REGNUM);
+ gcc_assert (j != PC_REGNUM);
+
+ /* Create the RTX for the store.  A separate RTX is created for the
+ dwarf note, as RTXes cannot be shared.  */
+ reg = gen_rtx_REG (SImode, j);
+ tmp = gen_rtx_SET (SImode,
+ gen_frame_mem
+ (SImode,
+ plus_constant (stack_pointer_rtx, 4 * i)),
+ reg);
+
+ tmp1 = gen_rtx_SET (SImode,
+ gen_frame_mem
+ (SImode,
+ plus_constant (stack_pointer_rtx, 4 * i)),
+ reg);
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ RTX_FRAME_RELATED_P (tmp1) = 1;
+
+ if (((i - (num_regs % 2)) % 2) == 1)
+ /* When (i - (num_regs % 2)) is odd, the RTX to be emitted has not been
+ created yet, so create it first.  The STRD pattern we are generating is:
+ [ (SET (MEM (PLUS (SP) (NUM))) (reg_t1))
+ (SET (MEM (PLUS (SP) (NUM + 4))) (reg_t2)) ]
+ where the target registers need not be consecutive.  */
+ par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+
+ /* Register R_j is added to the PARALLEL RTX.  If (i - (num_regs % 2)) is
+ even, reg_j becomes the 0th element of the STRD pattern shown above; if
+ it is odd, reg_j becomes the 1st element.  */
+ XVECEXP (par, 0, ((i - (num_regs % 2)) % 2)) = tmp;
+ XVECEXP (dwarf, 0, (i + 1)) = tmp1;
+
+ if (((i - (num_regs % 2)) % 2) == 0)
+ /* When (i - (num_regs % 2)) is even, the RTXes for both registers of the
+ pair have been generated, so the STRD pattern above can be emitted now.  */
+ emit_insn (par);
+
+ i--;
+ }
+
+ if ((num_regs % 2) == 1)
+ {
+ /* If an odd number of registers is pushed, generate an STR pattern to
+ store the lone remaining register.  */
+ for (; (saved_regs_mask & (1 << j)) == 0; j--);
+
+ tmp1 = gen_frame_mem (SImode, plus_constant (stack_pointer_rtx, 4 * i));
+ reg = gen_rtx_REG (SImode, j);
+ tmp = gen_rtx_SET (SImode, tmp1, reg);
+ RTX_FRAME_RELATED_P (tmp) = 1;
+
+ emit_insn (tmp);
+
+ tmp1 = gen_rtx_SET (SImode,
+ gen_frame_mem
+ (SImode,
+ plus_constant (stack_pointer_rtx, 4 * i)),
+ reg);
+ RTX_FRAME_RELATED_P (tmp1) = 1;
+ XVECEXP (dwarf, 0, (i + 1)) = tmp1;
+ }
+
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ return;
+}
+
/* Generate and emit an insn that we will recognize as a push_multi.
Unfortunately, since this insn does not reflect very well the actual
semantics of the operation, we need to annotate the insn for the benefit
@@ -16598,8 +16717,18 @@ arm_expand_prologue (void)
saved_regs += frame;
}
}
- insn = emit_multi_reg_push (live_regs_mask);
- RTX_FRAME_RELATED_P (insn) = 1;
+
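+ /* Use STRD pairs when the tuning prefers LDRD/STRD; the single push_multi
+ is smaller, so keep it when optimizing for size.  */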
+ if (TARGET_THUMB2
+ && current_tune->prefer_ldrd_strd
+ && !optimize_function_for_size_p (cfun))
+ {
+ thumb2_emit_strd_push (live_regs_mask);
+ }
+ else
+ {
+ insn = emit_multi_reg_push (live_regs_mask);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
}
if (! IS_VOLATILE (func_type))
diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md
index 21d2815..e3dcd4f 100644
--- a/gcc/config/arm/ldmstm.md
+++ b/gcc/config/arm/ldmstm.md
@@ -47,6 +47,32 @@
[(set_attr "type" "load2")
(set_attr "predicable" "yes")])
+(define_insn "*thumb2_strd_base"
+ [(set (mem:SI (match_operand:SI 0 "s_register_operand" "rk"))
+ (match_operand:SI 1 "register_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 0)
+ (const_int 4)))
+ (match_operand:SI 2 "register_operand" "r"))]
+ "(TARGET_THUMB2 && current_tune->prefer_ldrd_strd
+ && (!bad_reg_pair_for_thumb_ldrd_strd (operands[1], operands[2])))"
+ "strd%?\t%1, %2, [%0]"
+ [(set_attr "type" "store2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb2_strd"
+ [(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk")
+ (match_operand:SI 1 "ldrd_immediate_operand" "Pz")))
+ (match_operand:SI 2 "register_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 0)
+ (match_operand:SI 3 "const_int_operand" "")))
+ (match_operand:SI 4 "register_operand" "r"))]
+ "(TARGET_THUMB2 && current_tune->prefer_ldrd_strd
+ && ((INTVAL (operands[1]) + 4) == INTVAL (operands[3]))
+ && (!bad_reg_pair_for_thumb_ldrd_strd (operands[2], operands[4])))"
+ "strd%?\t%2, %4, [%0, %1]"
+ [(set_attr "type" "store2")
+ (set_attr "predicable" "yes")])
+
(define_insn "*ldm4_ia"
[(match_parallel 0 "load_multiple_operation"
[(set (match_operand:SI 1 "arm_hard_register_operand" "")