>
>
> I don't believe REG_FRAME_RELATED_EXPR does the right thing for
> anything besides prologues. You need to emit REG_CFA_RESTORE
> for the pop inside an epilogue.
Richard, here is the updated patch, which uses REG_CFA_RESTORE instead of
REG_FRAME_RELATED_EXPR.
The patch was tested with check-gcc, check-gdb and bootstrap, with no
regressions.
Ok for trunk?
- Thanks and regards,
Sameera
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 37113f5..e71ead5 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -203,6 +203,7 @@ extern void thumb_reload_in_hi (rtx *);
extern void thumb_set_return_address (rtx, rtx);
extern const char *thumb1_output_casesi (rtx *);
extern const char *thumb2_output_casesi (rtx *);
+extern bool bad_reg_pair_for_thumb_ldrd_strd (rtx, rtx);
#endif
/* Defined in pe.c. */
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 429b644..05c9368 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15706,6 +15706,151 @@ arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
REG_NOTES (par) = dwarf;
}
+/* Return true if SRC1 and SRC2 cannot be used together as the register
+   pair of a Thumb LDRD/STRD: both must be registers, they must be
+   distinct, and neither may be SP or PC.  */
+bool
+bad_reg_pair_for_thumb_ldrd_strd (rtx src1, rtx src2)
+{
+  /* Anything that is not a register is rejected outright.  */
+  if (GET_CODE (src1) != REG || GET_CODE (src2) != REG)
+    return true;
+
+  /* Neither register of the pair may be the stack pointer or PC.  */
+  if (REGNO (src1) == PC_REGNUM || REGNO (src1) == SP_REGNUM
+      || REGNO (src2) == PC_REGNUM || REGNO (src2) == SP_REGNUM)
+    return true;
+
+  /* The two registers must differ.  */
+  return REGNO (src1) == REGNO (src2);
+}
+
+/* Emit the Thumb-2 epilogue pop of the registers in SAVED_REGS_MASK using
+   LDRD where possible.  Registers are taken in ascending order and loaded
+   pairwise from consecutive stack slots; each pair is emitted as a two-SET
+   PARALLEL intended to be recognized as an LDRD pattern.  SP is then
+   advanced past the loaded slots.  A single register left over is popped
+   with a post-increment LDR (combined with the return when it is PC); a
+   leftover register followed by PC is passed to arm_emit_multi_reg_pop.
+   PC may be in SAVED_REGS_MASK only when REALLY_RETURN is true.  Returns
+   REALLY_RETURN.
+   NOTE(review): the REALLY_RETURN paths look as if they assume PC is in
+   SAVED_REGS_MASK -- confirm against the caller.  */
+static bool
+thumb2_emit_ldrd_pop (unsigned long saved_regs_mask, bool really_return)
+{
+  int num_regs = 0;
+  int i, j;
+  rtx par = NULL_RTX;
+  rtx dwarf = NULL_RTX;
+  rtx tmp, reg, tmp1;
+
+  /* Count the registers to be restored.  */
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1UL << i))
+      num_regs++;
+
+  gcc_assert (num_regs && num_regs <= 16);
+  gcc_assert (really_return || ((saved_regs_mask & (1UL << PC_REGNUM)) == 0));
+
+  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
+     to be popped.  So, if num_regs was even, it now becomes odd, and the
+     last register is popped together with PC.  If num_regs was odd, it is
+     even now, and ldr with return can be generated for PC.  */
+  if (really_return && (saved_regs_mask & (1UL << PC_REGNUM)))
+    num_regs--;
+
+  /* Var j iterates over all the registers to gather all the registers in
+     saved_regs_mask.  Var i gives index of saved registers in stack frame.
+     A PARALLEL RTX of register-pair is created here, so that pattern for
+     LDRD can be matched.  As PC is always last register to be popped, and
+     we have already decremented num_regs if PC, we don't have to worry
+     about PC in this loop.  */
+  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
+    if (saved_regs_mask & (1UL << j))
+      {
+        gcc_assert (j != SP_REGNUM);
+
+        /* Create RTX for memory load.  The SET itself is built with
+           VOIDmode, matching the stack-adjustment SET further down.  */
+        reg = gen_rtx_REG (SImode, j);
+        tmp = gen_rtx_SET (VOIDmode,
+                           reg,
+                           gen_frame_mem (SImode,
+                                          plus_constant (stack_pointer_rtx,
+                                                         4 * i)));
+        RTX_FRAME_RELATED_P (tmp) = 1;
+
+        if (i % 2 == 0)
+          {
+            /* When saved-register index (i) is even, the RTX to be emitted
+               is yet to be created.  Hence create it first.  The LDRD
+               pattern we are generating is :
+               [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
+                 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
+               where target registers need not be consecutive.  */
+            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+            dwarf = NULL_RTX;
+          }
+
+        /* ith register is added in PARALLEL RTX.  If i is even, the reg_i
+           is added as 0th element and if i is odd, reg_i is added as 1st
+           element of LDRD pattern shown above.  */
+        XVECEXP (par, 0, (i % 2)) = tmp;
+        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+        if ((i % 2) == 1)
+          {
+            /* When saved-register index (i) is odd, RTXs for both the
+               registers to be loaded are generated in above given LDRD
+               pattern, and the pattern can be emitted now.  */
+            par = emit_insn (par);
+            REG_NOTES (par) = dwarf;
+          }
+
+        i++;
+      }
+
+  /* At this point I registers have been loaded pairwise.  If exactly one
+     register remains (an odd count without return, or an even count with
+     PC still pending), it is popped using LDR with post increment; it can
+     be PC as well.  Hence, adjust the stack first.  */
+
+  /* Increment the stack pointer past the I 4-byte slots loaded above.
+     NOTE(review): when no pair was loaded I is 0 and this adds a zero
+     offset -- confirm that is acceptable.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (stack_pointer_rtx, 4 * i));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  emit_insn (tmp);
+
+  dwarf = NULL_RTX;
+
+  if (((num_regs % 2) == 1 && !really_return)
+      || ((num_regs % 2) == 0 && really_return))
+    {
+      /* Gen LDR with post increment here.  First skip ahead to the number
+         of the next register present in the mask.  */
+      while ((saved_regs_mask & (1UL << j)) == 0)
+        j++;
+
+      tmp1 = gen_rtx_MEM (SImode,
+                          gen_rtx_POST_INC (SImode,
+                                            stack_pointer_rtx));
+      set_mem_alias_set (tmp1, get_frame_alias_set ());
+
+      reg = gen_rtx_REG (SImode, j);
+      tmp = gen_rtx_SET (VOIDmode, reg, tmp1);
+      RTX_FRAME_RELATED_P (tmp) = 1;
+      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+      if (really_return)
+        {
+          /* If really_return, j must be PC_REGNUM.  The load is wrapped
+             in a PARALLEL with the return and emitted as a jump.  */
+          gcc_assert (j == PC_REGNUM);
+          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+          XVECEXP (par, 0, 0) = ret_rtx;
+          XVECEXP (par, 0, 1) = tmp;
+          par = emit_jump_insn (par);
+        }
+      else
+        {
+          par = emit_insn (tmp);
+        }
+      REG_NOTES (par) = dwarf;
+    }
+  else if ((num_regs % 2) == 1 && really_return)
+    {
+      /* There are 2 registers to be popped.  So, generate the pattern
+         pop_multiple_with_stack_update_and_return to pop in PC.  Only the
+         mask bits from register j upwards are passed on.  */
+      arm_emit_multi_reg_pop (saved_regs_mask & (~((1UL << j) - 1)),
+                              really_return);
+    }
+
+  return really_return;
+}
+
/* Calculate the size of the return value that is passed in registers. */
static unsigned
arm_size_return_regs (void)
@@ -22557,7 +22702,14 @@ thumb2_expand_epilogue (bool is_sibling)
really_return = true;
}
- arm_emit_multi_reg_pop (saved_regs_mask, really_return);
+ if (!current_tune->prefer_ldrd_strd
+ || optimize_function_for_size_p (cfun))
+ arm_emit_multi_reg_pop (saved_regs_mask, really_return);
+ else
+ /* Generate LDRD pattern instead of POP pattern. */
+ really_return = thumb2_emit_ldrd_pop (saved_regs_mask,
+ really_return);
+
if (really_return == true)
return;
}
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index d8ce982..3c55699 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -207,6 +207,12 @@
(and (match_code "const_int")
(match_test "TARGET_THUMB2 && ival >= 0 && ival <= 255")))
+(define_constraint "Pz"
+ "@internal In Thumb-2 state a multiple of 4 in the range -1020 to 1020"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB2 && ival >= -1020 && ival <= 1020
+ && ival % 4 == 0")))
+
(define_constraint "G"
"In ARM/Thumb-2 state a valid FPA immediate constant."
(and (match_code "const_double")
diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md
index 5db4a32..21d2815 100644
--- a/gcc/config/arm/ldmstm.md
+++ b/gcc/config/arm/ldmstm.md
@@ -21,6 +21,32 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
+(define_insn "*thumb2_ldrd_base" ; Two SImode loads from [op1] and [op1 + 4], output as one LDRD.
+ [(set (match_operand:SI 0 "register_operand" "=r") ; first destination, loaded from [op1]
+ (mem:SI (match_operand:SI 1 "s_register_operand" "rk"))) ; base address register
+ (set (match_operand:SI 2 "register_operand" "=r") ; second destination, loaded from [op1 + 4]
+ (mem:SI (plus:SI (match_dup 1)
+ (const_int 4))))] ; Condition below: Thumb-2, tuning prefers LDRD/STRD, and the pair is not rejected.
+ "(TARGET_THUMB2 && current_tune->prefer_ldrd_strd
+ && (!bad_reg_pair_for_thumb_ldrd_strd (operands[0], operands[2])))"
+ "ldrd%?\t%0, %2, [%1]"
+ [(set_attr "type" "load2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb2_ldrd" ; Two SImode loads from [op1 + op2] and [op1 + op4], output as one LDRD.
+ [(set (match_operand:SI 0 "register_operand" "=r") ; first destination
+ (mem:SI (plus:SI (match_operand:SI 1 "s_register_operand" "rk") ; base address register
+ (match_operand:SI 2 "ldrd_immediate_operand" "Pz")))) ; constant offset of the first word
+ (set (match_operand:SI 3 "register_operand" "=r") ; second destination
+ (mem:SI (plus:SI (match_dup 1)
+ (match_operand:SI 4 "const_int_operand" ""))))] ; offset of the second word; the condition requires op2 + 4
+ "(TARGET_THUMB2 && current_tune->prefer_ldrd_strd
+ && ((INTVAL (operands[2]) + 4) == INTVAL (operands[4]))
+ && (!bad_reg_pair_for_thumb_ldrd_strd (operands[0], operands[3])))"
+ "ldrd%?\t%0, %3, [%1, %2]"
+ [(set_attr "type" "load2")
+ (set_attr "predicable" "yes")])
+
(define_insn "*ldm4_ia"
[(match_parallel 0 "load_multiple_operation"
[(set (match_operand:SI 1 "arm_hard_register_operand" "")
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 7e2203d..60ee008 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -209,6 +209,10 @@
(match_test "(GET_CODE (op) != CONST_INT
|| (INTVAL (op) < 4096 && INTVAL (op) > -4096))"))))
+;; True for a constant offset accepted by the "Pz" constraint's range test:
+;; a multiple of 4 in the inclusive range -1020 to 1020.  The two are used
+;; together on the same operand, so keep them in agreement.
+(define_predicate "ldrd_immediate_operand"
+ (and (match_operand 0 "const_int_operand")
+ (match_test "(INTVAL (op) >= -1020 && INTVAL (op) <= 1020
+ && INTVAL (op) % 4 == 0)")))
+
;; True for operators that can be combined with a shift in ARM state.
(define_special_predicate "shiftable_operator"
(and (match_code "plus,minus,ior,xor,and")