Hello,
>
> @@ -162,6 +175,7 @@ doloop_condition_get (rtx doloop_pat)
> return 0;
>
> if ((XEXP (condition, 0) == reg)
> + || (REGNO (XEXP (condition, 0)) == CC_REGNUM)
> || (GET_CODE (XEXP (condition, 0)) == PLUS
> && XEXP (XEXP (condition, 0), 0) == reg))
>
> You can't depend on CC_REGNUM in generic code. That's part of the
> private machine description for ARM. Other cores have different ways of
> representing condition codes.
>
> R.
Yes, thanks, I found that out when testing the patch on PowerPC.
Attached is a newer version of the patch which is currently
under testing.
Thanks,
Revital
(See attached file: patch_arm_doloop_5.txt)
Index: modulo-sched.c
===================================================================
--- modulo-sched.c (revision 168397)
+++ modulo-sched.c (working copy)
@@ -1021,7 +1021,8 @@ sms_schedule (void)
if (CALL_P (insn)
|| BARRIER_P (insn)
|| (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
- && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
+ && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
+ && !reg_mentioned_p (count_reg, insn))
|| (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
|| (INSN_P (insn) && (set = single_set (insn))
&& GET_CODE (SET_DEST (set)) == SUBREG))
Index: loop-doloop.c
===================================================================
--- loop-doloop.c (revision 168397)
+++ loop-doloop.c (working copy)
@@ -78,6 +78,8 @@ doloop_condition_get (rtx doloop_pat)
rtx inc_src;
rtx condition;
rtx pattern;
+ rtx cc_reg = NULL_RTX;
+ rtx reg_orig;
/* The canonical doloop pattern we expect has one of the following
forms:
@@ -96,7 +98,16 @@ doloop_condition_get (rtx doloop_pat)
2) (set (reg) (plus (reg) (const_int -1))
(set (pc) (if_then_else (reg != 0)
(label_ref (label))
- (pc))). */
+ (pc))).
+
+ Some targets (ARM) do the comparison before the branch, as in the
+ following form:
+
+ 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0)))
+ (set (reg) (plus (reg) (const_int -1)))])
+ (set (pc) (if_then_else (NE == cc)
+ (label_ref (label))
+ (pc))) */
pattern = PATTERN (doloop_pat);
@@ -104,14 +115,42 @@ doloop_condition_get (rtx doloop_pat)
{
rtx cond;
rtx prev_insn = prev_nondebug_insn (doloop_pat);
+ rtx src_orig;
+ rtx cmp_orig;
- /* We expect the decrement to immediately precede the branch. */
+ /* In case the pattern is not PARALLEL we expect two forms
+ of doloop which are cases 2) and 3) above: in case 2) the
+ decrement immediately precedes the branch, while in case
+ 3) the compare and decrement instructions immediately precede
+ the branch. */
if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
return 0;
cmp = pattern;
- inc = PATTERN (PREV_INSN (doloop_pat));
+ if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
+ {
+ /* The third case: the compare and decrement instructions
+ immediately precede the branch. */
+ cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
+ if (GET_CODE (cmp_orig) != SET)
+ return 0;
+ if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
+ return 0;
+ src_orig = XEXP (SET_SRC (cmp_orig), 0);
+ if (XEXP (SET_SRC (cmp_orig), 1) != const0_rtx
+ || GET_CODE (src_orig) != PLUS)
+ return 0;
+ reg_orig = XEXP (src_orig, 0);
+ if (XEXP (src_orig, 1) != GEN_INT (-1)
+ || !REG_P (reg_orig))
+ return 0;
+ cc_reg = SET_DEST (cmp_orig);
+
+ inc = XVECEXP (PATTERN (prev_insn), 0, 1);
+ }
+ else
+ inc = PATTERN (PREV_INSN (doloop_pat));
/* We expect the condition to be of the form (reg != 0) */
cond = XEXP (SET_SRC (cmp), 0);
if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
@@ -162,6 +201,9 @@ doloop_condition_get (rtx doloop_pat)
return 0;
if ((XEXP (condition, 0) == reg)
+ || ((cc_reg != NULL_RTX)
+ && (XEXP (condition, 0) == cc_reg)
+ && (reg_orig == reg))
|| (GET_CODE (XEXP (condition, 0)) == PLUS
&& XEXP (XEXP (condition, 0), 0) == reg))
{
@@ -181,7 +223,24 @@ doloop_condition_get (rtx doloop_pat)
(set (reg) (plus (reg) (const_int -1)))
(additional clobbers and uses)])
- So we return that form instead.
+ For the third form we expect:
+
+ (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
+ (set (reg) (plus (reg) (const_int -1)))])
+ (set (pc) (if_then_else (NE == cc)
+ (label_ref (label))
+ (pc)))
+
+ which is equivalent to the following:
+
+ (parallel [(set (cc) (compare (reg, 1))
+ (set (reg) (plus (reg) (const_int -1)))
+ (set (pc) (if_then_else (NE == cc)
+ (label_ref (label))
+ (pc))))])
+
+ So we return the second form instead for the two cases.
+
*/
condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
Index: config/arm/thumb2.md
===================================================================
--- config/arm/thumb2.md (revision 168424)
+++ config/arm/thumb2.md (working copy)
@@ -836,7 +836,7 @@
"operands[4] = GEN_INT (- INTVAL (operands[2]));"
)
-(define_insn "*thumb2_addsi3_compare0"
+(define_insn "thumb2_addsi3_compare0"
[(set (reg:CC_NOOV CC_REGNUM)
(compare:CC_NOOV
(plus:SI (match_operand:SI 1 "s_register_operand" "l, 0, r")
@@ -1118,3 +1118,53 @@
"
operands[2] = GEN_INT (32 - INTVAL (operands[2]));
")
+
+ ;; Define the subtract-one-and-jump insns so loop.c
+ ;; knows what to generate.
+ (define_expand "doloop_end"
+ [(use (match_operand 0 "" "")) ; loop pseudo
+ (use (match_operand 1 "" "")) ; iterations; zero if unknown
+ (use (match_operand 2 "" "")) ; max iterations
+ (use (match_operand 3 "" "")) ; loop level
+ (use (match_operand 4 "" ""))] ; label
+ "TARGET_THUMB2"
+ "
+ {
+ /* Currently SMS relies on the do-loop pattern to recognize loops
+ where (1) the control part consists of all insns defining and/or
+ using a certain 'count' register and (2) the loop count can be
+ adjusted by modifying this register prior to the loop.
+ ??? The possible introduction of a new block to initialize the
+ new IV can potentially affect branch optimizations. */
+ if (optimize > 0 && flag_modulo_sched)
+ {
+ rtx s0;
+ rtx bcomp;
+ rtx loc_ref;
+ rtx cc_reg;
+ rtx insn;
+ rtx cmp;
+
+ /* Only use this on innermost loops. */
+ if (INTVAL (operands[3]) > 1)
+ FAIL;
+ if (GET_MODE (operands[0]) != SImode)
+ FAIL;
+
+ s0 = operands [0];
+ insn = emit_insn (gen_thumb2_addsi3_compare0 (s0, s0, GEN_INT (-1)));
+ cmp = XVECEXP (PATTERN (insn), 0, 0);
+ cc_reg = SET_DEST (cmp);
+ bcomp = gen_rtx_NE(VOIDmode, cc_reg, const0_rtx);
+ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [4]);
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
+ loc_ref, pc_rtx)));
+
+ DONE;
+ }else
+ FAIL;
+ }")
+
+
+