On 14/02/12 17:30, Richard Earnshaw wrote:
> On 14/02/12 16:53, Andrew Stubbs wrote:
>> Hi all,
>>
>> I've discovered that GCC does not use ARM conditional execution for
>> 16-bit Thumb opcodes in many cases. It's fine for individual
>> instructions, but if-conversion of basic blocks with more than one
>> instruction fails.
>>
>> E.g.
>>
>> int
>> foo (int a, int b)
>> {
>>    if (a != b)
>>      {
>>        a = a << b;
>>        a = a >> 1;
>>      }
>>
>>    return a + b;
>> }
>>
>> The current compiler gives:
>>
>> foo:
>>          cmp     r0, r1
>>          beq     .L2
>>          lsls    r0, r0, r1
>>          asrs    r0, r0, #1
>> .L2:
>>          adds    r0, r0, r1
>>          bx      lr
>>
>> With my patch I get this:
>>
>> foo:
>>          cmp     r0, r1
>>          itt     ne
>>          lslne   r0, r0, r1
>>          asrne   r0, r0, #1
>>          adds    r0, r0, r1
>>          bx      lr
>>
>> The problem comes from the fact that the compiler prefers "lsls" over
>> "lsl" because the former is a 16-bit encoding, and the latter a 32-bit
>> encoding. There's actually a peephole optimization defined to make this
>> happen wherever the CC register is not live.
>>
>> This is fine in unconditional code, but the CC register clobber means
>> that it's only possible to convert it to conditional code if it is the
>> last instruction in the IT block, so if-conversion fails on the above
>> example.
>>
>> My patch introduces a new target hook "IFCVT_OVERRIDE_MODIFIED_TEST"
>> that allows the CC clobber to be ignored on such instructions, and uses
>> IFCVT_MODIFY_INSN to convert from "lsls" to "lsl<c>" where possible.
>>
>> I've also introduced a new instruction attribute "it_cc" to indicate
>> which instruction patterns are affected.
>>
>> OK for trunk, once stage 1 reopens?
>>
>> Andrew
>>
> 
> Bernds checked in a patch last year (or maybe even before that) to make
> the selection of flags clobbered insns run very late (certainly after
> condexec had run), so I'm not sure why you're not seeing this.
> 

Hm, you mentioned some peepholes.  Try removing them....

R.

> R.
> 
>>
>> ifcvt_modify_insn.patch
>>
>>
>> 2012-02-14  Andrew Stubbs  <a...@codesourcery.com>
>>
>>       gcc/
>>       * config/arm/arm-protos.h (arm_ifcvt_modify_insn): New prototype.
>>       (arm_ifcvt_override_modified_test): New prototype.
>>       * config/arm/arm.c (thumb_insn_suitable_for_ifcvt): New function.
>>       (arm_ifcvt_override_modified_test): New function.
>>       (arm_ifcvt_modify_insn): New function.
>>       * config/arm/arm.h (IFCVT_OVERRIDE_MODIFIED_TEST): New macro.
>>       (IFCVT_MODIFY_INSN): New macro.
>>       * config/arm/thumb2.md (it_cc): New attribute.
>>       (thumb2_alusi3_short): Set it_cc attribute.
>>       (thumb2_shiftsi3_short, thumb2_mov<mode>_shortim): Likewise.
>>       (thumb2_addsi_short, thumb2_subsi_short): Likewise.
>>       (thumb2_mulsi_short, thumb2_one_cmplsi2_short): Likewise.
>>       (thumb2_negsi2_short): Likewise.
>>       * doc/tm.texi: Regenerate.
>>       * doc/tm.texi.in (IFCVT_OVERRIDE_MODIFIED_TEST): Document.
>>       * ifcvt.c (cond_exec_process_insns): Add IFCVT_OVERRIDE_MODIFIED_TEST.
>>
>>       gcc/testsuite/
>>       * gcc.target/arm/thumb-ifcvt.c: New test case.
>>
>> ---
>>  gcc/config/arm/arm-protos.h                |    5 ++
>>  gcc/config/arm/arm.c                       |   67 
>> ++++++++++++++++++++++++++++
>>  gcc/config/arm/arm.h                       |    5 ++
>>  gcc/config/arm/thumb2.md                   |   11 +++++
>>  gcc/doc/tm.texi                            |   13 +++++
>>  gcc/doc/tm.texi.in                         |   13 +++++
>>  gcc/ifcvt.c                                |   14 +++++-
>>  gcc/testsuite/gcc.target/arm/thumb-ifcvt.c |   19 ++++++++
>>  8 files changed, 146 insertions(+), 1 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/arm/thumb-ifcvt.c
>>
>> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
>> index 296550a..67396f0 100644
>> --- a/gcc/config/arm/arm-protos.h
>> +++ b/gcc/config/arm/arm-protos.h
>> @@ -244,4 +244,9 @@ extern const struct tune_params *current_tune;
>>  extern int vfp3_const_double_for_fract_bits (rtx);
>>  #endif /* RTX_CODE */
>>
>> +#ifdef BB_HEAD
>> +extern int arm_ifcvt_override_modified_test (rtx, rtx);
>> +extern rtx arm_ifcvt_modify_insn (ce_if_block_t *, rtx, rtx);
>> +#endif
>> +
>>  #endif /* ! GCC_ARM_PROTOS_H */
>> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
>> index 0bded8d..803e1c9 100644
>> --- a/gcc/config/arm/arm.c
>> +++ b/gcc/config/arm/arm.c
>> @@ -25139,5 +25139,72 @@ vfp3_const_double_for_fract_bits (rtx operand)
>>    return 0;
>>  }
>>
>> +/* Find the portion of INSN that is suitable for if-conversion.
>> +
>> +   Some Thumb mode 16-bit instructions have two variants: one that is
>> +   unconditional but clobbers the condition flags, and one that is
>> +   conditional (in an IT block) and does not clobber anything.
>> +
>> +   There are 32-bit variants that are unconditional but don't clobber
>> +   anything, so the peephole2 pass adds the clobber in order to use the
>> +   smaller instruction encoding.  Unfortunately this defeats the
>> +   if-conversion pass since CC must not be modified in an IT block.
>> +
>> +   The peephole can be reversed, and the instruction converted to
>> +   conditional execution without affecting the size optimization.
>> +
>> +   This function detects these instructions and returns the sub-expression
>> +   that contains the operation, without the clobber.  */
>> +
>> +static rtx
>> +thumb_insn_suitable_for_ifcvt (rtx pattern, rtx insn)
>> +{
>> +  if (GET_CODE (pattern) == PARALLEL
>> +      && XVECLEN (pattern, 0) == 2)
>> +    {
>> +      rtx op = XVECEXP (pattern, 0, 0);
>> +      rtx clobber = XVECEXP (pattern, 0, 1);
>> +
>> +      if (GET_CODE (clobber) == CLOBBER
>> +       && GET_CODE (XEXP (clobber, 0)) == REG
>> +       && REGNO (XEXP (clobber, 0)) == CC_REGNUM
>> +       && get_attr_it_cc (insn) == IT_CC_NOCLOBBER)
>> +     return op;
>> +    }
>> +
>> +  return NULL_RTX;
>> +}
>> +
>> +/* Prevent if-conversion thinking that certain Thumb1 instructions
>> +   clobber CC.  These instruction in fact do not when in an IT block.  */
>> +int
>> +arm_ifcvt_override_modified_test (rtx test, rtx insn)
>> +{
>> +  if (thumb_insn_suitable_for_ifcvt (PATTERN (insn), insn) != NULL_RTX)
>> +    return 0; /* Force *not* modified.  */
>> +
>> +  return -1;
>> +}
>> +
>> +/* Modify insns to enable conditional execution.
>> +
>> +   This function removes CC clobers that impede if-conversion.  See the
>> +   comments on thumb_insn_suitable_for_ifcvt.  */
>> +rtx
>> +arm_ifcvt_modify_insn (ce_if_block_t *ce, rtx pattern, rtx insn)
>> +{
>> +  rtx op, cond;
>> +
>> +  gcc_assert (GET_CODE (pattern) == COND_EXEC);
>> +
>> +  cond = XEXP (pattern, 0);
>> +  op = thumb_insn_suitable_for_ifcvt (XEXP (pattern, 1), insn);
>> +
>> +  if (op != NULL_RTX)
>> +    return gen_rtx_COND_EXEC (VOIDmode, cond, op);
>> +
>> +  return pattern;
>> +}
>> +
>>  #include "gt-arm.h"
>>
>> diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
>> index 5a78125..0b7f332 100644
>> --- a/gcc/config/arm/arm.h
>> +++ b/gcc/config/arm/arm.h
>> @@ -2220,4 +2220,9 @@ extern const char *host_detect_local_cpu (int argc, 
>> const char **argv);
>>
>>  #define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS
>>
>> +#define IFCVT_OVERRIDE_MODIFIED_TEST(TEST,INSN) \
>> +  arm_ifcvt_override_modified_test ((TEST), (INSN))
>> +#define IFCVT_MODIFY_INSN(CE,PATTERN,INSN) \
>> +  (PATTERN) = arm_ifcvt_modify_insn ((CE), (PATTERN), (INSN))
>> +
>>  #endif /* ! GCC_ARM_H */
>> diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
>> index 05585da..454c42f 100644
>> --- a/gcc/config/arm/thumb2.md
>> +++ b/gcc/config/arm/thumb2.md
>> @@ -24,6 +24,9 @@
>>  ;; changes made in armv5t as "thumb2".  These are considered part
>>  ;; the 16-bit Thumb-1 instruction set.
>>
>> +;; Does an insn clobber CC if it appears in an IT block.  */
>> +(define_attr "it_cc" "undef,clobber,noclobber" (const_string "undef"))
>> +
>>  (define_insn "*thumb2_incscc"
>>    [(set (match_operand:SI 0 "s_register_operand" "=r,r")
>>          (plus:SI (match_operator:SI 2 "arm_comparison_operator"
>> @@ -674,6 +677,7 @@
>>     && GET_CODE(operands[3]) != MINUS"
>>    "%I3%!\\t%0, %1, %2"
>>    [(set_attr "predicable" "yes")
>> +   (set_attr "it_cc" "noclobber")
>>     (set_attr "length" "2")]
>>  )
>>
>> @@ -708,6 +712,7 @@
>>         || REG_P(operands[2]))"
>>    "* return arm_output_shift(operands, 2);"
>>    [(set_attr "predicable" "yes")
>> +   (set_attr "it_cc" "noclobber")
>>     (set_attr "shift" "1")
>>     (set_attr "length" "2")
>>     (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
>> @@ -736,6 +741,7 @@
>>    "TARGET_THUMB2 && reload_completed"
>>    "mov%!\t%0, %1"
>>    [(set_attr "predicable" "yes")
>> +   (set_attr "it_cc" "noclobber")
>>     (set_attr "length" "2")]
>>  )
>>
>> @@ -778,6 +784,7 @@
>>        return \"add%!\\t%0, %1, %2\";
>>    "
>>    [(set_attr "predicable" "yes")
>> +   (set_attr "it_cc" "noclobber")
>>     (set_attr "length" "2")]
>>  )
>>
>> @@ -789,6 +796,7 @@
>>    "TARGET_THUMB2 && reload_completed"
>>    "sub%!\\t%0, %1, %2"
>>    [(set_attr "predicable" "yes")
>> +   (set_attr "it_cc" "noclobber")
>>     (set_attr "length" "2")]
>>  )
>>
>> @@ -905,6 +913,7 @@
>>    "TARGET_THUMB2 && optimize_size && reload_completed"
>>    "mul%!\\t%0, %2, %0"
>>    [(set_attr "predicable" "yes")
>> +   (set_attr "it_cc" "noclobber")
>>     (set_attr "length" "2")
>>     (set_attr "insn" "muls")])
>>
>> @@ -999,6 +1008,7 @@
>>    "TARGET_THUMB2 && reload_completed"
>>    "mvn%!\t%0, %1"
>>    [(set_attr "predicable" "yes")
>> +   (set_attr "it_cc" "noclobber")
>>     (set_attr "length" "2")]
>>  )
>>
>> @@ -1022,6 +1032,7 @@
>>    "TARGET_THUMB2 && reload_completed"
>>    "neg%!\t%0, %1"
>>    [(set_attr "predicable" "yes")
>> +   (set_attr "it_cc" "noclobber")
>>     (set_attr "length" "2")]
>>  )
>>
>> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
>> index 6d41cee..7d09ddb 100644
>> --- a/gcc/doc/tm.texi
>> +++ b/gcc/doc/tm.texi
>> @@ -10879,6 +10879,19 @@ if-statements into conditions combined by 
>> @code{and} and @code{or} operations.
>>  being processed and about to be turned into a condition.
>>  @end defmac
>>
>> +@defmac IFCVT_OVERRIDE_MODIFIED_TEST (@var{test}, @var{insn})
>> +Used if the target has instructions that modify the condition
>> +register when executed unconditionally, but do not do so when executed
>> +conditionally, or vice-versa.  @var{TEST} contains the condition test, and
>> +@var{INSN} contains the original insn to be conditionalized.  The macro
>> +should return minus-one if there is to be no override, zero to disregard
>> +an apparent modified condition register, or any other value to indicate
>> +that the instruction does modify the condition register, even if it does
>> +not appear to.  Additionally, if conditionalizing the instruction does
>> +change its behaviour, @code{IFCVT_MODIFY_INSN} should make the appropriate
>> +adjustments to match.
>> +@end defmac
>> +
>>  @defmac IFCVT_MODIFY_INSN (@var{ce_info}, @var{pattern}, @var{insn})
>>  A C expression to modify the @var{PATTERN} of an @var{INSN} that is to
>>  be converted to conditional execution format.  @var{ce_info} points to
>> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
>> index 396f244..52d9e96 100644
>> --- a/gcc/doc/tm.texi.in
>> +++ b/gcc/doc/tm.texi.in
>> @@ -10759,6 +10759,19 @@ if-statements into conditions combined by 
>> @code{and} and @code{or} operations.
>>  being processed and about to be turned into a condition.
>>  @end defmac
>>
>> +@defmac IFCVT_OVERRIDE_MODIFIED_TEST (@var{test}, @var{insn})
>> +Used if the target has instructions that modify the condition
>> +register when executed unconditionally, but do not do so when executed
>> +conditionally, or vice-versa.  @var{TEST} contains the condition test, and
>> +@var{INSN} contains the original insn to be conditionalized.  The macro
>> +should return minus-one if there is to be no override, zero to disregard
>> +an apparent modified condition register, or any other value to indicate
>> +that the instruction does modify the condition register, even if it does
>> +not appear to.  Additionally, if conditionalizing the instruction does
>> +change its behaviour, @code{IFCVT_MODIFY_INSN} should make the appropriate
>> +adjustments to match.
>> +@end defmac
>> +
>>  @defmac IFCVT_MODIFY_INSN (@var{ce_info}, @var{pattern}, @var{insn})
>>  A C expression to modify the @var{PATTERN} of an @var{INSN} that is to
>>  be converted to conditional execution format.  @var{ce_info} points to
>> diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
>> index ce60ce2..ff620da 100644
>> --- a/gcc/ifcvt.c
>> +++ b/gcc/ifcvt.c
>> @@ -310,6 +310,7 @@ cond_exec_process_insns (ce_if_block_t *ce_info 
>> ATTRIBUTE_UNUSED,
>>    rtx insn;
>>    rtx xtest;
>>    rtx pattern;
>> +  int override_modified = -1;
>>
>>    if (!start || !end)
>>      return FALSE;
>> @@ -338,7 +339,18 @@ cond_exec_process_insns (ce_if_block_t *ce_info 
>> ATTRIBUTE_UNUSED,
>>        if (must_be_last)
>>       return FALSE;
>>
>> -      if (modified_in_p (test, insn))
>> +      /* Some instructions modify CC flags when executed unconditionally
>> +      but not when executed conditionally.
>> +         Return values:
>> +         -1 = no override
>> +         0 = force false
>> +         other = force true.  */
>> +#ifdef IFCVT_OVERRIDE_MODIFIED_TEST
>> +      override_modified = IFCVT_OVERRIDE_MODIFIED_TEST (test, insn);
>> +#endif
>> +
>> +      if ((override_modified == -1 && modified_in_p (test, insn))
>> +       || override_modified)
>>       {
>>         if (!mod_ok)
>>           return FALSE;
>> diff --git a/gcc/testsuite/gcc.target/arm/thumb-ifcvt.c 
>> b/gcc/testsuite/gcc.target/arm/thumb-ifcvt.c
>> new file mode 100644
>> index 0000000..b03bbce
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/arm/thumb-ifcvt.c
>> @@ -0,0 +1,19 @@
>> +/* Check that Thumb 16-bit shifts can be if-converted.  */
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target arm_thumb2_ok } */
>> +/* { dg-options "-O2" } */
>> +
>> +int
>> +foo (int a, int b)
>> +{
>> +  if (a != b)
>> +    {
>> +      a = a << b;
>> +      a = a >> 1;
>> +    }
>> +
>> +  return a + b;
>> +}
>> +
>> +/* { dg-final { scan-assembler "lslne" } } */
>> +/* { dg-final { scan-assembler "asrne" } } */
> 
> 
> 


Reply via email to