On 12/2/19 4:43 PM, Stam Markianos-Wright wrote:
> 
> 
> On 11/15/19 5:26 PM, Stam Markianos-Wright wrote:
>> Pinging with more correct maintainers this time :)
>>
>> Also would need to backport to gcc7,8,9, but need to get this approved 
>> first!
>>
>> Thank you,
>> Stam
>>
>>
>> -------- Forwarded Message --------
>> Subject: Re: [PATCH][GCC][ARM] Arm generates out of range conditional 
>> branches in Thumb2 (PR91816)
>> Date: Mon, 21 Oct 2019 10:37:09 +0100
>> From: Stam Markianos-Wright <stam.markianos-wri...@arm.com>
>> To: Ramana Radhakrishnan <ramana....@googlemail.com>
>> CC: gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>, nd 
>> <n...@arm.com>, James Greenhalgh <james.greenha...@arm.com>, Richard 
>> Earnshaw <richard.earns...@arm.com>
>>
>>
>>
>> On 10/13/19 4:23 PM, Ramana Radhakrishnan wrote:
>>>>
>>>> Patch bootstrapped and regression tested on arm-none-linux-gnueabihf,
>>>> however, on my native Aarch32 setup the test times out when run as part
>>>> of a big "make check-gcc" regression, but not when run individually.
>>>>
>>>> 2019-10-11  Stamatis Markianos-Wright <stam.markianos-wri...@arm.com>
>>>>
>>>>     * config/arm/arm.md: Update b<cond> for Thumb2 range checks.
>>>>     * config/arm/arm.c: New function arm_gen_far_branch.
>>>>        * config/arm/arm-protos.h: New function arm_gen_far_branch
>>>>     prototype.
>>>>
>>>> gcc/testsuite/ChangeLog:
>>>>
>>>> 2019-10-11  Stamatis Markianos-Wright <stam.markianos-wri...@arm.com>
>>>>
>>>>        * testsuite/gcc.target/arm/pr91816.c: New test.
>>>
>>>> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
>>>> index f995974f9bb..1dce333d1c3 100644
>>>> --- a/gcc/config/arm/arm-protos.h
>>>> +++ b/gcc/config/arm/arm-protos.h
>>>> @@ -570,4 +570,7 @@ void arm_parse_option_features (sbitmap, const 
>>>> cpu_arch_option *,
>>>>   void arm_initialize_isa (sbitmap, const enum isa_feature *);
>>>> +const char * arm_gen_far_branch (rtx *, int,const char * , const 
>>>> char *);
>>>> +
>>>> +
>>>
>>> Lets get the nits out of the way.
>>>
>>> Unnecessary extra new line, need a space between int and const above.
>>>
>>>
>>
>> .Fixed!
>>
>>>>   #endif /* ! GCC_ARM_PROTOS_H */
>>>> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
>>>> index 39e1a1ef9a2..1a693d2ddca 100644
>>>> --- a/gcc/config/arm/arm.c
>>>> +++ b/gcc/config/arm/arm.c
>>>> @@ -32139,6 +32139,31 @@ arm_run_selftests (void)
>>>>   }
>>>>   } /* Namespace selftest.  */
>>>> +
>>>> +/* Generate code to enable conditional branches in functions over 1 
>>>> MiB.  */
>>>> +const char *
>>>> +arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
>>>> +            const char * branch_format)
>>>
>>> Not sure if this is some munging from the attachment but check
>>> vertical alignment of parameters.
>>>
>>
>> .Fixed!
>>
>>>> +{
>>>> +  rtx_code_label * tmp_label = gen_label_rtx ();
>>>> +  char label_buf[256];
>>>> +  char buffer[128];
>>>> +  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \
>>>> +            CODE_LABEL_NUMBER (tmp_label));
>>>> +  const char *label_ptr = arm_strip_name_encoding (label_buf);
>>>> +  rtx dest_label = operands[pos_label];
>>>> +  operands[pos_label] = tmp_label;
>>>> +
>>>> +  snprintf (buffer, sizeof (buffer), "%s%s", branch_format , 
>>>> label_ptr);
>>>> +  output_asm_insn (buffer, operands);
>>>> +
>>>> +  snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, 
>>>> label_ptr);
>>>> +  operands[pos_label] = dest_label;
>>>> +  output_asm_insn (buffer, operands);
>>>> +  return "";
>>>> +}
>>>> +
>>>> +
>>>
>>> Unnecessary extra newline.
>>>
>>
>> .Fixed!
>>
>>>>   #undef TARGET_RUN_TARGET_SELFTESTS
>>>>   #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
>>>>   #endif /* CHECKING_P */
>>>> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
>>>> index f861c72ccfc..634fd0a59da 100644
>>>> --- a/gcc/config/arm/arm.md
>>>> +++ b/gcc/config/arm/arm.md
>>>> @@ -6686,9 +6686,16 @@
>>>>   ;; And for backward branches we have
>>>>   ;;   (neg_range - neg_base_offs + pc_offs) = (neg_range - (-2 or 
>>>> -4) + 4).
>>>>   ;;
>>>> +;; In 16-bit Thumb these ranges are:
>>>>   ;; For a 'b'       pos_range = 2046, neg_range = -2048 giving 
>>>> (-2040->2048).
>>>>   ;; For a 'b<cond>' pos_range = 254,  neg_range = -256  giving 
>>>> (-250 ->256).
>>>> +;; In 32-bit Thumb these ranges are:
>>>> +;; For a 'b'       +/- 16MB is not checked for.
>>>> +;; For a 'b<cond>' pos_range = 1048574,  neg_range = -1048576  giving
>>>> +;; (-1048568 -> 1048576).
>>>> +
>>>> +
>>>
>>> Unnecessary extra newline.
>>>
>>
>> .Fixed!
>>
>>>>   (define_expand "cbranchsi4"
>>>>     [(set (pc) (if_then_else
>>>>             (match_operator 0 "expandable_comparison_operator"
>>>> @@ -6947,22 +6954,42 @@
>>>>                 (pc)))]
>>>>     "TARGET_32BIT"
>>>>     "*
>>>> -  if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
>>>> -    {
>>>> -      arm_ccfsm_state += 2;
>>>> -      return \"\";
>>>> -    }
>>>> -  return \"b%d1\\t%l0\";
>>>> +     if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
>>>> +      {
>>>> +    arm_ccfsm_state += 2;
>>>> +    return \"\";
>>>> +      }
>>>> +     switch (get_attr_length (insn))
>>>> +      {
>>>> +    // Thumb2 16-bit b{cond}
>>>> +    case 2:
>>>> +
>>>> +    // Thumb2 32-bit b{cond}
>>>> +    case 4: return \"b%d1\\t%l0\";break;
>>>> +
>>>> +    // Thumb2 b{cond} out of range.  Use unconditional branch.
>>>> +    case 8: return arm_gen_far_branch \
>>>> +        (operands, 0, \"Lbcond\", \"b%D1\t\");
>>>> +    break;
>>>> +
>>>> +    // A32 b{cond}
>>>> +    default: return \"b%d1\\t%l0\";
>>>> +      }
>>>
>>> Please fix indentation here.
>>>
>>
>> .Fixed together with below changes.
>>
>>>>     "
>>>>     [(set_attr "conds" "use")
>>>>      (set_attr "type" "branch")
>>>>      (set (attr "length")
>>>> -    (if_then_else
>>>> -       (and (match_test "TARGET_THUMB2")
>>>> -        (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>>>> -             (le (minus (match_dup 0) (pc)) (const_int 256))))
>>>> -       (const_int 2)
>>>> -       (const_int 4)))]
>>>> +    (if_then_else (match_test "TARGET_THUMB2")
>>>> +    (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int 
>>>> -250))
>>>> +    (le (minus (match_dup 0) (pc)) (const_int 256)))
>>>> +    (const_int 2)
>>>> +    (if_then_else (and (ge (minus (match_dup 0) (pc))
>>>> +                        (const_int -1048568))
>>>> +            (le (minus (match_dup 0) (pc)) (const_int 1048576)))
>>>> +    (const_int 4)
>>>> +    (const_int 8)))
>>>> +    (const_int 10)))
>>>> +   ]
>>>
>>> This conditional is unreadable and is getting quite complex.
>>>
>>> Please fix the indentation and add some comments to indicate when
>>> this is 2, 4, 8, 10 above the pattern and ask for the comment to
>>> be in sync with this.
>>>
>>> How did we end up with length 10 ? That indicates 2 4 byte instructions
>>> and a 2 byte instruction ? You are handling lengths 2, 4, 8 above in
>>> the switch - is length 10 going to be a single A32 b<cond> instruction ?
>>>
>>> What am I missing ?
>>>
>>>
>>
>> Ah sorry, I had not realised that the "length" related to the number 
>> of bytes in the instruction, so I just used it as a variable to then 
>> check in the switch().
>> And yes, you are correct in assuming that length 10 would have been 
>> the A32 b<cond> version.
>> So the mapping I had in mind was:
>> 2->  Thumb2 b<cond> - narrow 16bit version
>> 4->  Thumb2 b<cond> - wide 32bit version
>> 8->  Thumb2 b       - "far branch".
>> 10-> A32 b<cond>
>>
>> The new version that maintains the "length=number of bytes" would be:
>>
>> 2->  Thumb2 b<cond> - narrow 16bit version
>> 4->  Thumb2 b<cond> - wide 32bit version OR A32 b<cond>
>> 6->  Thumb2 "far branch" made up from one b<cond> to a very close 
>> Lbcond label (so 16 bits) and one b for 32 bits. (so 2+4 == 6)
>>
>> I've gone ahead and done this in the new proposed patch. Let me know 
>> if it's ok! (also I changed the first check to !TARGET_THUMB2 - this 
>> makes it slightly more readable). I'm still not sure about this, so 
>> any suggestions are welcome!
> 
> Ping :)
> 

.Ping

>>
>>>
>>>>   )
>>>>   (define_insn "*arm_cond_branch_reversed"
>>>> @@ -6978,17 +7005,36 @@
>>>>         arm_ccfsm_state += 2;
>>>>         return \"\";
>>>>       }
>>>> -  return \"b%D1\\t%l0\";
>>>> +     switch (get_attr_length (insn))
>>>> +      {
>>>> +    // Thumb2 16-bit b{cond}
>>>> +    case 2:
>>>> +
>>>> +    // Thumb2 32-bit b{cond}
>>>> +    case 4: return \"b%D1\\t%l0\";break;
>>>> +
>>>> +    // Thumb2 b{cond} out of range.  Use unconditional branch.
>>>> +    case 8: return arm_gen_far_branch \
>>>> +        (operands, 0, \"Lbcond\", \"b%d1\t\");
>>>> +        break;
>>>> +    // A32 b{cond}
>>>> +    default: return \"b%D1\\t%l0\";
>>>> +       }
>>>>     "
>>>>     [(set_attr "conds" "use")
>>>>      (set_attr "type" "branch")
>>>>      (set (attr "length")
>>>> -    (if_then_else
>>>> -       (and (match_test "TARGET_THUMB2")
>>>> -        (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>>>> -             (le (minus (match_dup 0) (pc)) (const_int 256))))
>>>> -       (const_int 2)
>>>> -       (const_int 4)))]
>>>> +    (if_then_else (match_test "TARGET_THUMB2")
>>>> +    (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int 
>>>> -250))
>>>> +        (le (minus (match_dup 0) (pc)) (const_int 256)))
>>>> +    (const_int 2)
>>>> +    (if_then_else (and (ge (minus (match_dup 0) (pc))
>>>> +                            (const_int -1048568))
>>>> +        (le (minus (match_dup 0) (pc)) (const_int 1048576)))
>>>> +    (const_int 4)
>>>> +    (const_int 8)))
>>>> +    (const_int 10)))
>>>> +   ]
>>>
>>> Same comments as above apply here too.
>>>
>>
>> Same as above.
> 
> Ping :)

.Ping


> 
>>
>> Thank you for the feedback and apologies for being a clueless :)
>>
>> And, of course, let me know of any problems or queries!
>>
>> Cheers,
>> Stam
>>
>>> Ramana
>>>
>>>>   )
>>>>   
>>>> diff --git a/gcc/testsuite/gcc.target/arm/pr91816.c 
>>>> b/gcc/testsuite/gcc.target/arm/pr91816.c
>>>> new file mode 100644
>>>> index 00000000000..176bf61780b
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.target/arm/pr91816.c
>>>> @@ -0,0 +1,102 @@
>>>> +/* { dg-do compile } */
>>>> +/* { dg-options "-march=armv7-a -mthumb -mfpu=vfpv3-d16" }  */
>>>> +int printf(const char *, ...);
>>>> +
>>>> +__attribute__((noinline,noclone)) void f1(int a)
>>>> +{
>>>> +    if (a) {
>>>> +#define HW0    printf("Hello World!\n");
>>>> +#define HW1    HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>>> +#define HW2    HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>>> +#define HW3    HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>>> +#define HW4    HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>>> +#define HW5    HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>>> +        HW0
>>>> +    }
>>>> +}
>>>> +
>>>> +__attribute__((noinline,noclone)) void f2(int a)
>>>> +{
>>>> +    if (a) {
>>>> +#define HW0    printf("Hello World!\n");
>>>> +#define HW1    HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>>> +#define HW2    HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>>> +#define HW3    HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>>> +#define HW4    HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>>> +#define HW5    HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>>> +        HW3
>>>> +    }
>>>> +}
>>>> +
>>>> +
>>>> +__attribute__((noinline,noclone)) void f3(int a)
>>>> +{
>>>> +    if (a) {
>>>> +#define HW0    printf("Hello World!\n");
>>>> +#define HW1    HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>>> +#define HW2    HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>>> +#define HW3    HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>>> +#define HW4    HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>>> +#define HW5    HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>>> +        HW5
>>>> +    }
>>>> +}
>>>> +
>>>> +__attribute__((noinline,noclone)) void f4(int a)
>>>> +{
>>>> +    if (a==1) {
>>>> +#define HW0    printf("Hello World!\n");
>>>> +#define HW1    HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>>> +#define HW2    HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>>> +#define HW3    HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>>> +#define HW4    HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>>> +#define HW5    HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>>> +        HW0
>>>> +    }
>>>> +}
>>>> +
>>>> +__attribute__((noinline,noclone)) void f5(int a)
>>>> +{
>>>> +    if (a==1) {
>>>> +#define HW0    printf("Hello World!\n");
>>>> +#define HW1    HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>>> +#define HW2    HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>>> +#define HW3    HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>>> +#define HW4    HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>>> +#define HW5    HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>>> +        HW3
>>>> +    }
>>>> +}
>>>> +
>>>> +
>>>> +__attribute__((noinline,noclone)) void f6(int a)
>>>> +{
>>>> +    if (a==1) {
>>>> +#define HW0    printf("Hello World!\n");
>>>> +#define HW1    HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>>> +#define HW2    HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>>> +#define HW3    HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>>> +#define HW4    HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>>> +#define HW5    HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>>> +        HW5
>>>> +    }
>>>> +}
>>>> +
>>>> +
>>>> +int main(void)
>>>> +{
>>>> +    f1(0);
>>>> +    f2(0);
>>>> +    f3(0);
>>>> +    f4(0);
>>>> +    f5(0);
>>>> +    f6(0);
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +
>>>> +/* { dg-final { scan-assembler-times "beq\\t.L\[0-9\]" 2 } } */
>>>> +/* { dg-final { scan-assembler-times "beq\\t.Lbcond\[0-9\]" 1 } } */
>>>> +/* { dg-final { scan-assembler-times "bne\\t.L\[0-9\]" 2 } } */
>>>> +/* { dg-final { scan-assembler-times "bne\\t.Lbcond\[0-9\]" 1 } } */
>>>> +/* { dg-final { scan-assembler-times "b\\t.L\[0-9\]" 2 } } */
>>>
>>
>>

Reply via email to