On 12/2/19 4:43 PM, Stam Markianos-Wright wrote: > > > On 11/15/19 5:26 PM, Stam Markianos-Wright wrote: >> Pinging with more correct maintainers this time :) >> >> Also would need to backport to gcc7,8,9, but need to get this approved >> first! >> >> Thank you, >> Stam >> >> >> -------- Forwarded Message -------- >> Subject: Re: [PATCH][GCC][ARM] Arm generates out of range conditional >> branches in Thumb2 (PR91816) >> Date: Mon, 21 Oct 2019 10:37:09 +0100 >> From: Stam Markianos-Wright <stam.markianos-wri...@arm.com> >> To: Ramana Radhakrishnan <ramana....@googlemail.com> >> CC: gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>, nd >> <n...@arm.com>, James Greenhalgh <james.greenha...@arm.com>, Richard >> Earnshaw <richard.earns...@arm.com> >> >> >> >> On 10/13/19 4:23 PM, Ramana Radhakrishnan wrote: >>>> >>>> Patch bootstrapped and regression tested on arm-none-linux-gnueabihf, >>>> however, on my native Aarch32 setup the test times out when run as part >>>> of a big "make check-gcc" regression, but not when run individually. >>>> >>>> 2019-10-11 Stamatis Markianos-Wright <stam.markianos-wri...@arm.com> >>>> >>>> * config/arm/arm.md: Update b<cond> for Thumb2 range checks. >>>> * config/arm/arm.c: New function arm_gen_far_branch. >>>> * config/arm/arm-protos.h: New function arm_gen_far_branch >>>> prototype. >>>> >>>> gcc/testsuite/ChangeLog: >>>> >>>> 2019-10-11 Stamatis Markianos-Wright <stam.markianos-wri...@arm.com> >>>> >>>> * testsuite/gcc.target/arm/pr91816.c: New test. >>> >>>> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h >>>> index f995974f9bb..1dce333d1c3 100644 >>>> --- a/gcc/config/arm/arm-protos.h >>>> +++ b/gcc/config/arm/arm-protos.h >>>> @@ -570,4 +570,7 @@ void arm_parse_option_features (sbitmap, const >>>> cpu_arch_option *, >>>> void arm_initialize_isa (sbitmap, const enum isa_feature *); >>>> +const char * arm_gen_far_branch (rtx *, int,const char * , const >>>> char *); >>>> + >>>> + >>> >>> Lets get the nits out of the way. >>> >>> Unnecessary extra new line, need a space between int and const above. >>> >>> >> >> .Fixed! >> >>>> #endif /* ! GCC_ARM_PROTOS_H */ >>>> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c >>>> index 39e1a1ef9a2..1a693d2ddca 100644 >>>> --- a/gcc/config/arm/arm.c >>>> +++ b/gcc/config/arm/arm.c >>>> @@ -32139,6 +32139,31 @@ arm_run_selftests (void) >>>> } >>>> } /* Namespace selftest. */ >>>> + >>>> +/* Generate code to enable conditional branches in functions over 1 >>>> MiB. */ >>>> +const char * >>>> +arm_gen_far_branch (rtx * operands, int pos_label, const char * dest, >>>> + const char * branch_format) >>> >>> Not sure if this is some munging from the attachment but check >>> vertical alignment of parameters. >>> >> >> .Fixed! >> >>>> +{ >>>> + rtx_code_label * tmp_label = gen_label_rtx (); >>>> + char label_buf[256]; >>>> + char buffer[128]; >>>> + ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \ >>>> + CODE_LABEL_NUMBER (tmp_label)); >>>> + const char *label_ptr = arm_strip_name_encoding (label_buf); >>>> + rtx dest_label = operands[pos_label]; >>>> + operands[pos_label] = tmp_label; >>>> + >>>> + snprintf (buffer, sizeof (buffer), "%s%s", branch_format , >>>> label_ptr); >>>> + output_asm_insn (buffer, operands); >>>> + >>>> + snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, >>>> label_ptr); >>>> + operands[pos_label] = dest_label; >>>> + output_asm_insn (buffer, operands); >>>> + return ""; >>>> +} >>>> + >>>> + >>> >>> Unnecessary extra newline. >>> >> >> .Fixed! >> >>>> #undef TARGET_RUN_TARGET_SELFTESTS >>>> #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests >>>> #endif /* CHECKING_P */ >>>> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md >>>> index f861c72ccfc..634fd0a59da 100644 >>>> --- a/gcc/config/arm/arm.md >>>> +++ b/gcc/config/arm/arm.md >>>> @@ -6686,9 +6686,16 @@ >>>> ;; And for backward branches we have >>>> ;; (neg_range - neg_base_offs + pc_offs) = (neg_range - (-2 or >>>> -4) + 4). >>>> ;; >>>> +;; In 16-bit Thumb these ranges are: >>>> ;; For a 'b' pos_range = 2046, neg_range = -2048 giving >>>> (-2040->2048). >>>> ;; For a 'b<cond>' pos_range = 254, neg_range = -256 giving >>>> (-250 ->256). >>>> +;; In 32-bit Thumb these ranges are: >>>> +;; For a 'b' +/- 16MB is not checked for. >>>> +;; For a 'b<cond>' pos_range = 1048574, neg_range = -1048576 giving >>>> +;; (-1048568 -> 1048576). >>>> + >>>> + >>> >>> Unnecessary extra newline. >>> >> >> .Fixed! >> >>>> (define_expand "cbranchsi4" >>>> [(set (pc) (if_then_else >>>> (match_operator 0 "expandable_comparison_operator" >>>> @@ -6947,22 +6954,42 @@ >>>> (pc)))] >>>> "TARGET_32BIT" >>>> "* >>>> - if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) >>>> - { >>>> - arm_ccfsm_state += 2; >>>> - return \"\"; >>>> - } >>>> - return \"b%d1\\t%l0\"; >>>> + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) >>>> + { >>>> + arm_ccfsm_state += 2; >>>> + return \"\"; >>>> + } >>>> + switch (get_attr_length (insn)) >>>> + { >>>> + // Thumb2 16-bit b{cond} >>>> + case 2: >>>> + >>>> + // Thumb2 32-bit b{cond} >>>> + case 4: return \"b%d1\\t%l0\";break; >>>> + >>>> + // Thumb2 b{cond} out of range. Use unconditional branch. >>>> + case 8: return arm_gen_far_branch \ >>>> + (operands, 0, \"Lbcond\", \"b%D1\t\"); >>>> + break; >>>> + >>>> + // A32 b{cond} >>>> + default: return \"b%d1\\t%l0\"; >>>> + } >>> >>> Please fix indentation here. >>> >> >> .Fixed together with below changes. >> >>>> " >>>> [(set_attr "conds" "use") >>>> (set_attr "type" "branch") >>>> (set (attr "length") >>>> - (if_then_else >>>> - (and (match_test "TARGET_THUMB2") >>>> - (and (ge (minus (match_dup 0) (pc)) (const_int -250)) >>>> - (le (minus (match_dup 0) (pc)) (const_int 256)))) >>>> - (const_int 2) >>>> - (const_int 4)))] >>>> + (if_then_else (match_test "TARGET_THUMB2") >>>> + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int >>>> -250)) >>>> + (le (minus (match_dup 0) (pc)) (const_int 256))) >>>> + (const_int 2) >>>> + (if_then_else (and (ge (minus (match_dup 0) (pc)) >>>> + (const_int -1048568)) >>>> + (le (minus (match_dup 0) (pc)) (const_int 1048576))) >>>> + (const_int 4) >>>> + (const_int 8))) >>>> + (const_int 10))) >>>> + ] >>> >>> This conditional is unreadable and is getting quite complex. >>> >>> Please fix the indentation and add some comments to indicate when >>> this is 2, 4, 8, 10 above the pattern and ask for the comment to >>> be in sync with this. >>> >>> How did we end up with length 10 ? That indicates 2 4 byte instructions >>> and a 2 byte instruction ? You are handling lengths 2, 4, 8 above in >>> the switch - is length 10 going to be a single A32 b<cond> instruction ? >>> >>> What am I missing ? >>> >>> >> >> Ah sorry, I had not realised that the "length" related to the number >> of bytes in the instruction, so I just used it as a variable to then >> check in the switch(). >> And yes, you are correct in assuming that length 10 would have been >> the A32 b<cond> version. >> So the mapping I had in mind was: >> 2-> Thumb2 b<cond> - narrow 16bit version >> 4-> Thumb2 b<cond> - wide 32bit version >> 8-> Thumb2 b - "far branch". >> 10-> A32 b<cond> >> >> The new version that maintains the "length=number of bytes" would be: >> >> 2-> Thumb2 b<cond> - narrow 16bit version >> 4-> Thumb2 b<cond> - wide 32bit version OR A32 b<cond> >> 6-> Thumb2 "far branch" made up from one b<cond> to a very close >> Lbcond label (so 16 bits) and one b for 32 bits. (so 2+4 == 6) >> >> I've gone ahead and done this in the new proposed patch. Let me know >> if it's ok! (also I changed the first check to !TARGET_THUMB2 - this >> makes it slightly more readable). I'm still not sure about this, so >> any suggestions are welcome! > > Ping :) >
.Ping >> >>> >>>> ) >>>> (define_insn "*arm_cond_branch_reversed" >>>> @@ -6978,17 +7005,36 @@ >>>> arm_ccfsm_state += 2; >>>> return \"\"; >>>> } >>>> - return \"b%D1\\t%l0\"; >>>> + switch (get_attr_length (insn)) >>>> + { >>>> + // Thumb2 16-bit b{cond} >>>> + case 2: >>>> + >>>> + // Thumb2 32-bit b{cond} >>>> + case 4: return \"b%D1\\t%l0\";break; >>>> + >>>> + // Thumb2 b{cond} out of range. Use unconditional branch. >>>> + case 8: return arm_gen_far_branch \ >>>> + (operands, 0, \"Lbcond\", \"b%d1\t\"); >>>> + break; >>>> + // A32 b{cond} >>>> + default: return \"b%D1\\t%l0\"; >>>> + } >>>> " >>>> [(set_attr "conds" "use") >>>> (set_attr "type" "branch") >>>> (set (attr "length") >>>> - (if_then_else >>>> - (and (match_test "TARGET_THUMB2") >>>> - (and (ge (minus (match_dup 0) (pc)) (const_int -250)) >>>> - (le (minus (match_dup 0) (pc)) (const_int 256)))) >>>> - (const_int 2) >>>> - (const_int 4)))] >>>> + (if_then_else (match_test "TARGET_THUMB2") >>>> + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int >>>> -250)) >>>> + (le (minus (match_dup 0) (pc)) (const_int 256))) >>>> + (const_int 2) >>>> + (if_then_else (and (ge (minus (match_dup 0) (pc)) >>>> + (const_int -1048568)) >>>> + (le (minus (match_dup 0) (pc)) (const_int 1048576))) >>>> + (const_int 4) >>>> + (const_int 8))) >>>> + (const_int 10))) >>>> + ] >>> >>> Same comments as above apply here too. >>> >> >> Same as above. > > Ping :) .Ping > >> >> Thank you for the feedback and apologies for being a clueless :) >> >> And, of course, let me know of any problems or queries! >> >> Cheers, >> Stam >> >>> Ramana >>> >>>> ) >>>> >>>> diff --git a/gcc/testsuite/gcc.target/arm/pr91816.c >>>> b/gcc/testsuite/gcc.target/arm/pr91816.c >>>> new file mode 100644 >>>> index 00000000000..176bf61780b >>>> --- /dev/null >>>> +++ b/gcc/testsuite/gcc.target/arm/pr91816.c >>>> @@ -0,0 +1,102 @@ >>>> +/* { dg-do compile } */ >>>> +/* { dg-options "-march=armv7-a -mthumb -mfpu=vfpv3-d16" } */ >>>> +int printf(const char *, ...); >>>> + >>>> +__attribute__((noinline,noclone)) void f1(int a) >>>> +{ >>>> + if (a) { >>>> +#define HW0 printf("Hello World!\n"); >>>> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >>>> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >>>> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >>>> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >>>> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >>>> + HW0 >>>> + } >>>> +} >>>> + >>>> +__attribute__((noinline,noclone)) void f2(int a) >>>> +{ >>>> + if (a) { >>>> +#define HW0 printf("Hello World!\n"); >>>> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >>>> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >>>> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >>>> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >>>> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >>>> + HW3 >>>> + } >>>> +} >>>> + >>>> + >>>> +__attribute__((noinline,noclone)) void f3(int a) >>>> +{ >>>> + if (a) { >>>> +#define HW0 printf("Hello World!\n"); >>>> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >>>> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >>>> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >>>> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >>>> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >>>> + HW5 >>>> + } >>>> +} >>>> + >>>> +__attribute__((noinline,noclone)) void f4(int a) >>>> +{ >>>> + if (a==1) { >>>> +#define HW0 printf("Hello World!\n"); >>>> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >>>> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >>>> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >>>> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >>>> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >>>> + HW0 >>>> + } >>>> +} >>>> + >>>> +__attribute__((noinline,noclone)) void f5(int a) >>>> +{ >>>> + if (a==1) { >>>> +#define HW0 printf("Hello World!\n"); >>>> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >>>> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >>>> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >>>> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >>>> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >>>> + HW3 >>>> + } >>>> +} >>>> + >>>> + >>>> +__attribute__((noinline,noclone)) void f6(int a) >>>> +{ >>>> + if (a==1) { >>>> +#define HW0 printf("Hello World!\n"); >>>> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >>>> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >>>> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >>>> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >>>> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >>>> + HW5 >>>> + } >>>> +} >>>> + >>>> + >>>> +int main(void) >>>> +{ >>>> + f1(0); >>>> + f2(0); >>>> + f3(0); >>>> + f4(0); >>>> + f5(0); >>>> + f6(0); >>>> + return 0; >>>> +} >>>> + >>>> + >>>> +/* { dg-final { scan-assembler-times "beq\\t.L\[0-9\]" 2 } } */ >>>> +/* { dg-final { scan-assembler-times "beq\\t.Lbcond\[0-9\]" 1 } } */ >>>> +/* { dg-final { scan-assembler-times "bne\\t.L\[0-9\]" 2 } } */ >>>> +/* { dg-final { scan-assembler-times "bne\\t.Lbcond\[0-9\]" 1 } } */ >>>> +/* { dg-final { scan-assembler-times "b\\t.L\[0-9\]" 2 } } */ >>> >> >>