Hi Kyrill,
On Tue, 12 Nov 2019 at 11:34, Kyrill Tkachov <kyrylo.tkac...@foss.arm.com> wrote: > > Hi Christophe, > > On 10/18/19 2:18 PM, Christophe Lyon wrote: > > Hi, > > > > This patch extends support for -mpure-code to all thumb-1 processors, > > by removing the need for MOVT. > > > > Symbol addresses are built using upper8_15, upper0_7, lower8_15 and > > lower0_7 relocations, and constants are built using sequences of > > movs/adds and lsls instructions. > > > > The extension of the *thumb1_movhf pattern uses always the same size > > (6) although it can emit a shorter sequence when possible. This is > > similar to what *arm32_movhf already does. > > > > CASE_VECTOR_PC_RELATIVE is now false with -mpure-code, to avoid > > generating invalid assembly code with differences from symbols from > > two different sections (the difference cannot be computed by the > > assembler). > > > > Tests pr45701-[12].c needed a small adjustment to avoid matching > > upper8_15 when looking for the r8 register. > > > > Test no-literal-pool.c is augmented with __fp16, so it now uses > > -mfp16-format=ieee. > > > > Test thumb1-Os-mult.c generates an inline code sequence with > > -mpure-code and computes the multiplication by using a sequence of > > add/shift rather than using the multiply instruction, so we skip it in > > presence of -mpure-code. > > > > With -mcpu=cortex-m0, the pure-code/no-literal-pool.c fails because > > code like: > > static char *p = "Hello World"; > > char * > > testchar () > > { > > return p + 4; > > } > > generates 2 indirections (I removed non-essential directives/code) > > .section .rodata > > .LC0: > > .ascii "Hello World\000" > > .data > > p: > > .word .LC0 > > .section .rodata > > .LC2: > > .word p > > .section .text,"0x20000006",%progbits > > testchar: > > push {r7, lr} > > add r7, sp, #0 > > movs r3, #:upper8_15:#.LC2 > > lsls r3, #8 > > adds r3, #:upper0_7:#.LC2 > > lsls r3, #8 > > adds r3, #:lower8_15:#.LC2 > > lsls r3, #8 > > adds r3, #:lower0_7:#.LC2 > > ldr r3, [r3] > > ldr r3, [r3] > > adds r3, r3, #4 > > movs r0, r3 > > mov sp, r7 > > @ sp needed > > pop {r7, pc} > > > > By contrast, when using -mcpu=cortex-m4, the code looks like: > > .section .rodata > > .LC0: > > .ascii "Hello World\000" > > .data > > p: > > .word .LC0 > > testchar: > > push {r7} > > add r7, sp, #0 > > movw r3, #:lower16:p > > movt r3, #:upper16:p > > ldr r3, [r3] > > adds r3, r3, #4 > > mov r0, r3 > > mov sp, r7 > > pop {r7} > > bx lr > > > > I haven't found yet how to make code for cortex-m0 apply upper/lower > > relocations to "p" instead of .LC2. The current code looks functional, > > but could be improved. > > > > OK as-is? > > > > Thanks, > > > > Christophe > > > diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h > index f995974..beb8411 100644 > --- a/gcc/config/arm/arm-protos.h > +++ b/gcc/config/arm/arm-protos.h > @@ -66,6 +66,7 @@ extern bool arm_small_register_classes_for_mode_p > (machine_mode); > extern int const_ok_for_arm (HOST_WIDE_INT); > extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); > extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code); > +extern void thumb1_gen_const_int (rtx, HOST_WIDE_INT); > extern int arm_split_constant (RTX_CODE, machine_mode, rtx, > HOST_WIDE_INT, rtx, rtx, int); > extern int legitimate_pic_operand_p (rtx); > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index 9f0975d..836f147 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -2882,13 +2882,19 @@ arm_option_check_internal (struct gcc_options *opts) > { > const char *flag = (target_pure_code ? "-mpure-code" : > "-mslow-flash-data"); > + bool not_supported = arm_arch_notm || flag_pic || TARGET_NEON; > > - /* We only support -mpure-code and -mslow-flash-data on M-profile > targets > - with MOVT. */ > - if (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON) > + /* We only support -mslow-flash-data on M-profile targets with > + MOVT. */ > + if (target_slow_flash_data && (!TARGET_HAVE_MOVT || not_supported)) > error ("%s only supports non-pic code on M-profile targets with the " > "MOVT instruction", flag); > > + /* We only support -mpure-code-flash-data on M-profile > + targets. */ > > > Typo in the option name. > Thanks for catching this. > + if (target_pure_code && not_supported) > + error ("%s only supports non-pic code on M-profile targets", flag); > + > /* Cannot load addresses: -mslow-flash-data forbids literal pool and > -mword-relocations forbids relocation of MOVT/MOVW. */ > if (target_word_relocations) > @@ -4400,6 +4406,38 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code > code) > } > } > > +/* Emit a sequence of movs/adds/shift to produce a 32-bit constant. > + Avoid generating useless code when one of the bytes is zero. */ > +void > +thumb1_gen_const_int (rtx op0, HOST_WIDE_INT op1) > +{ > + bool mov_done_p = false; > + int i; > + > + /* Emit upper 3 bytes if needed. */ > + for (i = 0; i < 3; i++) > + { > + int byte = (op1 >> (8 * (3 - i))) & 0xff; > + > + if (byte) > + { > + emit_set_insn (op0, mov_done_p > + ? gen_rtx_PLUS (SImode,op0, GEN_INT (byte)) > + : GEN_INT (byte)); > + mov_done_p = true; > + } > + > + if (mov_done_p) > + emit_set_insn (op0, gen_rtx_ASHIFT (SImode, op0, GEN_INT (8))); > + } > + > + /* Emit lower byte if needed. */ > + if (!mov_done_p) > + emit_set_insn (op0, GEN_INT (op1 & 0xff)); > + else if (op1 & 0xff) > + emit_set_insn (op0, gen_rtx_PLUS (SImode, op0, GEN_INT (op1 & 0xff))); > +} > > AFAIK we already have functions like arm_gen_constant that are supposed to > generate optimal immediate sequences. > Do they not fit the usecase here? I tried to replace thumb1_gen_const_int (operands[0], INTVAL (operands[1])); with arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (operands[1]), operands[0], NULL_RTX, 1, 1); in thumb1.md, but this does work. One of the test cases (testsfp16 in no-literal-pool.c) tries to build a floating-point constant 0x3f8fcb92, and arm_gen_constant generates (insn 66 34 72 (set (reg:SI 3 r3 [130]) (const_int 1065353216 [0x3f800000])) "/testsuite/gcc.target/arm/pure-code/no-literal-pool.c":16:6 818 {*thumb1_movsi_insn} (nil)) which leads to error: could not split insn Christophe > > Looks reasonable to me otherwise. > Thanks, > Kyrill > > > + > /* Emit a sequence of insns to handle a large constant. > CODE is the code of the operation required, it can be any of SET, PLUS, > IOR, AND, XOR, MINUS; > @@ -8530,7 +8568,8 @@ thumb1_legitimate_address_p (machine_mode mode, rtx x, > int strict_p) > /* This is PC relative data before arm_reorg runs. */ > else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x) > && GET_CODE (x) == SYMBOL_REF > - && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic) > + && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic > + && !arm_disable_literal_pool) > return 1; > > /* This is PC relative data after arm_reorg runs. */ > @@ -8598,6 +8637,7 @@ thumb1_legitimate_address_p (machine_mode mode, rtx x, > int strict_p) > && GET_MODE_SIZE (mode) == 4 > && GET_CODE (x) == SYMBOL_REF > && CONSTANT_POOL_ADDRESS_P (x) > + && !arm_disable_literal_pool > && ! (flag_pic > && symbol_mentioned_p (get_pool_constant (x)) > && ! pcrel_constant_p (get_pool_constant (x)))) > @@ -9278,7 +9318,9 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum > rtx_code outer) > return 0; > if (thumb_shiftable_const (INTVAL (x))) > return COSTS_N_INSNS (2); > - return COSTS_N_INSNS (3); > + return arm_disable_literal_pool > + ? COSTS_N_INSNS (8) > + : COSTS_N_INSNS (3); > } > else if ((outer == PLUS || outer == COMPARE) > && INTVAL (x) < 256 && INTVAL (x) > -256) > @@ -9435,7 +9477,9 @@ thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum > rtx_code outer) > /* See split "TARGET_THUMB1 && satisfies_constraint_K". */ > if (thumb_shiftable_const (INTVAL (x))) > return COSTS_N_INSNS (2); > - return COSTS_N_INSNS (3); > + return arm_disable_literal_pool > + ? COSTS_N_INSNS (8) > + : COSTS_N_INSNS (3); > } > else if ((outer == PLUS || outer == COMPARE) > && INTVAL (x) < 256 && INTVAL (x) > -256) > @@ -27073,14 +27117,41 @@ arm_thumb1_mi_thunk (FILE *file, tree, > HOST_WIDE_INT delta, > /* push r3 so we can use it as a temporary. */ > /* TODO: Omit this save if r3 is not used. */ > fputs ("\tpush {r3}\n", file); > - fputs ("\tldr\tr3, ", file); > + > + /* With -mpure-code, we cannot load the address from the > + constant pool: we build it explicitly. */ > + if (target_pure_code) > + { > + fputs ("\tmovs\tr3, #:upper8_15:#", file); > + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); > + fputc ('\n', file); > + fputs ("\tlsls r3, #8\n", file); > + fputs ("\tadds\tr3, #:upper0_7:#", file); > + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); > + fputc ('\n', file); > + fputs ("\tlsls r3, #8\n", file); > + fputs ("\tadds\tr3, #:lower8_15:#", file); > + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); > + fputc ('\n', file); > + fputs ("\tlsls r3, #8\n", file); > + fputs ("\tadds\tr3, #:lower0_7:#", file); > + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); > + fputc ('\n', file); > + } > + else > + fputs ("\tldr\tr3, ", file); > } > else > { > fputs ("\tldr\tr12, ", file); > } > - assemble_name (file, label); > - fputc ('\n', file); > + > + if (!target_pure_code) > + { > + assemble_name (file, label); > + fputc ('\n', file); > + } > + > if (flag_pic) > { > /* If we are generating PIC, the ldr instruction below loads > diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h > index 8b67c9c..d842448 100644 > --- a/gcc/config/arm/arm.h > +++ b/gcc/config/arm/arm.h > @@ -1850,9 +1850,11 @@ enum arm_auto_incmodes > for the index in the tablejump instruction. */ > #define CASE_VECTOR_MODE Pmode > > -#define CASE_VECTOR_PC_RELATIVE (TARGET_THUMB2 \ > - || (TARGET_THUMB1 \ > - && (optimize_size || flag_pic))) > +#define CASE_VECTOR_PC_RELATIVE ((TARGET_THUMB2 > \ > + || (TARGET_THUMB1 \ > + && (optimize_size || flag_pic))) \ > + && (!target_pure_code)) > + > > #define CASE_VECTOR_SHORTEN_MODE(min, max, body) \ > (TARGET_THUMB1 \ > diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md > index 5c70200..dd758eb 100644 > --- a/gcc/config/arm/thumb1.md > +++ b/gcc/config/arm/thumb1.md > @@ -43,6 +43,41 @@ > > > > +(define_insn "thumb1_movsi_symbol_ref" > + [(set (match_operand:SI 0 "register_operand" "=l") > + (match_operand:SI 1 "general_operand" "")) > + ] > + "TARGET_THUMB1 > + && arm_disable_literal_pool > + && GET_CODE (operands[1]) == SYMBOL_REF" > + "* > + output_asm_insn (\"movs\\t%0, #:upper8_15:%1\", operands); > + output_asm_insn (\"lsls\\t%0, #8\", operands); > + output_asm_insn (\"adds\\t%0, #:upper0_7:%1\", operands); > + output_asm_insn (\"lsls\\t%0, #8\", operands); > + output_asm_insn (\"adds\\t%0, #:lower8_15:%1\", operands); > + output_asm_insn (\"lsls\\t%0, #8\", operands); > + output_asm_insn (\"adds\\t%0, #:lower0_7:%1\", operands); > + return \"\"; > + " > + [(set_attr "length" "14") > + (set_attr "conds" "clob")] > +) > + > +(define_split > + [(set (match_operand:SI 0 "register_operand" "") > + (match_operand:SI 1 "immediate_operand" ""))] > + "TARGET_THUMB1 > + && arm_disable_literal_pool > + && GET_CODE (operands[1]) == CONST_INT > + && !satisfies_constraint_I (operands[1])" > + [(clobber (const_int 0))] > + " > + thumb1_gen_const_int (operands[0], INTVAL (operands[1])); > + DONE; > + " > +) > + > (define_insn "*thumb1_adddi3" > [(set (match_operand:DI 0 "register_operand" "=l") > (plus:DI (match_operand:DI 1 "register_operand" "%0") > @@ -829,8 +864,8 @@ > (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")]) > > (define_insn "*thumb1_movhf" > - [(set (match_operand:HF 0 "nonimmediate_operand" "=l,l,m,*r,*h") > - (match_operand:HF 1 "general_operand" "l,mF,l,*h,*r"))] > + [(set (match_operand:HF 0 "nonimmediate_operand" "=l,l,l,m,*r,*h") > + (match_operand:HF 1 "general_operand" "l, m,F,l,*h,*r"))] > "TARGET_THUMB1 > && ( s_register_operand (operands[0], HFmode) > || s_register_operand (operands[1], HFmode))" > @@ -855,14 +890,34 @@ > } > return \"ldrh\\t%0, %1\"; > } > - case 2: return \"strh\\t%1, %0\"; > + case 2: > + { > + int bits; > + int high; > + rtx ops[3]; > + > + bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (operands[1]), > + HFmode); > + ops[0] = operands[0]; > + high = (bits >> 8) & 0xff; > + ops[1] = GEN_INT (high); > + ops[2] = GEN_INT (bits & 0xff); > + if (high != 0) > + output_asm_insn (\"movs\\t%0, %1\;lsls\\t%0, #8\;adds\\t%0, %2\", > ops); > + else > + output_asm_insn (\"movs\\t%0, %2\", ops); > + > + return \"\"; > + } > + case 3: return \"strh\\t%1, %0\"; > default: return \"mov\\t%0, %1\"; > } > " > - [(set_attr "length" "2") > - (set_attr "type" "mov_reg,load_4,store_4,mov_reg,mov_reg") > - (set_attr "pool_range" "*,1018,*,*,*") > - (set_attr "conds" "clob,nocond,nocond,nocond,nocond")]) > + [(set_attr "length" "2,2,6,2,2,2") > + (set_attr "type" "mov_reg,load_4,mov_reg,store_4,mov_reg,mov_reg") > + (set_attr "pool_range" "*,1018,*,*,*,*") > + (set_attr "conds" "clob,nocond,nocond,nocond,nocond,nocond")]) > + > ;;; ??? This should have alternatives for constants. > (define_insn "*thumb1_movsf_insn" > [(set (match_operand:SF 0 "nonimmediate_operand" "=l,l,>,l, m,*r,*h") > diff --git a/gcc/testsuite/gcc.target/arm/pr45701-1.c > b/gcc/testsuite/gcc.target/arm/pr45701-1.c > index b26011b..15913d8 100644 > --- a/gcc/testsuite/gcc.target/arm/pr45701-1.c > +++ b/gcc/testsuite/gcc.target/arm/pr45701-1.c > @@ -2,7 +2,7 @@ > /* { dg-skip-if "" { ! { arm_thumb1_ok || arm_thumb2_ok } } } */ > /* { dg-options "-mthumb -Os" } */ > /* { dg-final { scan-assembler "push\t\{r3" { target { ! > arm*-*-uclinuxfdpiceabi } } } } */ > -/* { dg-final { scan-assembler-not "\[^\-\]r8" { target { ! > arm*-*-uclinuxfdpiceabi } } } } */ > +/* { dg-final { scan-assembler-not "\[^\-e\]r8" { target { ! > arm*-*-uclinuxfdpiceabi } } } } */ > > extern int hist_verify; > extern int a1; > diff --git a/gcc/testsuite/gcc.target/arm/pr45701-2.c > b/gcc/testsuite/gcc.target/arm/pr45701-2.c > index 32eed4d..bb2d36e 100644 > --- a/gcc/testsuite/gcc.target/arm/pr45701-2.c > +++ b/gcc/testsuite/gcc.target/arm/pr45701-2.c > @@ -2,7 +2,7 @@ > /* { dg-skip-if "" { ! { arm_thumb1_ok || arm_thumb2_ok } } } */ > /* { dg-options "-mthumb -Os" } */ > /* { dg-final { scan-assembler "push\t\{r3" { target { ! > arm*-*-uclinuxfdpiceabi } } } } */ > -/* { dg-final { scan-assembler-not "\[^\-\]r8" { target { ! > arm*-*-uclinuxfdpiceabi } } } } */ > +/* { dg-final { scan-assembler-not "\[^\-e\]r8" { target { ! > arm*-*-uclinuxfdpiceabi } } } } */ > > extern int hist_verify; > extern int a1; > diff --git a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c > b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c > index 4b893fd..3de1620 100644 > --- a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c > +++ b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c > @@ -1,12 +1,24 @@ > /* { dg-do compile } */ > -/* { dg-options "-mpure-code" } */ > +/* { dg-options "-mpure-code -mfp16-format=ieee" } */ > /* { dg-skip-if "" { *-*-* } { "-g" "-fpic" "-fPIC" } { "" } } */ > > +__fp16 hf; > float sf; > double df; > long long l; > static char *p = "Hello World"; > > +__fp16 > +testsfp16 (__fp16 *p) > +{ > + hf = 1.3; > + *p += hf; > + if (*p > 1.1234f) > + return 2.1234f; > + else > + return 3.1234f; > +} > + > float > testsf (float *p) > { > diff --git a/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp > b/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp > index bf7e4ad..b05cfd6 100644 > --- a/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp > +++ b/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp > @@ -25,11 +25,8 @@ if ![info exists DEFAULT_CFLAGS] then { > set DEFAULT_CFLAGS " -ansi -pedantic-errors" > } > > -# The -mpure-code option is only available for M-profile targets that support > -# the MOVT instruction. > -if {([check_effective_target_arm_thumb2_ok] > - || [check_effective_target_arm_thumb1_movt_ok]) > - && [check_effective_target_arm_cortex_m]} then { > +# The -mpure-code option is only available for M-profile targets. > +if {[check_effective_target_arm_cortex_m]} then { > # Initialize `dg'. > dg-init > > @@ -56,4 +53,4 @@ set LTO_TORTURE_OPTIONS ${saved-lto_torture_options} > > # All done. > dg-finish > -} > +#} > diff --git a/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c > b/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c > index b989c42..92772d4 100644 > --- a/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c > +++ b/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c > @@ -1,6 +1,7 @@ > /* { dg-do compile } */ > /* { dg-require-effective-target arm_thumb1_ok } */ > /* { dg-options "-Os" } */ > +/* { dg-skip-if "-mpure-code generates an inline multiplication code > sequence" { *-*-* } { "-mpure-code" } } */ > /* { dg-skip-if "" { ! { arm_thumb1 } } } */ > > int >