Re: [PATCH 2/2] [ARM] Add support for -mpure-code in thumb-1 (v6m)

Christophe Lyon Wed, 13 Nov 2019 06:41:57 -0800

Hi Kyrill,


On Tue, 12 Nov 2019 at 11:34, Kyrill Tkachov
<kyrylo.tkac...@foss.arm.com> wrote:
>
> Hi Christophe,
>
> On 10/18/19 2:18 PM, Christophe Lyon wrote:
> > Hi,
> >
> > This patch extends support for -mpure-code to all thumb-1 processors,
> > by removing the need for MOVT.
> >
> > Symbol addresses are built using upper8_15, upper0_7, lower8_15 and
> > lower0_7 relocations, and constants are built using sequences of
> > movs/adds and lsls instructions.
> >
> > The extension of the *thumb1_movhf pattern uses always the same size
> > (6) although it can emit a shorter sequence when possible. This is
> > similar to what *arm32_movhf already does.
> >
> > CASE_VECTOR_PC_RELATIVE is now false with -mpure-code, to avoid
> > generating invalid assembly code with differences from symbols from
> > two different sections (the difference cannot be computed by the
> > assembler).
> >
> > Tests pr45701-[12].c needed a small adjustment to avoid matching
> > upper8_15 when looking for the r8 register.
> >
> > Test no-literal-pool.c is augmented with __fp16, so it now uses
> > -mfp16-format=ieee.
> >
> > Test thumb1-Os-mult.c generates an inline code sequence with
> > -mpure-code and computes the multiplication by using a sequence of
> > add/shift rather than using the multiply instruction, so we skip it in
> > presence of -mpure-code.
> >
> > With -mcpu=cortex-m0, the pure-code/no-literal-pool.c fails because
> > code like:
> > static char *p = "Hello World";
> > char *
> > testchar ()
> > {
> >   return p + 4;
> > }
> > generates 2 indirections (I removed non-essential directives/code)
> >         .section        .rodata
> > .LC0:
> > .ascii  "Hello World\000"
> > .data
> > p:
> > .word   .LC0
> > .section        .rodata
> > .LC2:
> > .word   p
> > .section .text,"0x20000006",%progbits
> > testchar:
> > push    {r7, lr}
> > add     r7, sp, #0
> > movs    r3, #:upper8_15:#.LC2
> > lsls    r3, #8
> > adds    r3, #:upper0_7:#.LC2
> > lsls    r3, #8
> > adds    r3, #:lower8_15:#.LC2
> > lsls    r3, #8
> > adds    r3, #:lower0_7:#.LC2
> > ldr     r3, [r3]
> > ldr     r3, [r3]
> > adds    r3, r3, #4
> > movs    r0, r3
> > mov     sp, r7
> > @ sp needed
> > pop     {r7, pc}
> >
> > By contrast, when using -mcpu=cortex-m4, the code looks like:
> >         .section        .rodata
> > .LC0:
> > .ascii  "Hello World\000"
> > .data
> > p:
> > .word   .LC0
> > testchar:
> > push    {r7}
> > add     r7, sp, #0
> > movw    r3, #:lower16:p
> > movt    r3, #:upper16:p
> > ldr     r3, [r3]
> > adds    r3, r3, #4
> > mov     r0, r3
> > mov     sp, r7
> > pop     {r7}
> > bx      lr
> >
> > I haven't found yet how to make code for cortex-m0 apply upper/lower
> > relocations to "p" instead of .LC2. The current code looks functional,
> > but could be improved.
> >
> > OK as-is?
> >
> > Thanks,
> >
> > Christophe
>
>
> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
> index f995974..beb8411 100644
> --- a/gcc/config/arm/arm-protos.h
> +++ b/gcc/config/arm/arm-protos.h
> @@ -66,6 +66,7 @@ extern bool arm_small_register_classes_for_mode_p 
> (machine_mode);
>   extern int const_ok_for_arm (HOST_WIDE_INT);
>   extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
>   extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code);
> +extern void thumb1_gen_const_int (rtx, HOST_WIDE_INT);
>   extern int arm_split_constant (RTX_CODE, machine_mode, rtx,
>                                HOST_WIDE_INT, rtx, rtx, int);
>   extern int legitimate_pic_operand_p (rtx);
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 9f0975d..836f147 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -2882,13 +2882,19 @@ arm_option_check_internal (struct gcc_options *opts)
>       {
>         const char *flag = (target_pure_code ? "-mpure-code" :
>                                              "-mslow-flash-data");
> +      bool not_supported = arm_arch_notm || flag_pic || TARGET_NEON;
>
> -      /* We only support -mpure-code and -mslow-flash-data on M-profile 
> targets
> -        with MOVT.  */
> -      if (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON)
> +      /* We only support -mslow-flash-data on M-profile targets with
> +        MOVT.  */
> +      if (target_slow_flash_data && (!TARGET_HAVE_MOVT || not_supported))
>         error ("%s only supports non-pic code on M-profile targets with the "
>                "MOVT instruction", flag);
>
> +      /* We only support -mpure-code-flash-data on M-profile
> +        targets.  */
>
>
> Typo in the option name.
>
Thanks for catching this.

> +      if (target_pure_code && not_supported)
> +       error ("%s only supports non-pic code on M-profile targets", flag);
> +
>         /* Cannot load addresses: -mslow-flash-data forbids literal pool and
>          -mword-relocations forbids relocation of MOVT/MOVW.  */
>         if (target_word_relocations)
> @@ -4400,6 +4406,38 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code 
> code)
>       }
>   }
>
> +/* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
> +   Avoid generating useless code when one of the bytes is zero.  */
> +void
> +thumb1_gen_const_int (rtx op0, HOST_WIDE_INT op1)
> +{
> +  bool mov_done_p = false;
> +  int i;
> +
> +  /* Emit upper 3 bytes if needed.  */
> +  for (i = 0; i < 3; i++)
> +    {
> +      int byte = (op1 >> (8 * (3 - i))) & 0xff;
> +
> +      if (byte)
> +       {
> +         emit_set_insn (op0, mov_done_p
> +                        ? gen_rtx_PLUS (SImode,op0, GEN_INT (byte))
> +                        : GEN_INT (byte));
> +         mov_done_p = true;
> +       }
> +
> +      if (mov_done_p)
> +       emit_set_insn (op0, gen_rtx_ASHIFT (SImode, op0, GEN_INT (8)));
> +    }
> +
> +  /* Emit lower byte if needed.  */
> +  if (!mov_done_p)
> +    emit_set_insn (op0, GEN_INT (op1 & 0xff));
> +  else if (op1 & 0xff)
> +    emit_set_insn (op0, gen_rtx_PLUS (SImode, op0, GEN_INT (op1 & 0xff)));
> +}
>
> AFAIK we already have functions like arm_gen_constant that are supposed to 
> generate optimal immediate sequences.
> Do they not fit the usecase here?

I tried to replace
thumb1_gen_const_int (operands[0], INTVAL (operands[1]));
with
arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (operands[1]),
operands[0], NULL_RTX, 1, 1);
in thumb1.md, but this does work.
One of the test cases (testsfp16 in no-literal-pool.c) tries to build
a floating-point constant 0x3f8fcb92,
and arm_gen_constant generates
(insn 66 34 72 (set (reg:SI 3 r3 [130])
        (const_int 1065353216 [0x3f800000]))
"/testsuite/gcc.target/arm/pure-code/no-literal-pool.c":16:6 818
{*thumb1_movsi_insn}
     (nil))
which leads to error: could not split insn

Christophe



>
> Looks reasonable to me otherwise.
> Thanks,
> Kyrill
>
>
>   +
>   /* Emit a sequence of insns to handle a large constant.
>      CODE is the code of the operation required, it can be any of SET, PLUS,
>      IOR, AND, XOR, MINUS;
> @@ -8530,7 +8568,8 @@ thumb1_legitimate_address_p (machine_mode mode, rtx x, 
> int strict_p)
>     /* This is PC relative data before arm_reorg runs.  */
>     else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
>            && GET_CODE (x) == SYMBOL_REF
> -           && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
> +          && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
> +          && !arm_disable_literal_pool)
>       return 1;
>
>     /* This is PC relative data after arm_reorg runs.  */
> @@ -8598,6 +8637,7 @@ thumb1_legitimate_address_p (machine_mode mode, rtx x, 
> int strict_p)
>            && GET_MODE_SIZE (mode) == 4
>            && GET_CODE (x) == SYMBOL_REF
>            && CONSTANT_POOL_ADDRESS_P (x)
> +          && !arm_disable_literal_pool
>            && ! (flag_pic
>                  && symbol_mentioned_p (get_pool_constant (x))
>                  && ! pcrel_constant_p (get_pool_constant (x))))
> @@ -9278,7 +9318,9 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum 
> rtx_code outer)
>             return 0;
>           if (thumb_shiftable_const (INTVAL (x)))
>             return COSTS_N_INSNS (2);
> -         return COSTS_N_INSNS (3);
> +         return arm_disable_literal_pool
> +           ? COSTS_N_INSNS (8)
> +           : COSTS_N_INSNS (3);
>         }
>         else if ((outer == PLUS || outer == COMPARE)
>                && INTVAL (x) < 256 && INTVAL (x) > -256)
> @@ -9435,7 +9477,9 @@ thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum 
> rtx_code outer)
>           /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
>             if (thumb_shiftable_const (INTVAL (x)))
>               return COSTS_N_INSNS (2);
> -          return COSTS_N_INSNS (3);
> +         return arm_disable_literal_pool
> +           ? COSTS_N_INSNS (8)
> +           : COSTS_N_INSNS (3);
>           }
>         else if ((outer == PLUS || outer == COMPARE)
>                  && INTVAL (x) < 256 && INTVAL (x) > -256)
> @@ -27073,14 +27117,41 @@ arm_thumb1_mi_thunk (FILE *file, tree, 
> HOST_WIDE_INT delta,
>           /* push r3 so we can use it as a temporary.  */
>           /* TODO: Omit this save if r3 is not used.  */
>           fputs ("\tpush {r3}\n", file);
> -         fputs ("\tldr\tr3, ", file);
> +
> +         /* With -mpure-code, we cannot load the address from the
> +            constant pool: we build it explicitly.  */
> +         if (target_pure_code)
> +           {
> +             fputs ("\tmovs\tr3, #:upper8_15:#", file);
> +             assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
> +             fputc ('\n', file);
> +             fputs ("\tlsls r3, #8\n", file);
> +             fputs ("\tadds\tr3, #:upper0_7:#", file);
> +             assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
> +             fputc ('\n', file);
> +             fputs ("\tlsls r3, #8\n", file);
> +             fputs ("\tadds\tr3, #:lower8_15:#", file);
> +             assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
> +             fputc ('\n', file);
> +             fputs ("\tlsls r3, #8\n", file);
> +             fputs ("\tadds\tr3, #:lower0_7:#", file);
> +             assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
> +             fputc ('\n', file);
> +           }
> +         else
> +           fputs ("\tldr\tr3, ", file);
>         }
>         else
>         {
>           fputs ("\tldr\tr12, ", file);
>         }
> -      assemble_name (file, label);
> -      fputc ('\n', file);
> +
> +      if (!target_pure_code)
> +       {
> +         assemble_name (file, label);
> +         fputc ('\n', file);
> +       }
> +
>         if (flag_pic)
>         {
>           /* If we are generating PIC, the ldr instruction below loads
> diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
> index 8b67c9c..d842448 100644
> --- a/gcc/config/arm/arm.h
> +++ b/gcc/config/arm/arm.h
> @@ -1850,9 +1850,11 @@ enum arm_auto_incmodes
>      for the index in the tablejump instruction.  */
>   #define CASE_VECTOR_MODE Pmode
>
> -#define CASE_VECTOR_PC_RELATIVE (TARGET_THUMB2                         \
> -                                || (TARGET_THUMB1                      \
> -                                    && (optimize_size || flag_pic)))
> +#define CASE_VECTOR_PC_RELATIVE ((TARGET_THUMB2                              
>   \
> +                                 || (TARGET_THUMB1                     \
> +                                     && (optimize_size || flag_pic)))  \
> +                                && (!target_pure_code))
> +
>
>   #define CASE_VECTOR_SHORTEN_MODE(min, max, body)                      \
>     (TARGET_THUMB1                                                      \
> diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
> index 5c70200..dd758eb 100644
> --- a/gcc/config/arm/thumb1.md
> +++ b/gcc/config/arm/thumb1.md
> @@ -43,6 +43,41 @@
>
>
>
> +(define_insn "thumb1_movsi_symbol_ref"
> +  [(set (match_operand:SI 0 "register_operand" "=l")
> +       (match_operand:SI 1 "general_operand" ""))
> +   ]
> +  "TARGET_THUMB1
> +   && arm_disable_literal_pool
> +   && GET_CODE (operands[1]) == SYMBOL_REF"
> +  "*
> +  output_asm_insn (\"movs\\t%0, #:upper8_15:%1\", operands);
> +  output_asm_insn (\"lsls\\t%0, #8\", operands);
> +  output_asm_insn (\"adds\\t%0, #:upper0_7:%1\", operands);
> +  output_asm_insn (\"lsls\\t%0, #8\", operands);
> +  output_asm_insn (\"adds\\t%0, #:lower8_15:%1\", operands);
> +  output_asm_insn (\"lsls\\t%0, #8\", operands);
> +  output_asm_insn (\"adds\\t%0, #:lower0_7:%1\", operands);
> +  return \"\";
> +  "
> +  [(set_attr "length" "14")
> +   (set_attr "conds" "clob")]
> +)
> +
> +(define_split
> +  [(set (match_operand:SI 0 "register_operand" "")
> +       (match_operand:SI 1 "immediate_operand" ""))]
> +  "TARGET_THUMB1
> +   && arm_disable_literal_pool
> +   && GET_CODE (operands[1]) == CONST_INT
> +   && !satisfies_constraint_I (operands[1])"
> +  [(clobber (const_int 0))]
> +  "
> +    thumb1_gen_const_int (operands[0], INTVAL (operands[1]));
> +    DONE;
> +  "
> +)
> +
>   (define_insn "*thumb1_adddi3"
>     [(set (match_operand:DI          0 "register_operand" "=l")
>         (plus:DI (match_operand:DI 1 "register_operand" "%0")
> @@ -829,8 +864,8 @@
>      (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")])
>
>   (define_insn "*thumb1_movhf"
> -  [(set (match_operand:HF     0 "nonimmediate_operand" "=l,l,m,*r,*h")
> -       (match_operand:HF     1 "general_operand"      "l,mF,l,*h,*r"))]
> +  [(set (match_operand:HF     0 "nonimmediate_operand" "=l,l,l,m,*r,*h")
> +       (match_operand:HF     1 "general_operand"      "l, m,F,l,*h,*r"))]
>     "TARGET_THUMB1
>      && (         s_register_operand (operands[0], HFmode)
>          || s_register_operand (operands[1], HFmode))"
> @@ -855,14 +890,34 @@
>           }
>         return \"ldrh\\t%0, %1\";
>         }
> -    case 2: return \"strh\\t%1, %0\";
> +    case 2:
> +    {
> +      int bits;
> +      int high;
> +      rtx ops[3];
> +
> +      bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (operands[1]),
> +                            HFmode);
> +      ops[0] = operands[0];
> +      high = (bits >> 8) & 0xff;
> +      ops[1] = GEN_INT (high);
> +      ops[2] = GEN_INT (bits & 0xff);
> +      if (high != 0)
> +       output_asm_insn (\"movs\\t%0, %1\;lsls\\t%0, #8\;adds\\t%0, %2\", 
> ops);
> +      else
> +       output_asm_insn (\"movs\\t%0, %2\", ops);
> +
> +      return \"\";
> +    }
> +    case 3: return \"strh\\t%1, %0\";
>       default: return \"mov\\t%0, %1\";
>       }
>     "
> -  [(set_attr "length" "2")
> -   (set_attr "type" "mov_reg,load_4,store_4,mov_reg,mov_reg")
> -   (set_attr "pool_range" "*,1018,*,*,*")
> -   (set_attr "conds" "clob,nocond,nocond,nocond,nocond")])
> +  [(set_attr "length" "2,2,6,2,2,2")
> +   (set_attr "type" "mov_reg,load_4,mov_reg,store_4,mov_reg,mov_reg")
> +   (set_attr "pool_range" "*,1018,*,*,*,*")
> +   (set_attr "conds" "clob,nocond,nocond,nocond,nocond,nocond")])
> +
>   ;;; ??? This should have alternatives for constants.
>   (define_insn "*thumb1_movsf_insn"
>     [(set (match_operand:SF     0 "nonimmediate_operand" "=l,l,>,l, m,*r,*h")
> diff --git a/gcc/testsuite/gcc.target/arm/pr45701-1.c 
> b/gcc/testsuite/gcc.target/arm/pr45701-1.c
> index b26011b..15913d8 100644
> --- a/gcc/testsuite/gcc.target/arm/pr45701-1.c
> +++ b/gcc/testsuite/gcc.target/arm/pr45701-1.c
> @@ -2,7 +2,7 @@
>   /* { dg-skip-if "" { ! { arm_thumb1_ok || arm_thumb2_ok } } } */
>   /* { dg-options "-mthumb -Os" }  */
>   /* { dg-final { scan-assembler "push\t\{r3" { target { ! 
> arm*-*-uclinuxfdpiceabi } } } } */
> -/* { dg-final { scan-assembler-not "\[^\-\]r8" { target { ! 
> arm*-*-uclinuxfdpiceabi } } } } */
> +/* { dg-final { scan-assembler-not "\[^\-e\]r8" { target { ! 
> arm*-*-uclinuxfdpiceabi } } } } */
>
>   extern int hist_verify;
>   extern int a1;
> diff --git a/gcc/testsuite/gcc.target/arm/pr45701-2.c 
> b/gcc/testsuite/gcc.target/arm/pr45701-2.c
> index 32eed4d..bb2d36e 100644
> --- a/gcc/testsuite/gcc.target/arm/pr45701-2.c
> +++ b/gcc/testsuite/gcc.target/arm/pr45701-2.c
> @@ -2,7 +2,7 @@
>   /* { dg-skip-if "" { ! { arm_thumb1_ok || arm_thumb2_ok } } } */
>   /* { dg-options "-mthumb -Os" }  */
>   /* { dg-final { scan-assembler "push\t\{r3" { target { ! 
> arm*-*-uclinuxfdpiceabi } } } } */
> -/* { dg-final { scan-assembler-not "\[^\-\]r8" { target { ! 
> arm*-*-uclinuxfdpiceabi } } } } */
> +/* { dg-final { scan-assembler-not "\[^\-e\]r8" { target { ! 
> arm*-*-uclinuxfdpiceabi } } } } */
>
>   extern int hist_verify;
>   extern int a1;
> diff --git a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c 
> b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c
> index 4b893fd..3de1620 100644
> --- a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c
> +++ b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c
> @@ -1,12 +1,24 @@
>   /* { dg-do compile } */
> -/* { dg-options "-mpure-code" } */
> +/* { dg-options "-mpure-code -mfp16-format=ieee" } */
>   /* { dg-skip-if "" { *-*-* } { "-g" "-fpic" "-fPIC" } { "" } } */
>
> +__fp16 hf;
>   float sf;
>   double df;
>   long long l;
>   static char *p = "Hello World";
>
> +__fp16
> +testsfp16 (__fp16 *p)
> +{
> +  hf = 1.3;
> +  *p += hf;
> +  if (*p > 1.1234f)
> +    return 2.1234f;
> +  else
> +    return 3.1234f;
> +}
> +
>   float
>   testsf (float *p)
>   {
> diff --git a/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp 
> b/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp
> index bf7e4ad..b05cfd6 100644
> --- a/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp
> +++ b/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp
> @@ -25,11 +25,8 @@ if ![info exists DEFAULT_CFLAGS] then {
>       set DEFAULT_CFLAGS " -ansi -pedantic-errors"
>   }
>
> -# The -mpure-code option is only available for M-profile targets that support
> -# the MOVT instruction.
> -if {([check_effective_target_arm_thumb2_ok]
> -     || [check_effective_target_arm_thumb1_movt_ok])
> -    && [check_effective_target_arm_cortex_m]} then {
> +# The -mpure-code option is only available for M-profile targets.
> +if {[check_effective_target_arm_cortex_m]} then {
>   # Initialize `dg'.
>   dg-init
>
> @@ -56,4 +53,4 @@ set LTO_TORTURE_OPTIONS ${saved-lto_torture_options}
>
>   # All done.
>   dg-finish
> -}
> +#}
> diff --git a/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c 
> b/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c
> index b989c42..92772d4 100644
> --- a/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c
> +++ b/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c
> @@ -1,6 +1,7 @@
>   /* { dg-do compile } */
>   /* { dg-require-effective-target arm_thumb1_ok } */
>   /* { dg-options "-Os" } */
> +/* { dg-skip-if "-mpure-code generates an inline multiplication code 
> sequence" { *-*-* } { "-mpure-code" } } */
>   /* { dg-skip-if "" { ! { arm_thumb1 } } } */
>
>   int
>

Re: [PATCH 2/2] [ARM] Add support for -mpure-code in thumb-1 (v6m)

Reply via email to