On 29/09/2020 20:50, Christophe Lyon via Gcc-patches wrote:
> When mi_delta is > 255 and -mpure-code is used, we cannot load delta
> from code memory (like we do without -mpure-code).
> 
> This patch builds the value of mi_delta into r3 with a series of
> movs/adds/lsls.
> 
> We also do some cleanup by not emitting the function address and delta
> via .word directives at the end of the thunk since we don't use them
> with -mpure-code.
> 
> No need for new testcases, this bug was already identified by
> eg. pr46287-3.C
> 
> 2020-09-29  Christophe Lyon  <christophe.l...@linaro.org>
> 
>       gcc/
>       * config/arm/arm.c (arm_thumb1_mi_thunk): Build mi_delta in r3 and
>       do not emit function address and delta when -mpure-code is used.

There are some optimizations you can make to this code.

Firstly, for values between 256 and 510 (inclusive), it would be better
to just expand a mov of 255 followed by an add.  This is also true for
the literal pools alternative as well, so should be handled before all
this.  I also suspect (but haven't check) that the base adjustment will
most commonly be a multiple of the machine word size (ie 4).  If that is
the case then you could generate n/4 and then shift it left by 2 for an
even greater range of literals.  More generally, any sequence of up to
three thumb1 instructions will be no larger, and probably as fast as the
existing literal pool fall back.

Secondly, if the value is, for example, 65536 (0x10000), your code will
emit a mov followed by two shift-by-8 instructions; the two shifts could
be merged into a single shift-by-16.

Finally, I'd really like to see some executable tests for this, if at
all possible.

R.

> 
> k#   (use "git pull" to merge the remote branch into yours)
> ---
>  gcc/config/arm/arm.c | 91 
> +++++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 66 insertions(+), 25 deletions(-)
> 
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index ceeb91f..62abeb5 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -28342,9 +28342,43 @@ arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT 
> delta,
>      {
>        if (mi_delta > 255)
>       {
> -       fputs ("\tldr\tr3, ", file);
> -       assemble_name (file, label);
> -       fputs ("+4\n", file);
> +       /* With -mpure-code, we cannot load delta from the constant
> +          pool: we build it explicitly.  */
> +       if (target_pure_code)
> +         {
> +           bool mov_done_p = false;
> +           int i;
> +
> +           /* Emit upper 3 bytes if needed.  */
> +           for (i = 0; i < 3; i++)
> +             {
> +               int byte = (mi_delta >> (8 * (3 - i))) & 0xff;
> +
> +               if (byte)
> +                 {
> +                   if (mov_done_p)
> +                     asm_fprintf (file, "\tadds\tr3, #%d\n", byte);
> +                   else
> +                     asm_fprintf (file, "\tmovs\tr3, #%d\n", byte);
> +                   mov_done_p = true;
> +                 }
> +
> +               if (mov_done_p)
> +                 asm_fprintf (file, "\tlsls\tr3, #8\n");
> +             }
> +
> +           /* Emit lower byte if needed.  */
> +           if (!mov_done_p)
> +             asm_fprintf (file, "\tmovs\tr3, #%d\n", mi_delta & 0xff);
> +           else if (mi_delta & 0xff)
> +             asm_fprintf (file, "\tadds\tr3, #%d\n", mi_delta & 0xff);
> +         }
> +       else
> +         {
> +           fputs ("\tldr\tr3, ", file);
> +           assemble_name (file, label);
> +           fputs ("+4\n", file);
> +         }
>         asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
>                      mi_op, this_regno, this_regno);
>       }
> @@ -28380,30 +28414,37 @@ arm_thumb1_mi_thunk (FILE *file, tree, 
> HOST_WIDE_INT delta,
>       fputs ("\tpop\t{r3}\n", file);
>  
>        fprintf (file, "\tbx\tr12\n");
> -      ASM_OUTPUT_ALIGN (file, 2);
> -      assemble_name (file, label);
> -      fputs (":\n", file);
> -      if (flag_pic)
> +
> +      /* With -mpure-code, we don't need to emit literals for the
> +      function address and delta since we emitted code to build
> +      them.  */
> +      if (!target_pure_code)
>       {
> -       /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
> -       rtx tem = XEXP (DECL_RTL (function), 0);
> -       /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
> -          pipeline offset is four rather than eight.  Adjust the offset
> -          accordingly.  */
> -       tem = plus_constant (GET_MODE (tem), tem,
> -                            TARGET_THUMB1_ONLY ? -3 : -7);
> -       tem = gen_rtx_MINUS (GET_MODE (tem),
> -                            tem,
> -                            gen_rtx_SYMBOL_REF (Pmode,
> -                                                ggc_strdup (labelpc)));
> -       assemble_integer (tem, 4, BITS_PER_WORD, 1);
> -     }
> -      else
> -     /* Output ".word .LTHUNKn".  */
> -     assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
> +       ASM_OUTPUT_ALIGN (file, 2);
> +       assemble_name (file, label);
> +       fputs (":\n", file);
> +       if (flag_pic)
> +         {
> +           /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
> +           rtx tem = XEXP (DECL_RTL (function), 0);
> +           /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
> +              pipeline offset is four rather than eight.  Adjust the offset
> +              accordingly.  */
> +           tem = plus_constant (GET_MODE (tem), tem,
> +                                TARGET_THUMB1_ONLY ? -3 : -7);
> +           tem = gen_rtx_MINUS (GET_MODE (tem),
> +                                tem,
> +                                gen_rtx_SYMBOL_REF (Pmode,
> +                                                    ggc_strdup (labelpc)));
> +           assemble_integer (tem, 4, BITS_PER_WORD, 1);
> +         }
> +       else
> +         /* Output ".word .LTHUNKn".  */
> +         assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 
> 1);
>  
> -      if (TARGET_THUMB1_ONLY && mi_delta > 255)
> -     assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
> +       if (TARGET_THUMB1_ONLY && mi_delta > 255)
> +         assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
> +     }
>      }
>    else
>      {
> 

Reply via email to