On 10/11/16 17:16, Wilco Dijkstra wrote:
> Improve TI mode address offsets - these may either use LDP of 64-bit or
> LDR of 128-bit, so we need to use the correct intersection of offsets.
> When splitting a large offset into base and offset, use a signed 9-bit
> unscaled offset.
>
> Remove the Ump constraint on movti and movtf instructions as this blocks
> the reload optimizer from merging address CSEs (is this supposed to work
> only on 'm' constraints?). The result is improved codesize, especially
> wrf and gamess in SPEC2006.
>
>
> int f (int x)
> {
>   __int128_t arr[100];
>   arr[31] = 0;
>   arr[48] = 0;
>   arr[79] = 0;
>   arr[65] = 0;
>   arr[70] = 0;
>   return arr[x];
> }
>
> Before patch (note the multiple redundant add x1, sp, 1024):
>         sub     sp, sp, #1600
>         sbfiz   x0, x0, 4, 32
>         add     x1, sp, 256
>         stp     xzr, xzr, [x1, 240]
>         add     x1, sp, 768
>         stp     xzr, xzr, [x1]
>         add     x1, sp, 1024
>         stp     xzr, xzr, [x1, 240]
>         add     x1, sp, 1024
>         stp     xzr, xzr, [x1, 16]
>         add     x1, sp, 1024
>         stp     xzr, xzr, [x1, 96]
>         ldr     w0, [sp, x0]
>         add     sp, sp, 1600
>         ret
>
> After patch:
>         sub     sp, sp, #1600
>         sbfiz   x0, x0, 4, 32
>         add     x1, sp, 1024
>         stp     xzr, xzr, [sp, 496]
>         stp     xzr, xzr, [x1, -256]
>         stp     xzr, xzr, [x1, 240]
>         stp     xzr, xzr, [x1, 16]
>         stp     xzr, xzr, [x1, 96]
>         ldr     w0, [sp, x0]
>         add     sp, sp, 1600
>         ret
>
>
> Bootstrap & regress OK.
>
> ChangeLog:
> 2015-11-10  Wilco Dijkstra  <wdijk...@arm.com>
>
> gcc/
>         * config/aarch64/aarch64.md (movti_aarch64): Change Ump to m.
>         (movtf_aarch64): Likewise.
>         * config/aarch64/aarch64.c (aarch64_classify_address):
>         Use correct intersection of offsets.
>         (aarch64_legitimize_address_displacement): Use 9-bit signed offsets.
>         (aarch64_legitimize_address): Use 9-bit signed offsets for TI/TF mode.
>         Use 7-bit signed scaled mode for modes > 16 bytes.
>
> --
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 3045e6d6447d5c1860feb51708eeb2a21d2caca9..45f44e96ba9e9d3c8c41d977aa509fa13398a8fd 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -4066,7 +4066,8 @@ aarch64_classify_address (struct aarch64_address_info *info,
>           instruction memory accesses.  */
>        if (mode == TImode || mode == TFmode)
>          return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
> -                && offset_9bit_signed_unscaled_p (mode, offset));
> +                && (offset_9bit_signed_unscaled_p (mode, offset)
> +                    || offset_12bit_unsigned_scaled_p (mode, offset)));
>
>        /* A 7bit offset check because OImode will emit a ldp/stp
>           instruction (only big endian will get here).
> @@ -4270,18 +4271,19 @@ aarch64_legitimate_address_p (machine_mode mode, rtx x,
>  /* Split an out-of-range address displacement into a base and offset.
>     Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
>     to increase opportunities for sharing the base address of different sizes.
> -   For TI/TFmode and unaligned accesses use a 256-byte range.  */
> +   For unaligned accesses and TI/TF mode use the signed 9-bit range.  */
>  static bool
>  aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
>  {
> -  HOST_WIDE_INT mask = GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3fff;
> +  HOST_WIDE_INT offset = INTVAL (*disp);
> +  HOST_WIDE_INT base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);
>
> -  if (mode == TImode || mode == TFmode ||
> -      (INTVAL (*disp) & (GET_MODE_SIZE (mode) - 1)) != 0)
> -    mask = 0xff;
> +  if (mode == TImode || mode == TFmode
> +      || (offset & (GET_MODE_SIZE (mode) - 1)) != 0)
> +    base = (offset + 0x100) & ~0x1ff;
>
> -  *off = GEN_INT (INTVAL (*disp) & ~mask);
> -  *disp = GEN_INT (INTVAL (*disp) & mask);
> +  *off = GEN_INT (base);
> +  *disp = GEN_INT (offset - base);
>    return true;
>  }
>
> @@ -5148,12 +5150,10 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
>            x = gen_rtx_PLUS (Pmode, base, offset_rtx);
>          }
>
> -      /* Does it look like we'll need a load/store-pair operation?  */
> +      /* Does it look like we'll need a 16-byte load/store-pair operation?  */
>        HOST_WIDE_INT base_offset;
> -      if (GET_MODE_SIZE (mode) > 16
> -          || mode == TImode)
> -        base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
> -                       & ~((128 * GET_MODE_SIZE (mode)) - 1));
> +      if (GET_MODE_SIZE (mode) > 16)
> +        base_offset = (offset + 0x400) & ~0x7f0;
>        /* For offsets aren't a multiple of the access size, the limit is
>           -256...255.  */
>        else if (offset & (GET_MODE_SIZE (mode) - 1))
> @@ -5167,6 +5167,8 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
>        /* Small negative offsets are supported.  */
>        else if (IN_RANGE (offset, -256, 0))
>          base_offset = 0;
> +      else if (mode == TImode || mode == TFmode)
> +        base_offset = (offset + 0x100) & ~0x1ff;
>        /* Use 12-bit offset by access size.  */
>        else
>          base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 24b7288976dd0452f41475e40f02750fc56a2a20..62eda569f9b642ac569a61718d7debf7eae1b59e 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1094,9 +1094,9 @@
>
>  (define_insn "*movti_aarch64"
>    [(set (match_operand:TI 0
> -        "nonimmediate_operand" "=r, *w,r ,*w,r ,Ump,Ump,*w,m")
> +        "nonimmediate_operand" "=r, *w,r ,*w,r,m,m,*w,m")
>         (match_operand:TI 1
> -        "aarch64_movti_operand" " rn,r ,*w,*w,Ump,r ,Z , m,*w"))]
> +        "aarch64_movti_operand" " rn,r ,*w,*w,m,r,Z, m,*w"))]
>    "(register_operand (operands[0], TImode)
>      || aarch64_reg_or_zero (operands[1], TImode))"
>    "@
> @@ -1211,9 +1211,9 @@
>
>  (define_insn "*movtf_aarch64"
>    [(set (match_operand:TF 0
> -        "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r ,Ump
>

Has this patch been truncated? The last line above looks to be part-way through a hunk.
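
From the part that did arrive, my reading of the new split logic is: for
TI/TF mode (or an offset that isn't a multiple of the access size) the base
is rounded to the nearest 512-byte boundary so that the residual always
lands in the signed 9-bit unscaled range -256..255; otherwise the low
scaled 12-bit portion is kept as the displacement.  A rough standalone
sketch of that arithmetic, checked against the arr[79] store in your
example (plain C outside GCC; split_offset and the driver are my own
names, not part of the patch):

/* Sketch only: mirrors the arithmetic of the patched
   aarch64_legitimize_address_displacement, not the GCC code itself.  */
#include <stdio.h>

static void
split_offset (long offset, int mode_size, int ti_or_unaligned,
              long *base, long *resid)
{
  /* Default: keep a 4KB range for 1- and 2-byte accesses and a 16KB
     range otherwise, as in the patch.  */
  *base = offset & ~(mode_size < 4 ? 0xfffL : 0x3ffcL);

  /* TI/TF mode or an unaligned offset: round the base to a 512-byte
     boundary so the residual fits the signed 9-bit range -256..255.  */
  if (ti_or_unaligned)
    *base = (offset + 0x100) & ~0x1ffL;

  *resid = offset - *base;
}

int
main (void)
{
  long base, resid;
  split_offset (79 * 16, 16, 1, &base, &resid);  /* arr[79] at sp+1264 */
  printf ("base=%ld resid=%ld\n", base, resid);  /* base=1024 resid=240 */
  return 0;
}

That agrees with the "add x1, sp, 1024" / "stp xzr, xzr, [x1, 240]" pair in
the after-patch code, so the displacement handling looks right to me; I
just can't review the movtf hunk from what was posted.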