On Wed, Sep 02, 2015 at 01:35:19PM +0100, Wilco Dijkstra wrote:
> aarch64_internal_mov_immediate uses loops iterating over all legal bitmask
> immediates to find 2-instruction immediate combinations. One loop is
> quadratic and despite being extremely expensive very rarely finds a matching
> immediate (43 matches in all of SPEC2006 but none are emitted in final code),
> so it can be removed without any effect on code quality. The other loop can
> be replaced by a constant-time search: rather than iterating over all legal
> bitmask values, reconstruct a potential bitmask and query the fast
> aarch64_bitmask_imm.
> 
> No change in generated code, passes GCC regression tests/bootstrap.

Well, presumably those 43 cases in SPEC2006 changed...

The code looks OK, and I haven't seen any objections from Marcus or
Richard on the overall direction of the patch set.

OK for trunk.

Thanks,
James

> 
> ChangeLog:
> 2015-09-02  Wilco Dijkstra  <wdijk...@arm.com>
> 
>       * gcc/config/aarch64/aarch64.c (aarch64_internal_mov_immediate):
>       Replace slow immediate matching loops with a faster algorithm.
> 
> ---
>  gcc/config/aarch64/aarch64.c | 96 +++++++++++---------------------------------
>  1 file changed, 23 insertions(+), 73 deletions(-)
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index c0280e6..d6f7cb0 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -1376,7 +1376,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
>    unsigned HOST_WIDE_INT mask;
>    int i;
>    bool first;
> -  unsigned HOST_WIDE_INT val;
> +  unsigned HOST_WIDE_INT val, val2;
>    bool subtargets;
>    rtx subtarget;
>    int one_match, zero_match, first_not_ffff_match;
> @@ -1503,85 +1503,35 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
>       }
>      }
>  
> -  /* See if we can do it by arithmetically combining two
> -     immediates.  */
> -  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
> +  if (zero_match != 2 && one_match != 2)
>      {
> -      int j;
> -      mask = 0xffff;
> +      /* Try emitting a bitmask immediate with a movk replacing 16 bits.
> +      For a 64-bit bitmask try whether changing 16 bits to all ones or
> +      zeroes creates a valid bitmask.  To check any repeated bitmask,
> +      try using 16 bits from the other 32-bit half of val.  */
>  
> -      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
> -       || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
> +      for (i = 0; i < 64; i += 16, mask <<= 16)
>       {
> -       if (generate)
> -         {
> -           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
> -           emit_insn (gen_rtx_SET (subtarget,
> -                                   GEN_INT (aarch64_bitmasks[i])));
> -           emit_insn (gen_adddi3 (dest, subtarget,
> -                                  GEN_INT (val - aarch64_bitmasks[i])));
> -         }
> -       num_insns += 2;
> -       return num_insns;
> +       val2 = val & ~mask;
> +       if (val2 != val && aarch64_bitmask_imm (val2, mode))
> +         break;
> +       val2 = val | mask;
> +       if (val2 != val && aarch64_bitmask_imm (val2, mode))
> +         break;
> +       val2 = val2 & ~mask;
> +       val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
> +       if (val2 != val && aarch64_bitmask_imm (val2, mode))
> +         break;
>       }
> -
> -      for (j = 0; j < 64; j += 16, mask <<= 16)
> +      if (i != 64)
>       {
> -       if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
> +       if (generate)
>           {
> -           if (generate)
> -             {
> -               emit_insn (gen_rtx_SET (dest,
> -                                       GEN_INT (aarch64_bitmasks[i])));
> -               emit_insn (gen_insv_immdi (dest, GEN_INT (j),
> -                                          GEN_INT ((val >> j) & 0xffff)));
> -             }
> -           num_insns += 2;
> -           return num_insns;
> +           emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
> +           emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> +                      GEN_INT ((val >> i) & 0xffff)));
>           }
> -     }
> -    }
> -
> -  /* See if we can do it by logically combining two immediates.  */
> -  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
> -    {
> -      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
> -     {
> -       int j;
> -
> -       for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
> -         if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
> -           {
> -             if (generate)
> -               {
> -                 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
> -                 emit_insn (gen_rtx_SET (subtarget,
> -                                         GEN_INT (aarch64_bitmasks[i])));
> -                 emit_insn (gen_iordi3 (dest, subtarget,
> -                                        GEN_INT (aarch64_bitmasks[j])));
> -               }
> -             num_insns += 2;
> -             return num_insns;
> -           }
> -     }
> -      else if ((val & aarch64_bitmasks[i]) == val)
> -     {
> -       int j;
> -
> -       for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
> -         if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
> -           {
> -             if (generate)
> -               {
> -                 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
> -                 emit_insn (gen_rtx_SET (subtarget,
> -                                         GEN_INT (aarch64_bitmasks[j])));
> -                 emit_insn (gen_anddi3 (dest, subtarget,
> -                                        GEN_INT (aarch64_bitmasks[i])));
> -               }
> -             num_insns += 2;
> -             return num_insns;
> -           }
> +       return 2;
>       }
>      }
>  
> -- 
> 1.8.3
> 
> 

Reply via email to