Tamar Christina <tamar.christ...@arm.com> writes:
> Hi All,
>
> This patch extends our immediate SIMD generation cases to support generating
> integer immediates using floating point operation if the integer immediate 
> maps
> to an exact FP value.
>
> As an example:
>
> uint32x4_t f1() {
>     return vdupq_n_u32(0x3f800000);
> }
>
> currently generates:
>
> f1:
>         adrp    x0, .LC0
>         ldr     q0, [x0, #:lo12:.LC0]
>         ret
>
> i.e. a load, but with this change:
>
> f1:
>         fmov    v0.4s, 1.0e+0
>         ret
>
> Such immediates are common in e.g. our Math routines in glibc because they are
> created to extract or mark part of an FP immediate as masks.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>       * config/aarch64/aarch64.cc (aarch64_sve_valid_immediate,
>       aarch64_simd_valid_immediate): Refactor accepting modes and values.
>       (aarch64_float_const_representable_p): Refactor and extract FP checks
>       into ...
>       (aarch64_real_float_const_representable_p): ...This.
>       (aarch64_advsimd_valid_immediate): Use it.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.target/aarch64/const_create_using_fmov.c: New test.

Looks good.  I think it's worth mentioning in the changelog that you
also fix the previous behaviour of ignoring the "fail" pass-back from
real_to_integer.

Some formatting trivia below, and a question:

> @@ -22919,10 +22919,9 @@ aarch64_advsimd_valid_immediate (unsigned 
> HOST_WIDE_INT val64,
>                                                simd_immediate_info::MVN))
>       return true;
>  
> +

Excess blank line.

>        /* Try using a replicated byte.  */
> -      if (which == AARCH64_CHECK_MOV
> -       && val16 == (val32 >> 16)
> -       && val8 == (val16 >> 8))
> +      if (which == AARCH64_CHECK_MOV && mode == QImode)
>       {
>         if (info)
>           *info = simd_immediate_info (QImode, val8);
> @@ -22950,28 +22949,14 @@ aarch64_advsimd_valid_immediate (unsigned 
> HOST_WIDE_INT val64,
>    return false;
>  }
>  
> -/* Return true if replicating VAL64 gives a valid immediate for an SVE MOV
> +/* Return true if replicating IVAL with MODE gives a valid immediate for an 
> SVE MOV

Long line.

>     instruction.  If INFO is nonnull, use it to describe valid immediates.  */
>  
>  static bool
> -aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
> +aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT ival, scalar_int_mode 
> mode,
>                            simd_immediate_info *info)
>  {
> -  scalar_int_mode mode = DImode;
> -  unsigned int val32 = val64 & 0xffffffff;
> -  if (val32 == (val64 >> 32))
> -    {
> -      mode = SImode;
> -      unsigned int val16 = val32 & 0xffff;
> -      if (val16 == (val32 >> 16))
> -     {
> -       mode = HImode;
> -       unsigned int val8 = val16 & 0xff;
> -       if (val8 == (val16 >> 8))
> -         mode = QImode;
> -     }
> -    }
> -  HOST_WIDE_INT val = trunc_int_for_mode (val64, mode);
> +  HOST_WIDE_INT val = trunc_int_for_mode (ival, mode);
>    if (IN_RANGE (val, -0x80, 0x7f))
>      {
>        /* DUP with no shift.  */
> @@ -22986,7 +22971,7 @@ aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT 
> val64,
>       *info = simd_immediate_info (mode, val);
>        return true;
>      }
> -  if (aarch64_bitmask_imm (val64, mode))
> +  if (aarch64_bitmask_imm (ival, mode))
>      {
>        /* DUPM.  */
>        if (info)
> @@ -23067,6 +23052,91 @@ aarch64_sve_pred_valid_immediate (rtx x, 
> simd_immediate_info *info)
>    return false;
>  }
>  
> +/* We can only represent floating point constants which will fit in
> +   "quarter-precision" values.  These values are characterised by
> +   a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
> +   by:
> +
> +   (-1)^s * (n/16) * 2^r
> +
> +   Where:
> +     's' is the sign bit.
> +     'n' is an integer in the range 16 <= n <= 31.
> +     'r' is an integer in the range -3 <= r <= 4.
> +
> +   Return true iff R represents a value encodable into an AArch64 floating 
> point
> +   move instruction as an immediate.  Otherwise false.  */
> +
> +static bool
> +aarch64_real_float_const_representable_p (REAL_VALUE_TYPE r)
> +{
> +  /* This represents our current view of how many bits
> +     make up the mantissa.  */
> +  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
> +  int exponent;
> +  unsigned HOST_WIDE_INT mantissa, mask;
> +  REAL_VALUE_TYPE m;
> +  bool fail = false;
> +
> + /* We cannot represent infinities, NaNs or +/-zero.  We won't

Should be indented by two spaces.

> +     know if we have +zero until we analyse the mantissa, but we
> +     can reject the other invalid values.  */
> +  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
> +      || REAL_VALUE_MINUS_ZERO (r))
> +    return false;
> [...]
> @@ -23195,10 +23251,57 @@ aarch64_simd_valid_immediate (rtx op, 
> simd_immediate_info *info,
>      val64 |= ((unsigned HOST_WIDE_INT) bytes[i % nbytes]
>             << (i * BITS_PER_UNIT));
>  
> +  /* Try encoding the integer immediate as a floating point value if it's an 
> exact

Long line.

> +     value.  */
> +  scalar_float_mode fmode = DFmode;
> +  scalar_int_mode imode = DImode;
> +  unsigned HOST_WIDE_INT ival = val64;
> +  unsigned int val32 = val64 & 0xffffffff;
> +  if (val32 == (val64 >> 32))
> +    {
> +      fmode = SFmode;
> +      imode = SImode;
> +      ival = val32;
> +      unsigned int val16 = val32 & 0xffff;
> +      if (val16 == (val32 >> 16))
> +     {
> +       fmode = HFmode;
> +       imode = HImode;
> +       ival = val16;
> +       unsigned int val8 = val16 & 0xff;
> +       if (val8 == (val16 >> 8))
> +         {
> +           imode = QImode;
> +           ival = val8;
> +         }
> +     }
> +    }
> +
> +  if (which == AARCH64_CHECK_MOV
> +      && imode != QImode
> +      && (imode != HImode || TARGET_FP_F16INST))
> +    {
> +      long int as_long_ints[2];
> +      as_long_ints[0] = ival & 0xFFFFFFFF;
> +      as_long_ints[1] = (ival >> 32) & 0xFFFFFFFF;
> +
> +      REAL_VALUE_TYPE r;
> +      real_from_target (&r, as_long_ints, fmode);
> +      if (aarch64_real_float_const_representable_p (r))
> +     {
> +       if (info)
> +         {
> +           rtx float_val = const_double_from_real_value (r, fmode);
> +           *info = simd_immediate_info (fmode, float_val);
> +         }
> +       return true;
> +     }
> +    }
> +
>    if (vec_flags & VEC_SVE_DATA)
> -    return aarch64_sve_valid_immediate (val64, info);
> +    return aarch64_sve_valid_immediate (ival, imode, info);
>    else
> -    return aarch64_advsimd_valid_immediate (val64, info, which);
> +    return aarch64_advsimd_valid_immediate (val64, imode, info, which);
>  }
>  
>  /* Check whether X is a VEC_SERIES-like constant that starts at 0 and
> @@ -25201,106 +25304,32 @@ aarch64_c_mode_for_suffix (char suffix)
>    return VOIDmode;
>  }
>  
> -/* We can only represent floating point constants which will fit in
> -   "quarter-precision" values.  These values are characterised by
> -   a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
> -   by:
> -
> -   (-1)^s * (n/16) * 2^r
> -
> -   Where:
> -     's' is the sign bit.
> -     'n' is an integer in the range 16 <= n <= 31.
> -     'r' is an integer in the range -3 <= r <= 4.  */
> -
> -/* Return true iff X can be represented by a quarter-precision
> +/* Return true iff X with mode MODE can be represented by a quarter-precision
>     floating point immediate operand X.  Note, we cannot represent 0.0.  */

OK with the changes above.

This is pre-existing, but I suppose the comment is out of date, since...

> +
>  bool
>  aarch64_float_const_representable_p (rtx x)
>  {
> -  /* This represents our current view of how many bits
> -     make up the mantissa.  */
> -  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
> -  int exponent;
> -  unsigned HOST_WIDE_INT mantissa, mask;
> -  REAL_VALUE_TYPE r, m;
> -  bool fail;
> -
>    x = unwrap_const_vec_duplicate (x);
> +  machine_mode mode = GET_MODE (x);
>    if (!CONST_DOUBLE_P (x))
>      return false;
>  
> -  if (GET_MODE (x) == VOIDmode
> -      || (GET_MODE (x) == HFmode && !TARGET_FP_F16INST))
> +  if (mode == HFmode && !TARGET_FP_F16INST)
>      return false;
>  
> -  r = *CONST_DOUBLE_REAL_VALUE (x);
> -
> -  /* We cannot represent infinities, NaNs or +/-zero.  We won't
> -     know if we have +zero until we analyse the mantissa, but we
> -     can reject the other invalid values.  */
> -  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
> -      || REAL_VALUE_MINUS_ZERO (r))
> -    return false;
> +    REAL_VALUE_TYPE r = *CONST_DOUBLE_REAL_VALUE (x);
>  
>    /* For BFmode, only handle 0.0. */
> -  if (GET_MODE (x) == BFmode)
> +  if (mode == BFmode)
>      return real_iszero (&r, false);

...we do now handle zero for BFmode.  But that's probably a bug.
With this patch, it should be "even more true" that this routine
isn't supposed to handle zero, although it was also true before.

Does everything work with the final line above as "return false" instead?
If so, that's ok as part of this patch, as a separate patch, or not at
all if you'd rather leave it.

Thanks,
Richard

Reply via email to