Re: [AArch64] Enable generation of FRINTNZ instructions

Richard Sandiford via Gcc-patches Wed, 17 Nov 2021 07:38:57 -0800

> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 4035e061706793849c68ae09bcb2e4b9580ab7b6..62adbc4cb6bbbe0c856f9fbe451aee08f2dea3b5 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -7345,6 +7345,14 @@ (define_insn "despeculate_simpleti"
>     (set_attr "speculation_barrier" "true")]
>  )
>  
> +(define_expand "ftrunc<mode><frintnz_mode>2"
> +  [(set (match_operand:VSFDF 0 "register_operand" "=w")
> +        (unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")]
> +                   FRINTNZ))]
> +  "TARGET_FRINT && TARGET_FLOAT
> +   && !(VECTOR_MODE_P (<MODE>mode) && !TARGET_SIMD)"
> +)


Probably just me, but this condition seems quite hard to read.
I think it'd be better to add conditions to the VSFDF definition instead,
a bit like we do for the HF entries in VHSDF_HSDF and VHSDF_DF.  I.e.:

(define_mode_iterator VSFDF [(V2SF "TARGET_SIMD")
                             (V4SF "TARGET_SIMD")
                             (V2DF "TARGET_SIMD")
                             (SF "TARGET_FLOAT")
                             (DF "TARGET_FLOAT")])

Then the condition can be "TARGET_FRINT".

The same applies to the condition on the existing
aarch64_<frintnzs_op><mode>.

> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index bb13c6cce1bf55633760bc14980402f1f0ac1689..fb97d37cecae17cdb6444e7f3391361b214f0712 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -269,6 +269,7 @@ DEF_INTERNAL_FLT_FLOATN_FN (RINT, ECF_CONST, rint, unary)
>  DEF_INTERNAL_FLT_FLOATN_FN (ROUND, ECF_CONST, round, unary)
>  DEF_INTERNAL_FLT_FLOATN_FN (ROUNDEVEN, ECF_CONST, roundeven, unary)
>  DEF_INTERNAL_FLT_FLOATN_FN (TRUNC, ECF_CONST, btrunc, unary)
> +DEF_INTERNAL_OPTAB_FN (FTRUNC_INT, ECF_CONST, ftruncint, ftrunc_int)

ftrunc_int should be described in the comment at the top of the file.
E.g.:

  - ftrunc_int: a unary conversion optab that takes and returns values
    of the same mode, but internally converts via another mode.  This
    second mode is specified using a dummy final function argument.

> diff --git a/gcc/testsuite/gcc.target/aarch64/frintnz.c b/gcc/testsuite/gcc.target/aarch64/frintnz.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..2e1971f8aa11d8b95f454d03a03e050a3bf96747
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/frintnz.c
> @@ -0,0 +1,88 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=armv8.5-a" } */
> +/* { dg-require-effective-target arm_v8_5a_frintnzx_ok } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +/*
> +** f1:
> +**   ...
> +**   frint32z        s0, s0
> +**   ...

Are these functions ever more than just:

f1:
        frint32z        s0, s0
        ret

?  If not, I think we should match that sequence and “defend” the
good codegen.  The problem with having “...” on both sides of the
instruction is that it's then not clear why we can rely on register 0
being used.

> +*/
> +float
> +f1 (float x)
> +{
> +  int y = x;
> +  return (float) y;
> +}
> +
> +/*
> +** f2:
> +**   ...
> +**   frint64z        s0, s0
> +**   ...
> +*/
> +float
> +f2 (float x)
> +{
> +  long long int y = x;
> +  return (float) y;
> +}
> +
> +/*
> +** f3:
> +**   ...
> +**   frint32z        d0, d0
> +**   ...
> +*/
> +double
> +f3 (double x)
> +{
> +  int y = x;
> +  return (double) y;
> +}
> +
> +/*
> +** f4:
> +**   ...
> +**   frint64z        d0, d0
> +**   ...
> +*/
> +double
> +f4 (double x)
> +{
> +  long long int y = x;
> +  return (double) y;
> +}
> +
> +float
> +f1_dont (float x)
> +{
> +  unsigned int y = x;
> +  return (float) y;
> +}
> +
> +float
> +f2_dont (float x)
> +{
> +  unsigned long long int y = x;
> +  return (float) y;
> +}
> +
> +double
> +f3_dont (double x)
> +{
> +  unsigned int y = x;
> +  return (double) y;
> +}
> +
> +double
> +f4_dont (double x)
> +{
> +  unsigned long long int y = x;
> +  return (double) y;
> +}
> +
> +/* Make sure the 'dont's don't generate any frintNz.  */
> +/* { dg-final { scan-assembler-times {frint32z} 2 } } */
> +/* { dg-final { scan-assembler-times {frint64z} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c b/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
> index 07217064e2ba54fcf4f5edc440e6ec19ddae66e1..3b34dc3ad79f1406a41ec4c00db10347ba1ca2c4 100644
> --- a/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -ffast-math" } */
> +/* { dg-skip-if "" { arm_v8_5a_frintnzx_ok } } */
>  
>  float
>  f1 (float x)
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index 8cbda192fe0fae59ea208ee43696b4d22c43e61e..7fa1659ce734257f3cd96f1e2e50ace4d02dcf51 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -11365,6 +11365,33 @@ proc check_effective_target_arm_v8_3a_bkey_directive { } {
>       }]
>  }
>  
> +# Return 1 if the target supports ARMv8.5 scalar and Adv.Simd FRINT32[ZX]

This should say “Armv8.5-A” rather than “ARMv8.5”.

> +# and FRINT64[ZX] instructions, 0 otherwise. The test is valid for AArch64.
> +# Record the command line options needed.
> +
> +proc check_effective_target_arm_v8_5a_frintnzx_ok_nocache { } {
> +
> +    if { ![istarget aarch64*-*-*] } {
> +        return 0;
> +    }
> +
> +    if { [check_no_compiler_messages_nocache \
> +           arm_v8_5a_frintnzx_ok assembly {
> +     #if !defined (__ARM_FEATURE_FRINT)
> +     #error "__ARM_FEATURE_FRINT not defined"
> +     #endif
> +    } [current_compiler_flags]] } {
> +     return 1;
> +    }
> +
> +    return 0;
> +}
> +
> +proc check_effective_target_arm_v8_5a_frintnzx_ok { } {

The new condition should be documented in sourcebuild.texi, near
the existing arm_v8_* tests.

OK for the non-match.pd parts with those changes.  I don't feel
qualified to review the match.pd bits. :-)

Thanks,
Richard

> +    return [check_cached_effective_target arm_v8_5a_frintnzx_ok \
> +                check_effective_target_arm_v8_5a_frintnzx_ok_nocache] 
> +}
> +
>  # Return 1 if the target supports executing the Armv8.1-M Mainline Low
>  # Overhead Loop, 0 otherwise.  The test is valid for ARM.
>

Re: [AArch64] Enable generation of FRINTNZ instructions

Reply via email to