Re: [PATCH 14/20] testsuite: aarch64: Add fusion tests for FP vml[as] intrinsics

Richard Sandiford via Gcc-patches Fri, 30 Apr 2021 09:57:03 -0700

Jonathan Wright <jonathan.wri...@arm.com> writes:
> I've updated the patch to implement the suggestions, restricting these tests to
> run only on aarch64 targets.
>
> Tested and all new tests pass on aarch64-none-linux-gnu.
>
> Ok for master?


OK, thanks.

Richard

> Thanks,
> Jonathan
> -------------------------------------------------------------------------------
> From: Richard Sandiford <richard.sandif...@arm.com>
> Sent: 28 April 2021 16:46
> To: Jonathan Wright via Gcc-patches <gcc-patches@gcc.gnu.org>
> Cc: Jonathan Wright <jonathan.wri...@arm.com>
> Subject: Re: [PATCH 14/20] testsuite: aarch64: Add fusion tests for FP vml[as]
> intrinsics
>  
> Jonathan Wright via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>> Hi,
>>
>> As subject, this patch adds compilation tests to make sure that the output
>> of vmla/vmls floating-point Neon intrinsics (fmul, fadd/fsub) is not fused
>> into fmla/fmls instructions.
>>
>> Ok for master?
>>
>> Thanks,
>> Jonathan
>>
>> ---
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2021-02-16  Jonathan Wright  <jonathan.wri...@arm.com>
>>
>>        * gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused.c:
>>        New test.
>>        * gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused_A64.c:
>>        New test.
>>        * gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused.c:
>>        New test.
>>        * gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused_A64.c:
>>        New test.
>>
>> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused.c
>> new file mode 100644
>> index 0000000000000000000000000000000000000000..402c4ef414558767c7d7ddc21817093a80d2a06d
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused.c
>> @@ -0,0 +1,42 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O3" } */
>
> Could you test this on an arm*-*-* target too?  I'd expect the
> dg-finals to fail there, since the syntax is vmul.f32 etc. instead.
> Alternatively, we could just skip this for arm*-*-*, like you do
> with the by-lane tests.
>
>> +
>> +
>> +#include <arm_neon.h>
>> +
>> +float32x2_t foo_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
>> +{
>> +  return vmla_f32 (a, b, c);
>> +}
>> +
>> +float32x4_t fooq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
>> +{
>> +  return vmlaq_f32 (a, b, c);
>> +}
>> +
>> +float32x2_t foo_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
>> +{
>> +  return vmla_n_f32 (a, b, c);
>> +}
>> +
>> +float32x4_t fooq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
>> +{
>> +  return vmlaq_n_f32 (a, b, c);
>> +}
>> +
>> +float32x2_t foo_lane_f32 (float32x2_t a,
>> +                       float32x2_t b,
>> +                       float32x2_t v)
>> +{
>> +  return vmla_lane_f32 (a, b, v, 0);
>> +}
>> +
>> +float32x4_t fooq_lane_f32 (float32x4_t a,
>> +                        float32x4_t b,
>> +                        float32x2_t v)
>> +{
>> +  return vmlaq_lane_f32 (a, b, v, 0);
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {fmul} 6} }  */
>> +/* { dg-final { scan-assembler-times {fadd} 6} }  */
>
> It'd be safer to match {\tfmul\t} etc. instead.  Matching bare words
> runs the risk of picking up things like directory names that happen
> to contain “fmul” as a substring.
>
> Thanks,
> Richard
>
>> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused_A64.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused_A64.c
>> new file mode 100644
>> index 0000000000000000000000000000000000000000..08a9590e2572fa78c8360f09c8353a0d23678ec1
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused_A64.c
>> @@ -0,0 +1,33 @@
>> +/* { dg-skip-if "" { arm*-*-* } } */
>> +/* { dg-do compile } */
>> +/* { dg-options "-O3" } */
>> +
>> +
>> +#include <arm_neon.h>
>> +
>> +float64x1_t foo_f64 (float64x1_t a, float64x1_t b, float64x1_t c)
>> +{
>> +  return vmla_f64 (a, b, c);
>> +}
>> +
>> +float64x2_t fooq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
>> +{
>> +  return vmlaq_f64 (a, b, c);
>> +}
>> +
>> +float32x2_t foo_laneq_f32 (float32x2_t a,
>> +                        float32x2_t b,
>> +                        float32x4_t v)
>> +{
>> +  return vmla_laneq_f32 (a, b, v, 0);
>> +}
>> +
>> +float32x4_t fooq_laneq_f32 (float32x4_t a,
>> +                         float32x4_t b,
>> +                         float32x4_t v)
>> +{
>> +  return vmlaq_laneq_f32 (a, b, v, 0);
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {fmul} 4} }  */
>> +/* { dg-final { scan-assembler-times {fadd} 4} }  */
>> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused.c
>> new file mode 100644
>> index 0000000000000000000000000000000000000000..0846b7cf5d2c332175235c15bbe534b2558960ef
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused.c
>> @@ -0,0 +1,42 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O3" } */
>> +
>> +
>> +#include <arm_neon.h>
>> +
>> +float32x2_t foo_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
>> +{
>> +  return vmls_f32 (a, b, c);
>> +}
>> +
>> +float32x4_t fooq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
>> +{
>> +  return vmlsq_f32 (a, b, c);
>> +}
>> +
>> +float32x2_t foo_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
>> +{
>> +  return vmls_n_f32 (a, b, c);
>> +}
>> +
>> +float32x4_t fooq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
>> +{
>> +  return vmlsq_n_f32 (a, b, c);
>> +}
>> +
>> +float32x2_t foo_lane_f32 (float32x2_t a,
>> +                       float32x2_t b,
>> +                       float32x2_t v)
>> +{
>> +  return vmls_lane_f32 (a, b, v, 0);
>> +}
>> +
>> +float32x4_t fooq_lane_f32 (float32x4_t a,
>> +                        float32x4_t b,
>> +                        float32x2_t v)
>> +{
>> +  return vmlsq_lane_f32 (a, b, v, 0);
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {fmul} 6} }  */
>> +/* { dg-final { scan-assembler-times {fsub} 6} }  */
>> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused_A64.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused_A64.c
>> new file mode 100644
>> index 0000000000000000000000000000000000000000..856d46757de6418ee18873ec73bc670ec481dd1c
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused_A64.c
>> @@ -0,0 +1,33 @@
>> +/* { dg-skip-if "" { arm*-*-* } } */
>> +/* { dg-do compile } */
>> +/* { dg-options "-O3" } */
>> +
>> +
>> +#include <arm_neon.h>
>> +
>> +float64x1_t foo_f64 (float64x1_t a, float64x1_t b, float64x1_t c)
>> +{
>> +  return vmls_f64 (a, b, c);
>> +}
>> +
>> +float64x2_t fooq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
>> +{
>> +  return vmlsq_f64 (a, b, c);
>> +}
>> +
>> +float32x2_t foo_laneq_f32 (float32x2_t a,
>> +                        float32x2_t b,
>> +                        float32x4_t v)
>> +{
>> +  return vmls_laneq_f32 (a, b, v, 0);
>> +}
>> +
>> +float32x4_t fooq_laneq_f32 (float32x4_t a,
>> +                         float32x4_t b,
>> +                         float32x4_t v)
>> +{
>> +  return vmlsq_laneq_f32 (a, b, v, 0);
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {fmul} 4} }  */
>> +/* { dg-final { scan-assembler-times {fsub} 4} }  */
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..b14b25949bfe4615b8f8c173a128b6c73de3cf18
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused.c
> @@ -0,0 +1,67 @@
> +/* { dg-skip-if "" { arm*-*-* } } */
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +
> +#include <arm_neon.h>
> +
> +float32x2_t foo_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
> +{
> +  return vmla_f32 (a, b, c);
> +}
> +
> +float32x4_t fooq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
> +{
> +  return vmlaq_f32 (a, b, c);
> +}
> +
> +float32x2_t foo_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
> +{
> +  return vmla_n_f32 (a, b, c);
> +}
> +
> +float32x4_t fooq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
> +{
> +  return vmlaq_n_f32 (a, b, c);
> +}
> +
> +float32x2_t foo_lane_f32 (float32x2_t a,
> +                       float32x2_t b,
> +                       float32x2_t v)
> +{
> +  return vmla_lane_f32 (a, b, v, 0);
> +}
> +
> +float32x4_t fooq_lane_f32 (float32x4_t a,
> +                        float32x4_t b,
> +                        float32x2_t v)
> +{
> +  return vmlaq_lane_f32 (a, b, v, 0);
> +}
> +
> +float32x2_t foo_laneq_f32 (float32x2_t a,
> +                        float32x2_t b,
> +                        float32x4_t v)
> +{
> +  return vmla_laneq_f32 (a, b, v, 0);
> +}
> +
> +float32x4_t fooq_laneq_f32 (float32x4_t a,
> +                         float32x4_t b,
> +                         float32x4_t v)
> +{
> +  return vmlaq_laneq_f32 (a, b, v, 0);
> +}
> +
> +float64x1_t foo_f64 (float64x1_t a, float64x1_t b, float64x1_t c)
> +{
> +  return vmla_f64 (a, b, c);
> +}
> +
> +float64x2_t fooq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
> +{
> +  return vmlaq_f64 (a, b, c);
> +}
> +
> +/* { dg-final { scan-assembler-times {\tfmul\t} 10} }  */
> +/* { dg-final { scan-assembler-times {\tfadd\t} 10} }  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..c6f62c59fffbd9ef9b8b26fa265f630d53a308e6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused.c
> @@ -0,0 +1,67 @@
> +/* { dg-skip-if "" { arm*-*-* } } */
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +
> +#include <arm_neon.h>
> +
> +float32x2_t foo_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
> +{
> +  return vmls_f32 (a, b, c);
> +}
> +
> +float32x4_t fooq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
> +{
> +  return vmlsq_f32 (a, b, c);
> +}
> +
> +float32x2_t foo_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
> +{
> +  return vmls_n_f32 (a, b, c);
> +}
> +
> +float32x4_t fooq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
> +{
> +  return vmlsq_n_f32 (a, b, c);
> +}
> +
> +float32x2_t foo_lane_f32 (float32x2_t a,
> +                       float32x2_t b,
> +                       float32x2_t v)
> +{
> +  return vmls_lane_f32 (a, b, v, 0);
> +}
> +
> +float32x4_t fooq_lane_f32 (float32x4_t a,
> +                        float32x4_t b,
> +                        float32x2_t v)
> +{
> +  return vmlsq_lane_f32 (a, b, v, 0);
> +}
> +
> +float32x2_t foo_laneq_f32 (float32x2_t a,
> +                        float32x2_t b,
> +                        float32x4_t v)
> +{
> +  return vmls_laneq_f32 (a, b, v, 0);
> +}
> +
> +float32x4_t fooq_laneq_f32 (float32x4_t a,
> +                         float32x4_t b,
> +                         float32x4_t v)
> +{
> +  return vmlsq_laneq_f32 (a, b, v, 0);
> +}
> +
> +float64x1_t foo_f64 (float64x1_t a, float64x1_t b, float64x1_t c)
> +{
> +  return vmls_f64 (a, b, c);
> +}
> +
> +float64x2_t fooq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
> +{
> +  return vmlsq_f64 (a, b, c);
> +}
> +
> +/* { dg-final { scan-assembler-times {\tfmul\t} 10} }  */
> +/* { dg-final { scan-assembler-times {\tfsub\t} 10} }  */

Re: [PATCH 14/20] testsuite: aarch64: Add fusion tests for FP vml[as] intrinsics

Reply via email to