<saurabh....@arm.com> writes:
> The AArch64 FEAT_FAMINMAX extension introduces instructions for
> computing the floating-point absolute maximum and minimum of two
> vectors element-wise.
>
> This patch adds code generation for famax and famin in terms of existing
> unspecs. With this patch:
> 1. famax can be expressed as taking the absolute value (UNSPEC_COND_FABS)
>    of each of the two operands and then taking UNSPEC_COND_SMAX of the
>    results.
> 2. famin can be expressed as taking the absolute value (UNSPEC_COND_FABS)
>    of each of the two operands and then taking UNSPEC_COND_SMIN of the
>    results.
>
> This fusion of operators is only possible when the
> -march=armv9-a+faminmax+sve flags are passed. We also need to pass the
> -ffast-math flag; this is what enables the compiler to use
> UNSPEC_COND_SMAX and UNSPEC_COND_SMIN.
>
> This code generation is only available at -O2 or -O3, as that is when
> auto-vectorization is enabled.
>
> gcc/ChangeLog:
>
>       * config/aarch64/aarch64-sve2.md
>       (*aarch64_pred_faminmax_fused): Instruction pattern for faminmax
>       codegen.
>       * config/aarch64/iterators.md: Iterator and attribute for
>       faminmax codegen.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.target/aarch64/sve/faminmax_1.c: New test.
>       * gcc.target/aarch64/sve/faminmax_2.c: New test.

OK, thanks.

Richard

> ---
>  gcc/config/aarch64/aarch64-sve2.md            | 37 +++++++++++
>  gcc/config/aarch64/iterators.md               |  6 ++
>  .../gcc.target/aarch64/sve/faminmax_1.c       | 45 ++++++++++++++
>  .../gcc.target/aarch64/sve/faminmax_2.c       | 61 +++++++++++++++++++
>  4 files changed, 149 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c
>
> diff --git a/gcc/config/aarch64/aarch64-sve2.md 
> b/gcc/config/aarch64/aarch64-sve2.md
> index 725092cc95f..5f2697c3179 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@ -2467,6 +2467,43 @@
>    [(set_attr "movprfx" "yes")]
>  )
>  
> +;; -------------------------------------------------------------------------
> +;; -- [FP] Absolute maximum and minimum
> +;; -------------------------------------------------------------------------
> +;; Includes:
> +;; - FAMAX
> +;; - FAMIN
> +;; -------------------------------------------------------------------------
> +;; Predicated floating-point absolute maximum and minimum.
> +(define_insn_and_rewrite "*aarch64_pred_faminmax_fused"
> +  [(set (match_operand:SVE_FULL_F 0 "register_operand")
> +     (unspec:SVE_FULL_F
> +       [(match_operand:<VPRED> 1 "register_operand")
> +        (match_operand:SI 4 "aarch64_sve_gp_strictness")
> +        (unspec:SVE_FULL_F
> +          [(match_operand 5)
> +           (const_int SVE_RELAXED_GP)
> +           (match_operand:SVE_FULL_F 2 "register_operand")]
> +          UNSPEC_COND_FABS)
> +        (unspec:SVE_FULL_F
> +          [(match_operand 6)
> +           (const_int SVE_RELAXED_GP)
> +           (match_operand:SVE_FULL_F 3 "register_operand")]
> +          UNSPEC_COND_FABS)]
> +       SVE_COND_SMAXMIN))]
> +  "TARGET_SVE_FAMINMAX"
> +  {@ [ cons: =0 , 1   , 2  , 3 ; attrs: movprfx ]
> +     [ w        , Upl , %0 , w ; *              ] 
> <faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
> +     [ ?&w      , Upl , w  , w ; yes            ] movprfx\t%0, 
> %2\;<faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
> +  }
> +  "&& (!rtx_equal_p (operands[1], operands[5])
> +       || !rtx_equal_p (operands[1], operands[6]))"
> +  {
> +    operands[5] = copy_rtx (operands[1]);
> +    operands[6] = copy_rtx (operands[1]);
> +  }
> +)
> +
>  ;; =========================================================================
>  ;; == Complex arithmetic
>  ;; =========================================================================
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index c06f8c2c90f..8b18682c341 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -3143,6 +3143,9 @@
>                                        UNSPEC_COND_FMIN
>                                        UNSPEC_COND_FMINNM])
>  
> +(define_int_iterator SVE_COND_SMAXMIN [UNSPEC_COND_SMAX
> +                                    UNSPEC_COND_SMIN])
> +
>  (define_int_iterator SVE_COND_FP_TERNARY [UNSPEC_COND_FMLA
>                                         UNSPEC_COND_FMLS
>                                         UNSPEC_COND_FNMLA
> @@ -4503,6 +4506,9 @@
>  
>  (define_int_iterator FAMINMAX_UNS [UNSPEC_FAMAX UNSPEC_FAMIN])
>  
> +(define_int_attr faminmax_cond_uns_op
> +  [(UNSPEC_COND_SMAX "famax") (UNSPEC_COND_SMIN "famin")])
> +
>  (define_int_attr faminmax_uns_op
>    [(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")])
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c
> new file mode 100644
> index 00000000000..d54f5d99b5e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c
> @@ -0,0 +1,45 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O3 -ffast-math" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "arm_sve.h"
> +
> +#pragma GCC target "+sve+faminmax"
> +
> +#define TEST_FAMAX(TYPE)                                             \
> +  void fn_famax_##TYPE (TYPE * restrict a,                           \
> +                     TYPE * restrict b,                              \
> +                     TYPE * restrict c,                              \
> +                     int n) {                                        \
> +    for (int i = 0; i < n; i++) {                                    \
> +      TYPE temp1 = __builtin_fabs (a[i]);                            \
> +      TYPE temp2 = __builtin_fabs (b[i]);                            \
> +      c[i] = __builtin_fmax (temp1, temp2);                          \
> +    }                                                                        
> \
> +  }                                                                  \
> +
> +#define TEST_FAMIN(TYPE)                                             \
> +  void fn_famin_##TYPE (TYPE * restrict a,                           \
> +                     TYPE * restrict b,                              \
> +                     TYPE * restrict c,                              \
> +                     int n) {                                        \
> +    for (int i = 0; i < n; i++) {                                    \
> +      TYPE temp1 = __builtin_fabs (a[i]);                            \
> +      TYPE temp2 = __builtin_fabs (b[i]);                            \
> +      c[i] = __builtin_fmin (temp1, temp2);                          \
> +    }                                                                        
> \
> +  }                                                                  \
> +
> +TEST_FAMAX (float16_t)
> +TEST_FAMAX (float32_t)
> +TEST_FAMAX (float64_t)
> +TEST_FAMIN (float16_t)
> +TEST_FAMIN (float32_t)
> +TEST_FAMIN (float64_t)
> +
> +/* { dg-final { scan-assembler-times {\tfamax\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfamax\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfamax\tz[0-9]+\.d, p[0-7]/m, 
> z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfamin\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfamin\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfamin\tz[0-9]+\.d, p[0-7]/m, 
> z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c
> new file mode 100644
> index 00000000000..29e12450831
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c
> @@ -0,0 +1,61 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O3 -ffast-math" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "arm_sve.h"
> +
> +#pragma GCC target "+sve+faminmax"
> +
> +#define TEST_WITH_SVMAX(TYPE)                                                
> \
> +  TYPE fn_fmax_##TYPE (TYPE x, TYPE y) {                             \
> +    svbool_t pg = svptrue_b8();                                              
> \
> +    return svmax_x(pg, svabs_x(pg, x), svabs_x(pg, y));                      
> \
> +  }                                                                  \
> +
> +#define TEST_WITH_SVMAXNM(TYPE)                                              
> \
> +  TYPE fn_fmaxnm_##TYPE (TYPE x, TYPE y) {                           \
> +    svbool_t pg = svptrue_b8();                                              
> \
> +    return svmaxnm_x(pg, svabs_x(pg, x), svabs_x(pg, y));            \
> +  }                                                                  \
> +
> +#define TEST_WITH_SVMIN(TYPE)                                                
> \
> +  TYPE fn_fmin_##TYPE (TYPE x, TYPE y) {                             \
> +    svbool_t pg = svptrue_b8();                                              
> \
> +    return svmin_x(pg, svabs_x(pg, x), svabs_x(pg, y));                      
> \
> +  }                                                                  \
> +
> +#define TEST_WITH_SVMINNM(TYPE)                                              
> \
> +  TYPE fn_fminnm_##TYPE (TYPE x, TYPE y) {                           \
> +    svbool_t pg = svptrue_b8();                                              
> \
> +    return svminnm_x(pg, svabs_x(pg, x), svabs_x(pg, y));            \
> +  }                                                                  \
> +
> +TEST_WITH_SVMAX (svfloat16_t)
> +TEST_WITH_SVMAX (svfloat32_t)
> +TEST_WITH_SVMAX (svfloat64_t)
> +
> +TEST_WITH_SVMAXNM (svfloat16_t)
> +TEST_WITH_SVMAXNM (svfloat32_t)
> +TEST_WITH_SVMAXNM (svfloat64_t)
> +
> +TEST_WITH_SVMIN (svfloat16_t)
> +TEST_WITH_SVMIN (svfloat32_t)
> +TEST_WITH_SVMIN (svfloat64_t)
> +
> +TEST_WITH_SVMINNM (svfloat16_t)
> +TEST_WITH_SVMINNM (svfloat32_t)
> +TEST_WITH_SVMINNM (svfloat64_t)
> +
> +/* { dg-final { scan-assembler-not {\tfamax\t} } }  */
> +/* { dg-final { scan-assembler-not {\tfamin\t} } }  */
> +
> +/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h\n} 8 } } */
> +/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 8 } } */
> +/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.d, p[0-7]/m, 
> z[0-9]+\.d\n} 8 } } */
> +
> +/* { dg-final { scan-assembler-times {\tfmax\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfmax\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfmax\tz[0-9]+\.d, p[0-7]/m, 
> z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfmin\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfmin\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfmin\tz[0-9]+\.d, p[0-7]/m, 
> z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */  

Reply via email to