Re: [PATCH v2 2/2] aarch64: Use standard names for SVE saturating arithmetic

Richard Sandiford Fri, 10 Jan 2025 06:06:18 -0800

Akram Ahmad <akram.ah...@arm.com> writes:
> Rename the existing SVE unpredicated saturating arithmetic instructions
> to use standard names which are used by IFN_SAT_ADD and IFN_SAT_SUB.
>
> gcc/ChangeLog:
>
>       * config/aarch64/aarch64-sve.md: Rename insns.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.target/aarch64/sve/saturating_arithmetic.inc:
>       Template file for auto-vectorizer tests.
>       * gcc.target/aarch64/sve/saturating_arithmetic_1.c:
>       Instantiate 8-bit vector tests.
>       * gcc.target/aarch64/sve/saturating_arithmetic_2.c:
>       Instantiate 16-bit vector tests.
>       * gcc.target/aarch64/sve/saturating_arithmetic_3.c:
>       Instantiate 32-bit vector tests.
>       * gcc.target/aarch64/sve/saturating_arithmetic_4.c:
>       Instantiate 64-bit vector tests.


OK, thanks.  I'll push it along with patch 1.

Sorry again for the long delay in reviewing this series.

Richard


> ---
>  gcc/config/aarch64/aarch64-sve.md             |  4 +-
>  .../aarch64/sve/saturating_arithmetic.inc     | 68 +++++++++++++++++++
>  .../aarch64/sve/saturating_arithmetic_1.c     | 60 ++++++++++++++++
>  .../aarch64/sve/saturating_arithmetic_2.c     | 60 ++++++++++++++++
>  .../aarch64/sve/saturating_arithmetic_3.c     | 62 +++++++++++++++++
>  .../aarch64/sve/saturating_arithmetic_4.c     | 62 +++++++++++++++++
>  6 files changed, 314 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic.inc
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_1.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_2.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_3.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_4.c
>
> diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
> index 06bd3e4bb2c..b987b292b20 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -4379,7 +4379,7 @@
>  ;; -------------------------------------------------------------------------
>  
>  ;; Unpredicated saturating signed addition and subtraction.
> -(define_insn "@aarch64_sve_<optab><mode>"
> +(define_insn "<su_optab>s<addsub><mode>3"
>    [(set (match_operand:SVE_FULL_I 0 "register_operand")
>       (SBINQOPS:SVE_FULL_I
>         (match_operand:SVE_FULL_I 1 "register_operand")
> @@ -4395,7 +4395,7 @@
>  )
>  
>  ;; Unpredicated saturating unsigned addition and subtraction.
> -(define_insn "@aarch64_sve_<optab><mode>"
> +(define_insn "<su_optab>s<addsub><mode>3"
>    [(set (match_operand:SVE_FULL_I 0 "register_operand")
>       (UBINQOPS:SVE_FULL_I
>         (match_operand:SVE_FULL_I 1 "register_operand")
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic.inc b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic.inc
> new file mode 100644
> index 00000000000..0b3ebbcb0d6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic.inc
> @@ -0,0 +1,68 @@
> +/* Template file for vector saturating arithmetic validation.
> +
> +   This file defines saturating addition and subtraction functions for a given
> +   scalar type, testing the auto-vectorization of these two operators.  This
> +   type, along with the corresponding minimum and maximum values for that type,
> +   must be defined by any test file which includes this template file.  */
> +
> +#ifndef SAT_ARIT_AUTOVEC_INC
> +#define SAT_ARIT_AUTOVEC_INC
> +
> +#include <limits.h>
> +#include <arm_neon.h>
> +
> +#ifndef UT
> +#define UT uint32_t
> +#define UMAX UINT_MAX
> +#define UMIN 0
> +#endif
> +
> +void uaddq (UT *out, UT *a, UT *b, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    {
> +      UT sum = a[i] + b[i];
> +      out[i] = sum < a[i] ? UMAX : sum;
> +    }
> +}
> +
> +void uaddq2 (UT *out, UT *a, UT *b, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    {
> +      UT sum;
> +      if (!__builtin_add_overflow(a[i], b[i], &sum))
> +     out[i] = sum;
> +      else
> +     out[i] = UMAX;
> +    }
> +}
> +
> +void uaddq_imm (UT *out, UT *a, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    {
> +      UT sum = a[i] + 50;
> +      out[i] = sum < a[i] ? UMAX : sum;
> +    }
> +}
> +
> +void usubq (UT *out, UT *a, UT *b, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    {
> +      UT sum = a[i] - b[i];
> +      out[i] = sum > a[i] ? UMIN : sum;
> +    }
> +}
> +
> +void usubq_imm (UT *out, UT *a, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    {
> +      UT sum = a[i] - 50;
> +      out[i] = sum > a[i] ? UMIN : sum;
> +    }
> +}
> +
> +#endif
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_1.c b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_1.c
> new file mode 100644
> index 00000000000..6936e9a2704
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_1.c
> @@ -0,0 +1,60 @@
> +/* { dg-do compile { target { aarch64*-*-* } } } */
> +/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +/*
> +** uaddq:
> +** ...
> +**   ld1b\tz([0-9]+)\.b, .*
> +**   ld1b\tz([0-9]+)\.b, .*
> +**   uqadd\tz\2.b, z\1\.b, z\2\.b
> +** ...
> +**   ldr\tb([0-9]+), .*
> +**   ldr\tb([0-9]+), .*
> +**   uqadd\tb\4, b\3, b\4
> +** ...
> +*/
> +/*
> +** uaddq2:
> +** ...
> +**   ld1b\tz([0-9]+)\.b, .*
> +**   ld1b\tz([0-9]+)\.b, .*
> +**   uqadd\tz\2.b, z\1\.b, z\2\.b
> +** ...
> +**   ldr\tb([0-9]+), .*
> +**   ldr\tb([0-9]+), .*
> +**   uqadd\tb\4, b\3, b\4
> +** ...
> +*/
> +/*
> +** uaddq_imm:
> +** ...
> +**   ld1b\tz([0-9]+)\.b, .*
> +**   uqadd\tz\1.b, z\1\.b, #50
> +** ...
> +**   movi\tv([0-9]+)\.8b, 0x32
> +** ...
> +**   ldr\tb([0-9]+), .*
> +**   uqadd\tb\3, b\3, b\2
> +** ...
> +*/
> +/*
> +** usubq: { xfail *-*-* }
> +** ...
> +**   ld1b\tz([0-9]+)\.b, .*
> +**   ld1b\tz([0-9]+)\.b, .*
> +**   uqsub\tz\2.b, z\1\.b, z\2\.b
> +** ...
> +**   ldr\tb([0-9]+), .*
> +**   ldr\tb([0-9]+), .*
> +**   uqsub\tb\4, b\3, b\4
> +** ...
> +*/
> +
> +#include <limits.h>
> +
> +#define UT unsigned char
> +#define UMAX UCHAR_MAX
> +#define UMIN 0
> +
> +#include "saturating_arithmetic.inc"
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_2.c b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_2.c
> new file mode 100644
> index 00000000000..928bc0054df
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_2.c
> @@ -0,0 +1,60 @@
> +/* { dg-do compile { target { aarch64*-*-* } } } */
> +/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +/*
> +** uaddq:
> +** ...
> +**   ld1h\tz([0-9]+)\.h, .*
> +**   ld1h\tz([0-9]+)\.h, .*
> +**   uqadd\tz\2.h, z\1\.h, z\2\.h
> +** ...
> +**   ldr\th([0-9]+), .*
> +**   ldr\th([0-9]+), .*
> +**   uqadd\th\4, h\3, h\4
> +** ...
> +*/
> +/*
> +** uaddq2:
> +** ...
> +**   ld1h\tz([0-9]+)\.h, .*
> +**   ld1h\tz([0-9]+)\.h, .*
> +**   uqadd\tz\2.h, z\1\.h, z\2\.h
> +** ...
> +**   ldr\th([0-9]+), .*
> +**   ldr\th([0-9]+), .*
> +**   uqadd\th\4, h\3, h\4
> +** ...
> +*/
> +/*
> +** uaddq_imm:
> +** ...
> +**   ld1h\tz([0-9]+)\.h, .*
> +**   uqadd\tz\1.h, z\1\.h, #50
> +** ...
> +**   movi\tv([0-9]+)\.4h, 0x32
> +** ...
> +**   ldr\th([0-9]+), .*
> +**   uqadd\th\3, h\3, h\2
> +** ...
> +*/
> +/*
> +** usubq: { xfail *-*-* }
> +** ...
> +**   ld1h\tz([0-9]+)\.h, .*
> +**   ld1h\tz([0-9]+)\.h, .*
> +**   uqsub\tz\2.h, z\1\.h, z\2\.h
> +** ...
> +**   ldr\th([0-9]+), .*
> +**   ldr\th([0-9]+), .*
> +**   uqsub\th\4, h\3, h\4
> +** ...
> +*/
> +
> +#include <limits.h>
> +
> +#define UT unsigned short
> +#define UMAX USHRT_MAX
> +#define UMIN 0
> +
> +#include "saturating_arithmetic.inc"
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_3.c b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_3.c
> new file mode 100644
> index 00000000000..14e2de59b1e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_3.c
> @@ -0,0 +1,62 @@
> +/* { dg-do compile { target { aarch64*-*-* } } } */
> +/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +/*
> +** uaddq:
> +** ...
> +**   ld1w\tz([0-9]+)\.s, .*
> +**   ld1w\tz([0-9]+)\.s, .*
> +**   uqadd\tz\2.s, z\1\.s, z\2\.s
> +** ...
> +**   ldr\tw([0-9]+), .*
> +**   ldr\tw([0-9]+), .*
> +**   adds\tw\3, w\3, w\4
> +**   csinv\tw\3, w\3, wzr, cc
> +** ...
> +*/
> +/*
> +** uaddq2:
> +** ...
> +**   ld1w\tz([0-9]+)\.s, .*
> +**   ld1w\tz([0-9]+)\.s, .*
> +**   uqadd\tz\2.s, z\1\.s, z\2\.s
> +** ...
> +**   ldr\tw([0-9]+), .*
> +**   ldr\tw([0-9]+), .*
> +**   adds\tw\3, w\3, w\4
> +**   csinv\tw\3, w\3, wzr, cc
> +** ...
> +*/
> +/*
> +** uaddq_imm:
> +** ...
> +**   ld1w\tz([0-9]+)\.s, .*
> +**   uqadd\tz\1.s, z\1\.s, #50
> +** ...
> +**   ldr\tw([0-9]+), .*
> +**   adds\tw\2, w\2, #50
> +**   csinv\tw\2, w\2, wzr, cc
> +** ...
> +*/
> +/*
> +** usubq: { xfail *-*-* }
> +** ...
> +**   ld1w\tz([0-9]+)\.s, .*
> +**   ld1w\tz([0-9]+)\.s, .*
> +**   uqsub\tz\2.s, z\1\.s, z\2\.s
> +** ...
> +**   ldr\tw([0-9]+), .*
> +**   ldr\tw([0-9]+), .*
> +**   subs\tw\3, w\3, w\4
> +**   csel\tw\3, w\3, wzr, cs
> +** ...
> +*/
> +
> +#include <limits.h>
> +
> +#define UT unsigned int
> +#define UMAX UINT_MAX
> +#define UMIN 0
> +
> +#include "saturating_arithmetic.inc"
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_4.c b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_4.c
> new file mode 100644
> index 00000000000..05a5786b4ab
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_4.c
> @@ -0,0 +1,62 @@
> +/* { dg-do compile { target { aarch64*-*-* } } } */
> +/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +/*
> +** uaddq:
> +** ...
> +**   ld1d\tz([0-9]+)\.d, .*
> +**   ld1d\tz([0-9]+)\.d, .*
> +**   uqadd\tz\2.d, z\1\.d, z\2\.d
> +** ...
> +**   ldr\tx([0-9]+), .*
> +**   ldr\tx([0-9]+), .*
> +**   adds\tx\3, x\3, x\4
> +**   csinv\tx\3, x\3, xzr, cc
> +** ...
> +*/
> +/*
> +** uaddq2:
> +** ...
> +**   ld1d\tz([0-9]+)\.d, .*
> +**   ld1d\tz([0-9]+)\.d, .*
> +**   uqadd\tz\2.d, z\1\.d, z\2\.d
> +** ...
> +**   ldr\tx([0-9]+), .*
> +**   ldr\tx([0-9]+), .*
> +**   adds\tx\3, x\3, x\4
> +**   csinv\tx\3, x\3, xzr, cc
> +** ...
> +*/
> +/*
> +** uaddq_imm:
> +** ...
> +**   ld1d\tz([0-9]+)\.d, .*
> +**   uqadd\tz\1.d, z\1\.d, #50
> +** ...
> +**   ldr\tx([0-9]+), .*
> +**   adds\tx\2, x\2, #50
> +**   csinv\tx\2, x\2, xzr, cc
> +** ...
> +*/
> +/*
> +** usubq: { xfail *-*-* }
> +** ...
> +**   ld1d\tz([0-9]+)\.d, .*
> +**   ld1d\tz([0-9]+)\.d, .*
> +**   uqsub\tz\2.d, z\1\.d, z\2\.d
> +** ...
> +**   ldr\tx([0-9]+), .*
> +**   ldr\tx([0-9]+), .*
> +**   subs\tx\3, x\3, x\4
> +**   csel\tx\3, x\3, xzr, cs
> +** ...
> +*/
> +
> +#include <limits.h>
> +
> +#define UT unsigned long
> +#define UMAX ULONG_MAX
> +#define UMIN 0
> +
> +#include "saturating_arithmetic.inc"
> \ No newline at end of file

Re: [PATCH v2 2/2] aarch64: Use standard names for SVE saturating arithmetic

Reply via email to