Tamar Christina <tamar.christ...@arm.com> writes:
> diff --git a/gcc/config/aarch64/aarch64-simd.md 
> b/gcc/config/aarch64/aarch64-simd.md
> index 
> 4edee99051c4e2112b546becca47da32aae21df2..c9fb8e702732dd311fb10de17126432e2a63a32b
>  100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -648,6 +648,22 @@ (define_expand "<sur>dot_prod<vsi2qi>"
>    DONE;
>  })
>  
> +;; Auto-vectorizer pattern for usdot
> +(define_expand "usdot_prod<vsi2qi>"
> +  [(set (match_operand:VS 0 "register_operand")
> +     (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
> +                         (match_operand:<VSI2QI> 2 "register_operand")]
> +              UNSPEC_USDOT)
> +             (match_operand:VS 3 "register_operand")))]
> +  "TARGET_I8MM"
> +{
> +  emit_insn (
> +    gen_aarch64_usdot<vsi2qi> (operands[3], operands[3], operands[1],
> +                            operands[2]));
> +  emit_move_insn (operands[0], operands[3]);
> +  DONE;
> +})

We can't modify operands[3] here; it's an input rather than an output.

It looks like this would work with just the {…} removed though.
The pattern will match aarch64_usdot<vsi2qi> on its own accord.

Even better would be to rename __builtin_aarch64_usdot… to
__builtin_usdot_prod…, change its arguments so that they line up
with the optabs, and change arm_neon.h to match.

> diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c 
> b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..b99a945903c043c7410becaf6f09496dd038410d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c
> @@ -0,0 +1,38 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */
> +
> +#define N 480
> +#define SIGNEDNESS_1 unsigned
> +#define SIGNEDNESS_2 signed
> +#define SIGNEDNESS_3 signed
> +#define SIGNEDNESS_4 unsigned
> +
> +SIGNEDNESS_1 int __attribute__ ((noipa))
> +f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
> +   SIGNEDNESS_4 char *restrict b)
> +{
> +  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
> +    {
> +      int av = a[i];
> +      int bv = b[i];
> +      SIGNEDNESS_2 short mult = av * bv;
> +      res += mult;
> +    }
> +  return res;
> +}
> +
> +SIGNEDNESS_1 int __attribute__ ((noipa))
> +g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b,
> +   SIGNEDNESS_4 char *restrict a)
> +{
> +  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
> +    {
> +      int av = a[i];
> +      int bv = b[i];
> +      SIGNEDNESS_2 short mult = av * bv;
> +      res += mult;
> +    }
> +  return res;
> +}
> +
> +/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..094dd51cea62e0ba05ec3505657bf05320e5fdbb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c
> @@ -0,0 +1,38 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=armv8.2-a+i8mm+sve" } */
> +
> +#define N 480
> +#define SIGNEDNESS_1 unsigned
> +#define SIGNEDNESS_2 signed
> +#define SIGNEDNESS_3 signed
> +#define SIGNEDNESS_4 unsigned
> +
> +SIGNEDNESS_1 int __attribute__ ((noipa))
> +f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
> +   SIGNEDNESS_4 char *restrict b)
> +{
> +  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
> +    {
> +      int av = a[i];
> +      int bv = b[i];
> +      SIGNEDNESS_2 short mult = av * bv;
> +      res += mult;
> +    }
> +  return res;
> +}
> +
> +SIGNEDNESS_1 int __attribute__ ((noipa))
> +g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b,
> +   SIGNEDNESS_4 char *restrict a)
> +{
> +  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
> +    {
> +      int av = a[i];
> +      int bv = b[i];
> +      SIGNEDNESS_2 short mult = av * bv;
> +      res += mult;
> +    }
> +  return res;
> +}
> +
> +/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */

Guess this is personal preference, but I don't think the SIGNEDNESS_*
macros add anything when used like this.  I remember doing something
similar in the past when including .c files from other .c files(!)
in order to avoid cut-&-paste, but there doesn't seem much benefit
for standalone files like these.

Thanks,
Richard

Reply via email to