On Wed, 5 May 2021 at 19:39, Tamar Christina via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi All,
>
> This adds optabs implementing usdot_prod.
>
> The following testcase:
>
> #define N 480
> #define SIGNEDNESS_1 unsigned
> #define SIGNEDNESS_2 signed
> #define SIGNEDNESS_3 signed
> #define SIGNEDNESS_4 unsigned
>
> SIGNEDNESS_1 int __attribute__ ((noipa))
> f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
>    SIGNEDNESS_4 char *restrict b)
> {
>   for (__INTPTR_TYPE__ i = 0; i < N; ++i)
>     {
>       int av = a[i];
>       int bv = b[i];
>       SIGNEDNESS_2 short mult = av * bv;
>       res += mult;
>     }
>   return res;
> }
>
> Generates
>
> f:
>         vmov.i32        q8, #0  @ v4si
>         add     r3, r2, #480
> .L2:
>         vld1.8  {q10}, [r2]!
>         vld1.8  {q9}, [r1]!
>         vusdot.s8       q8, q9, q10
>         cmp     r3, r2
>         bne     .L2
>         vadd.i32        d16, d16, d17
>         vpadd.i32       d16, d16, d16
>         vmov.32 r3, d16[0]
>         add     r0, r0, r3
>         bx      lr
>
> instead of
>
> f:
>         vmov.i32        q8, #0  @ v4si
>         add     r3, r2, #480
> .L2:
>         vld1.8  {q9}, [r2]!
>         vld1.8  {q11}, [r1]!
>         cmp     r3, r2
>         vmull.s8 q10, d18, d22
>         vmull.s8 q9, d19, d23
>         vaddw.s16       q8, q8, d20
>         vaddw.s16       q8, q8, d21
>         vaddw.s16       q8, q8, d18
>         vaddw.s16       q8, q8, d19
>         bne     .L2
>         vadd.i32        d16, d16, d17
>         vpadd.i32       d16, d16, d16
>         vmov.32 r3, d16[0]
>         add     r0, r0, r3
>         bx      lr
>
> For NEON.  I couldn't figure out whether the MVE instruction vmlaldav.s16 could
> be used to emulate this.  Because it would require additional widening to work,
> I left MVE out of this patch set, but perhaps someone should take a look.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

I guess you mean arm-linux-gnueabihf?

>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>         * config/arm/neon.md (usdot_prod<vsi2qi>): New.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/arm/simd/vusdot-autovec.c: New test.
>
> --- inline copy of patch --
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index fec2cc91d24b6eff7b6fc8fdd54f39b3d646c468..23ad411178db77c5d19bee7452bc1070331c1aa0 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -3075,6 +3075,24 @@ (define_expand "<sup>dot_prod<vsi2qi>"
>    DONE;
>  })
>
> +;; Auto-vectorizer pattern for usdot
> +(define_expand "usdot_prod<vsi2qi>"
> +  [(set (match_operand:VCVTI 0 "register_operand")
> +       (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
> +                                                       "register_operand")
> +                                  (match_operand:<VSI2QI> 2
> +                                                       "register_operand")]
> +                    UNSPEC_DOT_US)
> +                   (match_operand:VCVTI 3 "register_operand")))]
> +  "TARGET_I8MM"
> +{
> +  emit_insn (
> +    gen_neon_usdot<vsi2qi> (operands[3], operands[3], operands[1],
> +                           operands[2]));
> +  emit_insn (gen_rtx_SET (operands[0], operands[3]));
> +  DONE;
> +})
> +
>  (define_expand "neon_copysignf<mode>"
>    [(match_operand:VCVTF 0 "register_operand")
>     (match_operand:VCVTF 1 "register_operand")
> diff --git a/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..7cc56f68817d77d6950df0ab372d6fbaad6b3813
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c
> @@ -0,0 +1,38 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */
> +
> +#define N 480
> +#define SIGNEDNESS_1 unsigned
> +#define SIGNEDNESS_2 signed
> +#define SIGNEDNESS_3 signed
> +#define SIGNEDNESS_4 unsigned
> +
> +SIGNEDNESS_1 int __attribute__ ((noipa))
> +f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
> +   SIGNEDNESS_4 char *restrict b)
> +{
> +  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
> +    {
> +      int av = a[i];
> +      int bv = b[i];
> +      SIGNEDNESS_2 short mult = av * bv;
> +      res += mult;
> +    }
> +  return res;
> +}
> +
> +SIGNEDNESS_1 int __attribute__ ((noipa))
> +g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b,
> +   SIGNEDNESS_4 char *restrict a)
> +{
> +  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
> +    {
> +      int av = a[i];
> +      int bv = b[i];
> +      SIGNEDNESS_2 short mult = av * bv;
> +      res += mult;
> +    }
> +  return res;
> +}
> +
> +/* { dg-final { scan-assembler-times {vusdot.s8} 2 { target { arm-*-*-gnueabihf } } } } */
>
>
> --

Reply via email to