> -----Original Message-----
> From: Christophe Lyon <christophe.l...@linaro.org>
> Sent: Thursday, May 6, 2021 10:23 AM
> To: Tamar Christina <tamar.christ...@arm.com>
> Cc: gcc Patches <gcc-patches@gcc.gnu.org>; nd <n...@arm.com>
> Subject: Re: [PATCH 3/4][AArch32]: Add support for sign differing dot-product usdot for NEON.
> 
> On Wed, 5 May 2021 at 19:39, Tamar Christina via Gcc-patches <gcc-
> patc...@gcc.gnu.org> wrote:
> >
> > Hi All,
> >
> > This adds optabs implementing usdot_prod.
> >
> > The following testcase:
> >
> > #define N 480
> > #define SIGNEDNESS_1 unsigned
> > #define SIGNEDNESS_2 signed
> > #define SIGNEDNESS_3 signed
> > #define SIGNEDNESS_4 unsigned
> >
> > SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res,
> > SIGNEDNESS_3 char *restrict a,
> >    SIGNEDNESS_4 char *restrict b)
> > {
> >   for (__INTPTR_TYPE__ i = 0; i < N; ++i)
> >     {
> >       int av = a[i];
> >       int bv = b[i];
> >       SIGNEDNESS_2 short mult = av * bv;
> >       res += mult;
> >     }
> >   return res;
> > }
> >
> > Generates
> >
> > f:
> >         vmov.i32        q8, #0  @ v4si
> >         add     r3, r2, #480
> > .L2:
> >         vld1.8  {q10}, [r2]!
> >         vld1.8  {q9}, [r1]!
> >         vusdot.s8       q8, q9, q10
> >         cmp     r3, r2
> >         bne     .L2
> >         vadd.i32        d16, d16, d17
> >         vpadd.i32       d16, d16, d16
> >         vmov.32 r3, d16[0]
> >         add     r0, r0, r3
> >         bx      lr
> >
> > instead of
> >
> > f:
> >         vmov.i32        q8, #0  @ v4si
> >         add     r3, r2, #480
> > .L2:
> >         vld1.8  {q9}, [r2]!
> >         vld1.8  {q11}, [r1]!
> >         cmp     r3, r2
> >         vmull.s8 q10, d18, d22
> >         vmull.s8 q9, d19, d23
> >         vaddw.s16       q8, q8, d20
> >         vaddw.s16       q8, q8, d21
> >         vaddw.s16       q8, q8, d18
> >         vaddw.s16       q8, q8, d19
> >         bne     .L2
> >         vadd.i32        d16, d16, d17
> >         vpadd.i32       d16, d16, d16
> >         vmov.32 r3, d16[0]
> >         add     r0, r0, r3
> >         bx      lr
> >
> > For NEON.  I couldn't figure out if the MVE instruction vmlaldav.s16
> > could be used to emulate this.  Because it would require additional
> > widening to work, I left MVE out of this patch set, but perhaps someone
> > should take a look.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> I guess you mean arm-linux-gnueabihf ?
> 

Oops, yeah, I was on automatic pilot...

> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> >         * config/arm/neon.md (usdot_prod<vsi2qi>): New.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/arm/simd/vusdot-autovec.c: New test.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> > index fec2cc91d24b6eff7b6fc8fdd54f39b3d646c468..23ad411178db77c5d19bee7452bc1070331c1aa0 100644
> > --- a/gcc/config/arm/neon.md
> > +++ b/gcc/config/arm/neon.md
> > @@ -3075,6 +3075,24 @@ (define_expand "<sup>dot_prod<vsi2qi>"
> >    DONE;
> >  })
> >
> > +;; Auto-vectorizer pattern for usdot
> > +(define_expand "usdot_prod<vsi2qi>"
> > +  [(set (match_operand:VCVTI 0 "register_operand")
> > +       (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
> > +                                                       "register_operand")
> > +                                  (match_operand:<VSI2QI> 2
> > +                                                       "register_operand")]
> > +                    UNSPEC_DOT_US)
> > +                   (match_operand:VCVTI 3 "register_operand")))]
> > +  "TARGET_I8MM"
> > +{
> > +  emit_insn (
> > +    gen_neon_usdot<vsi2qi> (operands[3], operands[3], operands[1],
> > +                           operands[2]));
> > +  emit_insn (gen_rtx_SET (operands[0], operands[3]));
> > +  DONE;
> > +})
> > +
> >  (define_expand "neon_copysignf<mode>"
> >    [(match_operand:VCVTF 0 "register_operand")
> >     (match_operand:VCVTF 1 "register_operand")
> > diff --git a/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..7cc56f68817d77d6950df0ab372d6fbaad6b3813
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c
> > @@ -0,0 +1,38 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */
> > +
> > +#define N 480
> > +#define SIGNEDNESS_1 unsigned
> > +#define SIGNEDNESS_2 signed
> > +#define SIGNEDNESS_3 signed
> > +#define SIGNEDNESS_4 unsigned
> > +
> > +SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res,
> > +SIGNEDNESS_3 char *restrict a,
> > +   SIGNEDNESS_4 char *restrict b)
> > +{
> > +  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
> > +    {
> > +      int av = a[i];
> > +      int bv = b[i];
> > +      SIGNEDNESS_2 short mult = av * bv;
> > +      res += mult;
> > +    }
> > +  return res;
> > +}
> > +
> > +SIGNEDNESS_1 int __attribute__ ((noipa)) g (SIGNEDNESS_1 int res,
> > +SIGNEDNESS_3 char *restrict b,
> > +   SIGNEDNESS_4 char *restrict a)
> > +{
> > +  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
> > +    {
> > +      int av = a[i];
> > +      int bv = b[i];
> > +      SIGNEDNESS_2 short mult = av * bv;
> > +      res += mult;
> > +    }
> > +  return res;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times {vusdot.s8} 2 { target {
> > +arm-*-*-gnueabihf } } } } */
> >
> >
> > --

Reply via email to