> -----Original Message----- > From: Tamar Christina <tamar.christ...@arm.com> > Sent: 25 May 2021 16:02 > To: gcc-patches@gcc.gnu.org > Cc: Richard Earnshaw <richard.earns...@arm.com>; nd <n...@arm.com>; > Ramana Radhakrishnan <ramana.radhakrish...@arm.com>; Kyrylo > Tkachov <kyrylo.tkac...@arm.com> > Subject: RE: [PATCH 3/4][AArch32]: Add support for sign differing dot- > product usdot for NEON. > > Forgot to include the list > > > -----Original Message----- > > From: Tamar Christina > > Sent: Tuesday, May 25, 2021 3:57 PM > > To: Tamar Christina <tamar.christ...@arm.com> > > Cc: Richard Earnshaw <richard.earns...@arm.com>; nd <n...@arm.com>; > > Ramana Radhakrishnan <ramana.radhakrish...@arm.com>; Kyrylo > Tkachov > > <kyrylo.tkac...@arm.com> > > Subject: RE: [PATCH 3/4][AArch32]: Add support for sign differing dot- > > product usdot for NEON. > > > > Hi All, > > > > This is a respin based on the feedback received in the AArch64 review. > > > > Bootstrapped and regtested on aarch64-none-linux-gnu with no issues. > > > > Ok for master? > >
Ok. Thanks, Kyrill > > Thanks, > > Tamar > > > > gcc/ChangeLog: > > > > * config/arm/neon.md (usdot_prod<vsi2qi>): New. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/arm/simd/vusdot-autovec.c: New test. > > > > > -----Original Message----- > > > From: Gcc-patches <gcc-patches-boun...@gcc.gnu.org> On Behalf Of > > Tamar > > > Christina via Gcc-patches > > > Sent: Wednesday, May 5, 2021 6:42 PM > > > To: gcc Patches <gcc-patches@gcc.gnu.org> > > > Cc: Richard Earnshaw <richard.earns...@arm.com>; nd > <n...@arm.com>; > > > Ramana Radhakrishnan <ramana.radhakrish...@arm.com> > > > Subject: FW: [PATCH 3/4][AArch32]: Add support for sign differing dot- > > > product usdot for NEON. > > > > > > Forgot to CC maintainers.. > > > > > > -----Original Message----- > > > From: Tamar Christina <tamar.christ...@arm.com> > > > Sent: Wednesday, May 5, 2021 6:39 PM > > > To: gcc-patches@gcc.gnu.org > > > Cc: nd <n...@arm.com> > > > Subject: [PATCH 3/4][AArch32]: Add support for sign differing > > > dot-product usdot for NEON. > > > > > > Hi All, > > > > > > This adds optabs implementing usdot_prod. > > > > > > The following testcase: > > > > > > #define N 480 > > > #define SIGNEDNESS_1 unsigned > > > #define SIGNEDNESS_2 signed > > > #define SIGNEDNESS_3 signed > > > #define SIGNEDNESS_4 unsigned > > > > > > SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res, > > > SIGNEDNESS_3 char *restrict a, > > > SIGNEDNESS_4 char *restrict b) > > > { > > > for (__INTPTR_TYPE__ i = 0; i < N; ++i) > > > { > > > int av = a[i]; > > > int bv = b[i]; > > > SIGNEDNESS_2 short mult = av * bv; > > > res += mult; > > > } > > > return res; > > > } > > > > > > Generates > > > > > > f: > > > vmov.i32 q8, #0 @ v4si > > > add r3, r2, #480 > > > .L2: > > > vld1.8 {q10}, [r2]! > > > vld1.8 {q9}, [r1]! 
> > > vusdot.s8 q8, q9, q10 > > > cmp r3, r2 > > > bne .L2 > > > vadd.i32 d16, d16, d17 > > > vpadd.i32 d16, d16, d16 > > > vmov.32 r3, d16[0] > > > add r0, r0, r3 > > > bx lr > > > > > > instead of > > > > > > f: > > > vmov.i32 q8, #0 @ v4si > > > add r3, r2, #480 > > > .L2: > > > vld1.8 {q9}, [r2]! > > > vld1.8 {q11}, [r1]! > > > cmp r3, r2 > > > vmull.s8 q10, d18, d22 > > > vmull.s8 q9, d19, d23 > > > vaddw.s16 q8, q8, d20 > > > vaddw.s16 q8, q8, d21 > > > vaddw.s16 q8, q8, d18 > > > vaddw.s16 q8, q8, d19 > > > bne .L2 > > > vadd.i32 d16, d16, d17 > > > vpadd.i32 d16, d16, d16 > > > vmov.32 r3, d16[0] > > > add r0, r0, r3 > > > bx lr > > > > > > For NEON. I couldn't figure out if the MVE instruction vmlaldav.s16 > > > could be used to emulate this. Because it would require additional > > > widening to work, I left MVE out of this patch set, but perhaps someone > > should take a look. > > > > > > Bootstrapped and regtested on aarch64-none-linux-gnu with no issues. > > > > > > Ok for master? > > > > > > Thanks, > > > Tamar > > > > > > gcc/ChangeLog: > > > > > > * config/arm/neon.md (usdot_prod<vsi2qi>): New. > > > > > > gcc/testsuite/ChangeLog: > > > > > > * gcc.target/arm/simd/vusdot-autovec.c: New test. 
> > > > > > --- inline copy of patch -- > > > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index > > > > > > fec2cc91d24b6eff7b6fc8fdd54f39b3d646c468..23ad411178db77c5d19bee74 > 52 > > > bc1070331c1aa0 100644 > > > --- a/gcc/config/arm/neon.md > > > +++ b/gcc/config/arm/neon.md > > > @@ -3075,6 +3075,24 @@ (define_expand "<sup>dot_prod<vsi2qi>" > > > DONE; > > > }) > > > > > > +;; Auto-vectorizer pattern for usdot > > > +(define_expand "usdot_prod<vsi2qi>" > > > + [(set (match_operand:VCVTI 0 "register_operand") > > > + (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1 > > > + "register_operand") > > > + (match_operand:<VSI2QI> 2 > > > + "register_operand")] > > > + UNSPEC_DOT_US) > > > + (match_operand:VCVTI 3 "register_operand")))] > > > + "TARGET_I8MM" > > > +{ > > > + emit_insn ( > > > + gen_neon_usdot<vsi2qi> (operands[3], operands[3], operands[1], > > > + operands[2])); > > > + emit_insn (gen_rtx_SET (operands[0], operands[3])); > > > + DONE; > > > +}) > > > + > > > (define_expand "neon_copysignf<mode>" > > > [(match_operand:VCVTF 0 "register_operand") > > > (match_operand:VCVTF 1 "register_operand") diff --git > > > a/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c > > > b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c > > > new file mode 100644 > > > index > > > > > > 0000000000000000000000000000000000000000..7cc56f68817d77d6950df0 > ab37 > > > 2d6fbaad6b3813 > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c > > > @@ -0,0 +1,38 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */ > > > + > > > +#define N 480 > > > +#define SIGNEDNESS_1 unsigned > > > +#define SIGNEDNESS_2 signed > > > +#define SIGNEDNESS_3 signed > > > +#define SIGNEDNESS_4 unsigned > > > + > > > +SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res, > > > +SIGNEDNESS_3 char *restrict a, > > > + SIGNEDNESS_4 char *restrict b) > > > +{ > > > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) > 
> > + { > > > + int av = a[i]; > > > + int bv = b[i]; > > > + SIGNEDNESS_2 short mult = av * bv; > > > + res += mult; > > > + } > > > + return res; > > > +} > > > + > > > +SIGNEDNESS_1 int __attribute__ ((noipa)) g (SIGNEDNESS_1 int res, > > > +SIGNEDNESS_3 char *restrict b, > > > + SIGNEDNESS_4 char *restrict a) > > > +{ > > > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) > > > + { > > > + int av = a[i]; > > > + int bv = b[i]; > > > + SIGNEDNESS_2 short mult = av * bv; > > > + res += mult; > > > + } > > > + return res; > > > +} > > > + > > > +/* { dg-final { scan-assembler-times {vusdot.s8} 2 { target { > > > +arm-*-*-gnueabihf } } } } */ > > > > > > > > > --