On Wed, 12 May 2021 at 20:33, Richard Earnshaw <richard.earns...@foss.arm.com> wrote: > > On 12/05/2021 12:05, Prathamesh Kulkarni via Gcc-patches wrote: > > On Wed, 12 May 2021 at 16:02, Richard Earnshaw > > <richard.earns...@foss.arm.com> wrote: > >> > >> > >> > >> On 12/05/2021 08:46, Prathamesh Kulkarni via Gcc-patches wrote: > >>> On Mon, 10 May 2021 at 19:55, Richard Earnshaw > >>> <richard.earns...@foss.arm.com> wrote: > >>>> > >>>> > >>>> > >>>> On 06/05/2021 01:14, Prathamesh Kulkarni via Gcc-patches wrote: > >>>>> Hi, > >>>>> The attached patch replaces __builtin_neon_vtst* (a, b) with (a & b) != > >>>>> 0. > >>>>> Bootstrapped and tested on arm-linux-gnueabihf and cross-tested on > >>>>> arm*-*-*. > >>>>> OK to commit ? > >>>>> > >>>>> Thanks, > >>>>> Prathamesh > >>>>> > >>>> > >>>> You're missing the ChangeLog details. > >>>> > >>>> Also, if you're removing these, do we still need the neon_vtst<mode> > >>>> patterns in neon.md? They generate unspecs, so if they're no-longer > >>>> needed for these expansions, they can likely be dropped entirely. > >>> Hi Richard, > >>> Thanks for the suggestions. > >>> Does the attached patch look OK if bootstrap+test passes ? > >> > >> You're still missing the changelog description. > > Oh, I had attached it separately. > > Sorry, I hadn't noticed that. The attachments were in > application/octet-stream format so don't show up when viewing the > message - I have to download them and then load them into a separate > editor or whatever in order to view them. Please don't use octet-stream > attachments for plain text files. Oops, sorry about that. > > 2021-05-12 Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> > > PR target/66791 > * config/arm/arm_neon.h: Replace calls to __builtin_neon_vtst* (a, b) > with > (a & b) != 0. > > For a ChangeLog description it's usually better to avoid the exact > expression. I think it would be better to write. 
> > Replace calls to __builtin_neon_vtst* (a, b) with boolean logic equivalent. > > Also, pedantically, you should list every function you've changed (just > in case somebody searches the ChangeLog for changes that affect, say, > vtstq_p8). There aren't that many affected functions in this specific > patch that that is an unreasonable thing to do. > > * config/arm/arm_neon_builtins.def: Remove entry for vtst. > * config/arm/neon.md (neon_vtst<mode>): Remove pattern. > > OK with those changes. Is the attached version OK to commit ? Bootstrapped + tested on arm-linux-gnueabihf and cross-tested on arm*-*-*.
Thanks, Prathamesh > > R. > > > Does this version look OK ? > > > > Thanks, > > Prathamesh > >> > >> R. > >>> > >>> Thanks, > >>> Prathamesh > >>>> > >>>> R. >
2021-05-14 Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> PR target/66791 * config/arm/arm_neon.h (vtst_s8): Replace call to vtst builtin with its boolean logic equivalent. (vtst_s16): Likewise. (vtst_s32): Likewise. (vtst_u8): Likewise. (vtst_u16): Likewise. (vtst_u32): Likewise. (vtst_p8): Likewise. (vtst_p16): Likewise. (vtstq_s8): Likewise. (vtstq_s16): Likewise. (vtstq_s32): Likewise. (vtstq_u8): Likewise. (vtstq_u16): Likewise. (vtstq_u32): Likewise. (vtstq_p8): Likewise. (vtstq_p16): Likewise. * config/arm/arm_neon_builtins.def: Remove entry for vtst. * config/arm/neon.md (neon_vtst<mode>): Remove pattern. diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index dc28b92b5af..dcd533fd003 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -2919,112 +2919,112 @@ __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtst_s8 (int8x8_t __a, int8x8_t __b) { - return (uint8x8_t)__builtin_neon_vtstv8qi (__a, __b); + return (uint8x8_t) ((__a & __b) != 0); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtst_s16 (int16x4_t __a, int16x4_t __b) { - return (uint16x4_t)__builtin_neon_vtstv4hi (__a, __b); + return (uint16x4_t) ((__a & __b) != 0); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtst_s32 (int32x2_t __a, int32x2_t __b) { - return (uint32x2_t)__builtin_neon_vtstv2si (__a, __b); + return (uint32x2_t) ((__a & __b) != 0); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtst_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t)__builtin_neon_vtstv8qi ((int8x8_t) __a, (int8x8_t) __b); + return (uint8x8_t) ((__a & __b) != 0); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtst_u16 (uint16x4_t __a, uint16x4_t __b) { - 
return (uint16x4_t)__builtin_neon_vtstv4hi ((int16x4_t) __a, (int16x4_t) __b); + return (uint16x4_t) ((__a & __b) != 0); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtst_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t)__builtin_neon_vtstv2si ((int32x2_t) __a, (int32x2_t) __b); + return (uint32x2_t) ((__a & __b) != 0); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtst_p8 (poly8x8_t __a, poly8x8_t __b) { - return (uint8x8_t)__builtin_neon_vtstv8qi ((int8x8_t) __a, (int8x8_t) __b); + return (uint8x8_t) ((__a & __b) != 0); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtst_p16 (poly16x4_t __a, poly16x4_t __b) { - return (uint16x4_t)__builtin_neon_vtstv4hi ((int16x4_t) __a, (int16x4_t) __b); + return (uint16x4_t) ((__a & __b) != 0); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtstq_s8 (int8x16_t __a, int8x16_t __b) { - return (uint8x16_t)__builtin_neon_vtstv16qi (__a, __b); + return (uint8x16_t) ((__a & __b) != 0); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtstq_s16 (int16x8_t __a, int16x8_t __b) { - return (uint16x8_t)__builtin_neon_vtstv8hi (__a, __b); + return (uint16x8_t) ((__a & __b) != 0); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtstq_s32 (int32x4_t __a, int32x4_t __b) { - return (uint32x4_t)__builtin_neon_vtstv4si (__a, __b); + return (uint32x4_t) ((__a & __b) != 0); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtstq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t)__builtin_neon_vtstv16qi ((int8x16_t) __a, (int8x16_t) __b); + return (uint8x16_t) ((__a & __b) != 0); } 
__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtstq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t)__builtin_neon_vtstv8hi ((int16x8_t) __a, (int16x8_t) __b); + return (uint16x8_t) ((__a & __b) != 0); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtstq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t)__builtin_neon_vtstv4si ((int32x4_t) __a, (int32x4_t) __b); + return (uint32x4_t) ((__a & __b) != 0); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtstq_p8 (poly8x16_t __a, poly8x16_t __b) { - return (uint8x16_t)__builtin_neon_vtstv16qi ((int8x16_t) __a, (int8x16_t) __b); + return (uint8x16_t) ((__a & __b) != 0); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtstq_p16 (poly16x8_t __a, poly16x8_t __b) { - return (uint16x8_t)__builtin_neon_vtstv8hi ((int16x8_t) __a, (int16x8_t) __b); + return (uint16x8_t) ((__a & __b) != 0); } __extension__ extern __inline int8x8_t diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 97e4f9c9be3..70438ac1848 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -142,7 +142,6 @@ VAR2 (UNOP, vcgez, v4hf, v8hf) VAR2 (UNOP, vcgtz, v4hf, v8hf) VAR2 (UNOP, vclez, v4hf, v8hf) VAR2 (UNOP, vcltz, v4hf, v8hf) -VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (BINOP, vabds, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (BINOP, vabdu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR2 (BINOP, vabdf, v2sf, v4sf) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 2a1e304f2ba..641d26fed47 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -2578,16 +2578,6 @@ DONE; }) -(define_insn "neon_vtst<mode>" - [(set (match_operand:VDQIW 0 "s_register_operand" "=w") - 
(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") - (match_operand:VDQIW 2 "s_register_operand" "w")] - UNSPEC_VTST))] - "TARGET_NEON" - "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "type" "neon_tst<q>")] -) - (define_insn "neon_vtst_combine<mode>" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") (plus:VDQIW