Tamar Christina <tamar.christ...@arm.com> writes:
> Hi All,
>
> This defines VECTOR_STORE_FLAG_VALUE to CONST1_RTX for AArch64
> so we simplify vector comparisons in AArch64.
>
> With this enabled
>
> res:
>         movi    v0.4s, 0
>         cmeq    v0.4s, v0.4s, v0.4s
>         ret
>
> is simplified to:
>
> res:
>         mvni    v0.4s, 0
>         ret
>
> NOTE: I don't really like the testcase as it depends on an
> uninitialised value to hide the constant from GIMPLE.
>
> Happy to go with something else if there are any suggestions.
> I thought about an RTL testcase, but those seem painful.

Like you say, I think an RTL testcase would be better.  Could you use
the attached (for gcc.dg/rtl/aarch64)?

> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>       * config/aarch64/aarch64.h (VECTOR_STORE_FLAG_VALUE): New.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.target/aarch64/vector-cmp-rtl-elim.c: New test.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index 2dfb999bea53414498a2355bb30db938f6b94100..b99f69103ab7e1d44e5e41ee89fb9a74450c57ca 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -156,6 +156,8 @@
>  
>  #define PCC_BITFIELD_TYPE_MATTERS    1
>  
> +#define VECTOR_STORE_FLAG_VALUE(MODE) CONST1_RTX (GET_MODE_INNER (MODE))
> +

I think it'd be useful to capture the reasons we discussed internally
for preferring this choice.

/* Use the same RTL truth representation for vector elements as we do
   for scalars.  This maintains the property that a comparison like
   eq:V4SI is a composition of 4 individual eq:SIs, just like plus:V4SI
   is a composition of 4 individual plus:SIs.

   This means that Advanced SIMD comparisons are represented in RTL as
   (neg (op ...)).  */

OK with those changes, thanks.

Richard

>  #ifndef USED_FOR_TARGET
>  
>  /* Define an enum of all features (ISA modes, architectures and extensions).
> diff --git a/gcc/testsuite/gcc.target/aarch64/vector-cmp-rtl-elim.c b/gcc/testsuite/gcc.target/aarch64/vector-cmp-rtl-elim.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..d67baa216d8332a26bdc64350402b77d87379f28
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vector-cmp-rtl-elim.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#include <arm_neon.h>
> +
> +/*
> +** res:
> +**   mvni    v0.4s, 0
> +**   ret
> +*/
> +uint32x4_t res ()
> +{
> +  uint32x4_t a;
> +  uint32x4_t b = {0, 0, 0, 0};
> +  return vceqq_u32 (a, b);
> +}
> +

/* { dg-do compile { target aarch64-*-* } } */
/* { dg-additional-options "-O2" } */
/* { dg-final { check-function-bodies "**" "" "" } } */

/*
** foo:
**	mvni	v0.4s, 0
**	ret
*/
/* RTL-level test function whose body is given directly as an insn chain
   and is entered at the "vregs" pass.

   The chain does the following:
     insn 3: sets pseudo <0> to an all-zero V4SI constant vector;
     insn 4: copies pseudo <0> into pseudo <1>;
     insn 5: sets pseudo <2> to (neg (eq <0> <1>)), i.e. the negated
	     element-wise equality of a value with its own copy;
     insn 6: moves pseudo <2> into v0, which is the function's return_rtx.

   Because both comparison operands hold the same value, the comparison is
   true in every lane; the expected function body listed before this
   function is therefore a single mvni of 0 followed by ret, with no
   compare instruction left.  */
__Uint32x4_t __RTL (startwith ("vregs")) foo (void)
{
(function "foo"
  (insn-chain
    (block 2
      (edge-from entry (flags "FALLTHRU"))
      (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
      (cnote 2 NOTE_INSN_FUNCTION_BEG)
      (cinsn 3 (set (reg:V4SI <0>) (const_vector:V4SI [(const_int 0) (const_int 0) (const_int 0) (const_int 0)])))
      (cinsn 4 (set (reg:V4SI <1>) (reg:V4SI <0>)))
      (cinsn 5 (set (reg:V4SI <2>)
		    (neg:V4SI (eq:V4SI (reg:V4SI <0>) (reg:V4SI <1>)))))
      (cinsn 6 (set (reg:V4SI v0) (reg:V4SI <2>)))
      (edge-to exit (flags "FALLTHRU"))
    )
  )
  (crtl (return_rtx (reg/i:V4SI v0)))
)
}

Reply via email to