On Wed, Dec 4, 2013 at 8:06 AM, Tejas Belagod <tbela...@arm.com> wrote:
> Thanks Richard. Here is a revised patch. Sorry about the delay - I was
> investigating to make sure an LRA ICE I was seeing on aarch64 was unrelated
> to this patch. I've added a test case that I expect to pass for aarch64.
> I've also added the tests that you suggested for MIPS, but haven't checked
> for the target because I'm not sure what optimizations happen on MIPS.
>
> OK for trunk?
>
> Thanks,
> Tejas.
>
> 2013-12-04  Tejas Belagod  <tejas.bela...@arm.com>
>
>
> gcc/
>         * rtlanal.c (set_noop_p): Return nonzero in case of redundant
>         vec_select for overlapping register lanes.
>
> testsuite/
>         * gcc.dg/vect/vect-nop-move.c: New.
>
>
> diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
> index 0cd0c7e..e1388c8 100644
> --- a/gcc/rtlanal.c
> +++ b/gcc/rtlanal.c
> @@ -1180,6 +1180,26 @@ set_noop_p (const_rtx set)
>        dst = SUBREG_REG (dst);
>      }
>
> +  /* It is a NOOP if destination overlaps with selected src vector
> +     elements.  */
> +  if (GET_CODE (src) == VEC_SELECT
> +      && REG_P (XEXP (src, 0)) && REG_P (dst)
> +      && HARD_REGISTER_P (XEXP (src, 0))
> +      && HARD_REGISTER_P (dst))
> +    {
> +      int i;
> +      rtx par = XEXP (src, 1);
> +      rtx src0 = XEXP (src, 0);
> +      int c0 = INTVAL (XVECEXP (par, 0, 0));
> +      HOST_WIDE_INT offset = GET_MODE_UNIT_SIZE (GET_MODE (src0)) * c0;
> +
> +      for (i = 1; i < XVECLEN (par, 0); i++)
> +       if (INTVAL (XVECEXP (par, 0, i)) != c0 + i)
> +         return 0;
> +      return simplify_subreg_regno (REGNO (src0), GET_MODE (src0),
> +                                   offset, GET_MODE (dst)) == (int)REGNO (dst);
> +    }
> +
>    return (REG_P (src) && REG_P (dst)
>           && REGNO (src) == REGNO (dst));
>  }
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-nop-move.c b/gcc/testsuite/gcc.dg/vect/vect-nop-move.c
> new file mode 100644
> index 0000000..1941933
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-nop-move.c
> @@ -0,0 +1,64 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target vect_float } */
> +/* { dg-options "-O3 -fdump-rtl-combine-details" } */
> +
> +extern void abort (void);
> +
> +#define NOINLINE __attribute__((noinline))
> +
> +typedef float float32x4_t __attribute__ ((__vector_size__ (16)));
> +typedef float float32x2_t __attribute__ ((__vector_size__ (8)));
> +
> +NOINLINE float
> +foo32x4_be (float32x4_t x)
> +{
> +  return x[3];
> +}
> +
> +NOINLINE float
> +foo32x4_le (float32x4_t x)
> +{
> +  return x[0];
> +}
> +
> +NOINLINE float
> +bar (float a)
> +{
> +  return a;
> +}
> +
> +NOINLINE float
> +foo32x2_be (float32x2_t x)
> +{
> +  return bar (x[1]);
> +}
> +
> +NOINLINE float
> +foo32x2_le (float32x2_t x)
> +{
> +  return bar (x[0]);
> +}
> +
> +int
> +main()
> +{
> +  float32x4_t a = { 0.0f, 1.0f, 2.0f, 3.0f };
> +  float32x2_t b = { 0.0f, 1.0f };
> +
> +  if (foo32x4_be (a) != 3.0f)
> +    abort ();
> +
> +  if (foo32x4_le (a) != 0.0f)
> +    abort ();
> +
> +  if (foo32x2_be (b) != 1.0f)
> +    abort ();
> +
> +  if (foo32x2_le (b) != 0.0f)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-rtl-dump "deleting noop move" "combine" { target aarch64*-*-* } } } */

Any particular reason why it doesn't work for x86?

> +/* { dg-final { cleanup-rtl-dump "combine" } } */

Thanks.

-- 
H.J.

Reply via email to