On Wed, Dec 4, 2013 at 8:06 AM, Tejas Belagod <tbela...@arm.com> wrote: > Thanks Richard. Here is a revised patch. Sorry about the delay - I was > investigating to make sure an LRA ICE I was seeing on aarch64 was unrelated > to this patch. I've added a test case that I expect to pass for aarch64. > I've also added the tests that you suggested for MIPS, but haven't checked > for the target because I'm not sure what optimizations happen on MIPS. > > OK for trunk? > > Thanks, > Tejas. > > 2013-12-04 Tejas Belagod <tejas.bela...@arm.com> > > > gcc/ > * rtlanal.c (set_noop_p): Return nonzero in case of redundant > vec_select > for overlapping register lanes. > > testsuite/ > * config/gcc.dg/vect/vect-nop-move.c: New. > > > diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c > index 0cd0c7e..e1388c8 100644 > --- a/gcc/rtlanal.c > +++ b/gcc/rtlanal.c > @@ -1180,6 +1180,26 @@ set_noop_p (const_rtx set) > dst = SUBREG_REG (dst); > } > > + /* It is a NOOP if destination overlaps with selected src vector > + elements. 
*/ > + if (GET_CODE (src) == VEC_SELECT > + && REG_P (XEXP (src, 0)) && REG_P (dst) > + && HARD_REGISTER_P (XEXP (src, 0)) > + && HARD_REGISTER_P (dst)) > + { > + int i; > + rtx par = XEXP (src, 1); > + rtx src0 = XEXP (src, 0); > + int c0 = INTVAL (XVECEXP (par, 0, 0)); > + HOST_WIDE_INT offset = GET_MODE_UNIT_SIZE (GET_MODE (src0)) * c0; > + > + for (i = 1; i < XVECLEN (par, 0); i++) > + if (INTVAL (XVECEXP (par, 0, i)) != c0 + i) > + return 0; > + return simplify_subreg_regno (REGNO (src0), GET_MODE (src0), > + offset, GET_MODE (dst)) == (int)REGNO > (dst); > + } > + > return (REG_P (src) && REG_P (dst) > && REGNO (src) == REGNO (dst)); > } > diff --git a/gcc/testsuite/gcc.dg/vect/vect-nop-move.c > b/gcc/testsuite/gcc.dg/vect/vect-nop-move.c > new file mode 100644 > index 0000000..1941933 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-nop-move.c > @@ -0,0 +1,64 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target vect_float } */ > +/* { dg-options "-O3 -fdump-rtl-combine-details" } */ > + > +extern void abort (void); > + > +#define NOINLINE __attribute__((noinline)) > + > +typedef float float32x4_t __attribute__ ((__vector_size__ (16))); > +typedef float float32x2_t __attribute__ ((__vector_size__ (8))); > + > +NOINLINE float > +foo32x4_be (float32x4_t x) > +{ > + return x[3]; > +} > + > +NOINLINE float > +foo32x4_le (float32x4_t x) > +{ > + return x[0]; > +} > + > +NOINLINE float > +bar (float a) > +{ > + return a; > +} > + > +NOINLINE float > +foo32x2_be (float32x2_t x) > +{ > + return bar (x[1]); > +} > + > +NOINLINE float > +foo32x2_le (float32x2_t x) > +{ > + return bar (x[0]); > +} > + > +int > +main() > +{ > + float32x4_t a = { 0.0f, 1.0f, 2.0f, 3.0f }; > + float32x2_t b = { 0.0f, 1.0f }; > + > + if (foo32x4_be (a) != 3.0f) > + abort (); > + > + if (foo32x4_le (a) != 0.0f) > + abort (); > + > + if (foo32x2_be (b) != 1.0f) > + abort (); > + > + if (foo32x2_le (b) != 0.0f) > + abort (); > + > + return 0; > +} > + > +/* { dg-final { 
scan-rtl-dump "deleting noop move" "combine" { target aarch64*-*-* } } } */
Any particular reason why it doesn't work for x86?

> +/* { dg-final { cleanup-rtl-dump "combine" } } */

Thanks.

--
H.J.