On Mon, Nov 19, 2012 at 12:24 PM, Eric Botcazou <ebotca...@adacore.com> wrote: >> Yes, I'll be looking into this soon. > > We have a related regression on SPARC: > > FAIL: gfortran.dg/minmaxloc_5.f90 -O3 -fomit-frame-pointer -funroll-loops > execution test > FAIL: gfortran.dg/minmaxloc_5.f90 -O3 -fomit-frame-pointer -funroll-all-loops > -finline-functions execution test > FAIL: gfortran.dg/minmaxloc_6.f90 -O3 -fomit-frame-pointer -funroll-loops > execution test > FAIL: gfortran.dg/minmaxloc_6.f90 -O3 -fomit-frame-pointer -funroll-all-loops > -finline-functions execution test > > whose failure mode is exactly the same as for Honza's testcase. I even have a > more reduced testcase (6 lines, attached) in case you'd prefer working on it. > > Reproducible with a cross to sparc-linux with -mcpu=v8 -O3 -funroll-loops and > grepping for mem:SF (const_int 0 [0]) in the RTL dumps.
Thanks for this reduced test case, that's saving me a lot of work! Can you please try and see if the following C test case also fails? It looks like the memcpy is expanded to a loop that is unrolled, and then mis-optimized. I haven't found out yet why... Ciao! Steven
/* Stand-alone C reproducer for the regression discussed in this message.
   It uses no #include lines: the two library functions it needs are
   declared by hand, with __SIZE_TYPE__ (a GCC predefined macro) standing
   in for size_t.  */
extern void abort (void) __attribute__ ((__noreturn__));
extern void *memcpy (void *__restrict, const void *__restrict, __SIZE_TYPE__);

/* Find the 1-based position of the largest element of a 100-element float
   array and abort () if that position is not 100.

   The body is written as explicit labels and gotos with compiler-style
   temporaries (D833, D834, ...).
   NOTE(review): this looks like a hand-translation of a compiler dump of
   the Fortran test named in the quoted message -- confirm with the author.
   The exact statement order and control flow are what the test exercises,
   so the code should not be restructured.

   The noinline/noclone attributes ask GCC not to inline or clone this
   function (presumably so it is compiled on its own exactly as written).  */
static void __attribute__ ((__noinline__, __noclone__))
ga4076 (void)
{
  int D833;
  float D834;
  int D837;
  int D840;
  float D841;
  int D844;
  float dda[100];
  int ids;
  /* Source data: 10, 20, ..., 1000.  The values are strictly increasing,
     so the maximum is the last element (1-based position 100).  */
  static float A0[100] = {
    10e+0, 20e+0, 30e+0, 40e+0, 50e+0, 60e+0, 70e+0, 80e+0, 90e+0, 10e+1,
    11e+1, 12e+1, 13e+1, 14e+1, 15e+1, 16e+1, 17e+1, 18e+1, 19e+1, 20e+1,
    21e+1, 22e+1, 23e+1, 24e+1, 25e+1, 26e+1, 27e+1, 28e+1, 29e+1, 30e+1,
    31e+1, 32e+1, 33e+1, 34e+1, 35e+1, 36e+1, 37e+1, 38e+1, 39e+1, 40e+1,
    41e+1, 42e+1, 43e+1, 44e+1, 45e+1, 46e+1, 47e+1, 48e+1, 49e+1, 50e+1,
    51e+1, 52e+1, 53e+1, 54e+1, 55e+1, 56e+1, 57e+1, 58e+1, 59e+1, 60e+1,
    61e+1, 62e+1, 63e+1, 64e+1, 65e+1, 66e+1, 67e+1, 68e+1, 69e+1, 70e+1,
    71e+1, 72e+1, 73e+1, 74e+1, 75e+1, 76e+1, 77e+1, 78e+1, 79e+1, 80e+1,
    81e+1, 82e+1, 83e+1, 84e+1, 85e+1, 86e+1, 87e+1, 88e+1, 89e+1, 90e+1,
    91e+1, 92e+1, 93e+1, 94e+1, 95e+1, 96e+1, 97e+1, 98e+1, 99e+1, 10e+2 };
  /* Copy the static data into the local array that the loops below read.
     Per the message above, the expansion of this memcpy is the suspected
     trigger of the mis-optimization.  */
  memcpy (dda, A0, sizeof (dda));
  float limit3;
  int offset2;
  int pos1;
  int S4;
  /* limit3 is the running maximum, starting at negative infinity.
     pos1 is the result position; S4 is the 1-based loop index;
     offset2 is added to S4 when recording a position and stays 0.  */
  limit3 = -1. * __builtin_inf();
  pos1 = 0;
  offset2 = 0;
  S4 = 1;
  /* First scan (D831/D832/D836): advance S4 until an element compares
     >= limit3.  Every element here is finite, so the comparison against
     negative infinity already succeeds at S4 == 1.  */
 D831:
  if (S4 > 100) goto L3; else goto D832;
 D832:
  D833 = S4 - 1;
  D834 = dda[D833];
  if (D834 >= limit3) goto D835; else goto D836;
  /* First match: record it as the current maximum and continue with the
     second scan at L1.  S4 is not incremented on this path, so the second
     scan starts by re-examining the same element.  */
 D835:
  D837 = S4 - 1;
  limit3 = dda[D837];
  pos1 = S4 + offset2;
  goto L1;
 D836:
  S4 = S4 + 1;
  goto D831;
  /* Taken only when the first scan passes all 100 elements without a
     match; the result is then position 1.  Not expected to be reached
     with the data above.  */
 L3:
  pos1 = 1;
  goto L2;
  /* Second scan (L1/D839/D842/D843): walk the remaining elements and
     update the maximum only on a strictly greater value, so the earliest
     position wins on ties.  */
 L1:
  if (S4 > 100) goto L2; else goto D839;
 D839:
  D840 = S4 - 1;
  D841 = dda[D840];
  if (D841 > limit3) goto D842; else goto D843;
 D842:
  D844 = S4 - 1;
  limit3 = dda[D844];
  pos1 = S4 + offset2;
  /* D842 falls through into the increment at D843.  */
 D843:
  S4 = S4 + 1;
  goto L1;
  /* Check the result: a correct compilation yields 100.  Any other value
     means the loops read wrong data or were miscompiled.  */
 L2:
  ids = pos1;
  if (ids != 100) abort ();
}

/* Run the check once; returns 0 unless ga4076 called abort ().  */
int
main (void)
{
  ga4076 ();
  return 0;
}