http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46886
Jakub Jelinek <jakub at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |rakdver at gcc dot gnu.org,
                   |                            |spop at gcc dot gnu.org

--- Comment #1 from Jakub Jelinek <jakub at gcc dot gnu.org> 2011-01-18 17:00:29 UTC ---
It seems that one extra, incorrect iteration is added after GOMP_parallel_end:

<bb 16>:
  D.1592_69 = D.1591_183 * 4;

<bb 17>:
  # D.1592_15 = PHI <0(15), D.1592_69(16)>
  D.1749_71 = (<unnamed-unsigned:64>) D.1592_15;
  D.1750_72 = MAX_EXPR <D.1749_71, 1>;
  D.1751_73 = __builtin_malloc (D.1750_72);
  D.1594_74 = (void * restrict) D.1751_73;
  D.1898_232 = (<unnamed-unsigned:64>) D.1591_183;
  D.1899_233 = D.1591_183 >= 0;
  D.1900_234 = D.1898_232 > 199;
  D.1901_235 = D.1899_233 && D.1900_234;
  if (D.1901_235 != 0)
    goto <bb 18>;
  else
    goto <bb 19>;

<bb 18>:
  .paral_data_store.64.D.1910 = D.1578_1;
  .paral_data_store.64.D.1911 = D.1594_74;
  .paral_data_store.64.D.1912 = D.1898_232;
  __builtin_GOMP_parallel_start (build._loopfn.1, &.paral_data_store.64, 2);
  build._loopfn.1 (&.paral_data_store.64);
  __builtin_GOMP_parallel_end ();
  ivtmp.60_247 = D.1898_232;
  S.7_248 = (integer(kind=8)) ivtmp.60_247;
  D.1755_249 = *D.1578_1[S.7_248];
  MEM[(real(kind=4)[0:] *)D.1594_74][S.7_248] = D.1755_249;
  S.7_251 = S.7_248 + 1;
  ivtmp.60_252 = ivtmp.60_247 + 1;

S.7_248 is equal to the number of ints allocated, so this iteration stores
past the end of the malloc'ed area.
The loop is initially:

loop_4 (header = 18, latch = 19, niter = , upper_bound = 9223372036854775808, estimate = 9223372036854775808)
{
  bb_18 (preds = {bb_17 bb_19 }, succs = {bb_20 bb_19 })
  {
    <bb 18>:
    # S.7_7 = PHI <0(17), S.7_80(19)>
    # .MEM_152 = PHI <.MEM_182(17), .MEM_185(19)>
    if (S.7_7 > D.1586_64)
      goto <bb 20>;
    else
      goto <bb 19>;
  }
  bb_19 (preds = {bb_18 }, succs = {bb_18 })
  {
    <bb 19>:
    # VUSE <.MEM_152>
    D.1755_79 = *D.1578_1[S.7_7];
    # .MEM_185 = VDEF <.MEM_152>
    MEM[(real(kind=4)[0:] *)D.1594_74][S.7_7] = D.1755_79;
    S.7_80 = S.7_7 + 1;
    goto <bb 18>;
  }
}

and try_get_loop_niter computes niter->niter as
(<unnamed-unsigned:64>) D.1591_183.

On this C test case:

__attribute__((noinline, noclone)) void
foo (int *__restrict__ p, int *__restrict__ q, int n)
{
  int i;
  for (i = 0; i < n; i++)
    p[i] = q[i];
}

int
main (void)
{
  int *p = __builtin_calloc (1024, sizeof (int));
  int *q = __builtin_calloc (1024, sizeof (int));
  foo (p, q, 1024);
  return 0;
}

try_get_loop_niter instead computes niter->niter as n - 1 (which seems strange
to me), but then it works even with the extra iteration added after
GOMP_parallel_end (as that iteration is done with the iv set to n - 1).