Hi! There were 2 issues visible on this new testcase, one that we didn't have special POINTER_TYPE_P handling in a few spots of expand_omp_simd - for pointers we need to use POINTER_PLUS_EXPR and need to have the non-pointer part in sizetype, for non-rectangular loop on the other side we can rely on multiplication factor 1, pointers can't be multiplied, without those changes we'd ICE. The other issue was that we put n2 expression directly into a comparison in a condition and regimplified that, for the &a[512] case that and with gimplification being destructed that unfortunately meant modification of original fd->loops[?].n2. Fixed by unsharing the expression. This was causing a runtime failure on the testcase.
Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk. 2022-07-29 Jakub Jelinek <ja...@redhat.com> PR middle-end/106449 * omp-expand.cc (expand_omp_simd): Fix up handling of pointer iterators in non-rectangular simd loops. Unshare fd->loops[i].n2 or n2 before regimplifying it inside of a condition. * testsuite/libgomp.c-c++-common/pr106449.c: New test. --- gcc/omp-expand.cc.jj 2022-06-28 13:03:30.930689700 +0200 +++ gcc/omp-expand.cc 2022-07-28 10:47:03.138939297 +0200 @@ -6714,7 +6696,7 @@ expand_omp_simd (struct omp_region *regi if (fd->loops[i].m2) t = n2v = create_tmp_var (itype); else - t = fold_convert (itype, fd->loops[i].n2); + t = fold_convert (itype, unshare_expr (fd->loops[i].n2)); t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, GSI_CONTINUE_LINKING); tree v = fd->loops[i].v; @@ -6728,7 +6710,7 @@ expand_omp_simd (struct omp_region *regi if (fd->collapse > 1 && !broken_loop) t = n2var; else - t = fold_convert (type, n2); + t = fold_convert (type, unshare_expr (n2)); t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, GSI_CONTINUE_LINKING); tree v = fd->loop.v; @@ -6840,7 +6819,7 @@ expand_omp_simd (struct omp_region *regi if (fd->loops[i].m2) t = nextn2v = create_tmp_var (itype); else - t = fold_convert (itype, fd->loops[i].n2); + t = fold_convert (itype, unshare_expr (fd->loops[i].n2)); t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, GSI_CONTINUE_LINKING); tree v = fd->loops[i].v; @@ -6870,17 +6849,25 @@ expand_omp_simd (struct omp_region *regi ne->probability = e->probability.invert (); gsi = gsi_after_labels (init_bb); - t = fold_convert (TREE_TYPE (fd->loops[i + 1].v), - fd->loops[i + 1].n1); if (fd->loops[i + 1].m1) { - tree t2 = fold_convert (TREE_TYPE (t), + tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v), fd->loops[i + 1 - fd->loops[i + 1].outer].v); - tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1); - t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3); - t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2); + if (POINTER_TYPE_P (TREE_TYPE (t2))) + t = fold_build_pointer_plus (t2, fd->loops[i + 1].n1); + else + { + t = fold_convert (TREE_TYPE (fd->loops[i + 1].v), + fd->loops[i + 1].n1); + tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1); + t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3); + t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2); + } } + else + t = fold_convert (TREE_TYPE (fd->loops[i + 1].v), + fd->loops[i + 1].n1); expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t); if (fd->loops[i + 1].m2) { @@ -6889,14 +6876,19 @@ expand_omp_simd (struct omp_region *regi gcc_assert (n2v == NULL_TREE); n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v)); } - t = fold_convert (TREE_TYPE (fd->loops[i + 1].v), - fd->loops[i + 1].n2); - tree t2 = fold_convert (TREE_TYPE (t), + tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v), fd->loops[i + 1 - fd->loops[i + 1].outer].v); - tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2); - t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3); - t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2); + if (POINTER_TYPE_P (TREE_TYPE (t2))) + t = fold_build_pointer_plus (t2, fd->loops[i + 1].n2); + else + { + t = fold_convert (TREE_TYPE (fd->loops[i + 1].v), + fd->loops[i + 1].n2); + tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2); + t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3); + t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2); + } expand_omp_build_assign (&gsi, n2v, t); } if (i + 2 == fd->collapse && n2var) @@ -6912,17 +6904,25 @@ expand_omp_simd (struct omp_region *regi tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v); if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2) { + tree itype = TREE_TYPE (fd->loops[i].v); + if (POINTER_TYPE_P (itype)) + itype = signed_type_for (itype); t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR ? -1 : 1)); t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, fd->loops[i + 1].step), t); - if (fd->loops[i + 1].m2) - t = fold_build2 (PLUS_EXPR, itype, t, n2v); - else + if (fd->loops[i + 1].m2 == NULL_TREE) t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, fd->loops[i + 1].n2)); + else if (POINTER_TYPE_P (TREE_TYPE (n2v))) + { + t = fold_build_pointer_plus (n2v, t); + t = fold_convert (itype, t); + } + else + t = fold_build2 (PLUS_EXPR, itype, t, n2v); t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, fd->loops[i + 1].v)); tree step = fold_convert (itype, fd->loops[i + 1].step); --- libgomp/testsuite/libgomp.c-c++-common/pr106449.c.jj 2022-07-28 11:35:50.276558893 +0200 +++ libgomp/testsuite/libgomp.c-c++-common/pr106449.c 2022-07-28 12:03:27.495268905 +0200 @@ -0,0 +1,62 @@ +/* PR middle-end/106449 */ +/* { dg-do run } */ + +void +foo (void) +{ + int a[1024], *b[65536]; + int *p, *q, **r = &b[0], i; + #pragma omp simd collapse(2) linear(r : 2) + for (p = &a[0]; p < &a[512]; p++) + for (q = p + 64; q < p + 128; q++) + { + *r++ = p; + *r++ = q; + } + for (i = 0; i < 32768; i++) + if (b[2 * i] != &a[i / 64] || b[2 * i + 1] != &a[(i / 64) + 64 + (i % 64)]) + __builtin_abort (); +} + +void +bar (int n, int m) +{ + int a[1024], *b[65536]; + int *p, *q, **r = &b[0], i; + #pragma omp parallel for simd collapse(2) linear(r : 2) + for (p = &a[0]; p < &a[512]; p++) + for (q = p + n; q < p + m; q++) + { + *r++ = p; + *r++ = q; + } + for (i = 0; i < 32768; i++) + if (b[2 * i] != &a[i / 64] || b[2 * i + 1] != &a[(i / 64) + 64 + (i % 64)]) + __builtin_abort (); +} + +void +baz (int n, int m) +{ + int a[1024], *b[8192]; + int *p, *q, **r = &b[0], i; + #pragma omp parallel for simd collapse(2) linear(r : 2) + for (p = &a[0]; p < &a[512]; p += 4) + for (q = p + n; q < p + m; q += 2) + { + *r++ = p; + *r++ = q; + } + for (i = 0; i < 4096; i++) + if (b[2 * i] != &a[(i / 32) * 4] || b[2 * i + 1] != &a[(i / 32) * 4 + 64 + (i % 32) * 2]) + __builtin_abort (); +} + +int +main () +{ + foo (); + bar (64, 128); + baz (64, 128); + return 0; +} Jakub