I tested this case with -fno-tree-loop-im and -fno-tree-pre, and it seems that GCC could hoist j+1 outside of the i loop:
t3.c:5:5: note: hoisting out of the vectorized loop: _10 = (sizetype) j_25; t3.c:5:5: note: hoisting out of the vectorized loop: _11 = _10 + 1; t3.c:5:5: note: hoisting out of the vectorized loop: _12 = _11 * 4; t3.c:5:5: note: hoisting out of the vectorized loop: _14 = b_13(D) + _12; t3.c:5:5: note: hoisting out of the vectorized loop: _15 = *_14; t3.c:5:5: note: hoisting out of the vectorized loop: _16 = _15 + 1; But your suggestion is still nice as it can remove a branch and make the code more brief. I have updated the patch and also included the nested loop example into the test case. Thank you! Cong diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8a38316..2637309 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2013-10-15 Cong Hou <co...@google.com> + + * tree-vect-loop-manip.c (vect_loop_versioning): Hoist loop invariant + statement that contains data refs with zero-step. + 2013-10-14 David Malcolm <dmalc...@redhat.com> * dumpfile.h (gcc::dump_manager): New class, to hold state diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 075d071..9d0f4a5 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2013-10-15 Cong Hou <co...@google.com> + + * gcc.dg/vect/pr58508.c: New test. + 2013-10-14 Tobias Burnus <bur...@net-b.de> PR fortran/58658 diff --git a/gcc/testsuite/gcc.dg/vect/pr58508.c b/gcc/testsuite/gcc.dg/vect/pr58508.c new file mode 100644 index 0000000..6484a65 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr58508.c @@ -0,0 +1,70 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ + + +/* The GCC vectorizer generates loop versioning for the following loop + since there may exist aliasing between A and B. The predicate checks + if A may alias with B across all iterations. Then for the loop in + the true body, we can assert that *B is a loop invariant so that + we can hoist the load of *B before the loop body. */ + +void test1 (int* a, int* b) +{ + int i; + for (i = 0; i < 100000; ++i) + a[i] = *b + 1; +} + +/* A test case with nested loops. The load of b[j+1] in the inner + loop should be hoisted. */ + +void test2 (int* a, int* b) +{ + int i, j; + for (j = 0; j < 100000; ++j) + for (i = 0; i < 100000; ++i) + a[i] = b[j+1] + 1; +} + +/* A test case with ifcvt transformation. */ + +void test3 (int* a, int* b) +{ + int i, t; + for (i = 0; i < 10000; ++i) + { + if (*b > 0) + t = *b * 2; + else + t = *b / 2; + a[i] = t; + } +} + +/* A test case in which the store in the loop can be moved outside + in the versioned loop with alias checks. Note this loop won't + be vectorized. */ + +void test4 (int* a, int* b) +{ + int i; + for (i = 0; i < 100000; ++i) + *a += b[i]; +} + +/* A test case in which the load and store in the loop to b + can be moved outside in the versioned loop with alias checks. + Note this loop won't be vectorized. */ + +void test5 (int* a, int* b) +{ + int i; + for (i = 0; i < 100000; ++i) + { + *b += a[i]; + a[i] = *b; + } +} + +/* { dg-final { scan-tree-dump-times "hoist" 8 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index 574446a..1cc563c 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -2477,6 +2477,73 @@ vect_loop_versioning (loop_vec_info loop_vinfo, adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi)); } + + /* Extract load statements on memrefs with zero-stride accesses. */ + + if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) + { + /* In the loop body, we iterate each statement to check if it is a load. + Then we check the DR_STEP of the data reference. If DR_STEP is zero, + then we will hoist the load statement to the loop preheader. */ + + basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); + int nbbs = loop->num_nodes; + + for (int i = 0; i < nbbs; ++i) + { + for (gimple_stmt_iterator si = gsi_start_bb (bbs[i]); + !gsi_end_p (si);) + { + gimple stmt = gsi_stmt (si); + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); + + if (is_gimple_assign (stmt) + && (!dr + || (DR_IS_READ (dr) && integer_zerop (DR_STEP (dr))))) + { + bool hoist = true; + ssa_op_iter iter; + tree var; + + /* We hoist a statement if all SSA uses in it are defined + outside of the loop. */ + FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE) + { + gimple def = SSA_NAME_DEF_STMT (var); + if (!gimple_nop_p (def) + && flow_bb_inside_loop_p (loop, gimple_bb (def))) + { + hoist = false; + break; + } + } + + if (hoist) + { + if (dr) + gimple_set_vuse (stmt, NULL); + + gsi_remove (&si, false); + gsi_insert_on_edge_immediate (loop_preheader_edge (loop), + stmt); + + if (dump_enabled_p ()) + { + dump_printf_loc + (MSG_NOTE, vect_location, + "hoisting out of the vectorized loop: "); + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); + dump_printf (MSG_NOTE, "\n"); + } + continue; + } + } + gsi_next (&si); + } + } + } + /* End loop-exit-fixes after versioning. */ if (cond_expr_stmt_list) ^^^^^^^^^^^^^^^END OF PATCH^^^^^^^^^^^^^^^^^^^ On Thu, Oct 17, 2013 at 2:17 AM, Richard Biener <rguent...@suse.de> wrote: > On Wed, 16 Oct 2013, Cong Hou wrote: > >> On Wed, Oct 16, 2013 at 2:02 AM, Richard Biener <rguent...@suse.de> wrote: >> > On Tue, 15 Oct 2013, Cong Hou wrote: >> > >> >> Thank you for your reminder, Jeff! I just noticed Richard's comment. I >> >> have modified the patch according to that. >> >> >> >> The new patch is attached. >> > >> > (posting patches inline is easier for review, now you have to deal >> > with no quoting markers ;)) >> > >> > Comments inline. >> > >> > diff --git a/gcc/ChangeLog b/gcc/ChangeLog >> > index 8a38316..2637309 100644 >> > --- a/gcc/ChangeLog >> > +++ b/gcc/ChangeLog >> > @@ -1,3 +1,8 @@ >> > +2013-10-15 Cong Hou <co...@google.com> >> > + >> > + * tree-vect-loop-manip.c (vect_loop_versioning): Hoist loop >> > invariant >> > + statement that contains data refs with zero-step. >> > + >> > 2013-10-14 David Malcolm <dmalc...@redhat.com> >> > >> > * dumpfile.h (gcc::dump_manager): New class, to hold state >> > diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog >> > index 075d071..9d0f4a5 100644 >> > --- a/gcc/testsuite/ChangeLog >> > +++ b/gcc/testsuite/ChangeLog >> > @@ -1,3 +1,7 @@ >> > +2013-10-15 Cong Hou <co...@google.com> >> > + >> > + * gcc.dg/vect/pr58508.c: New test. >> > + >> > 2013-10-14 Tobias Burnus <bur...@net-b.de> >> > >> > PR fortran/58658 >> > diff --git a/gcc/testsuite/gcc.dg/vect/pr58508.c >> > b/gcc/testsuite/gcc.dg/vect/pr58508.c >> > new file mode 100644 >> > index 0000000..cb22b50 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.dg/vect/pr58508.c >> > @@ -0,0 +1,20 @@ >> > +/* { dg-do compile } */ >> > +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ >> > + >> > + >> > +/* The GCC vectorizer generates loop versioning for the following loop >> > + since there may exist aliasing between A and B. The predicate checks >> > + if A may alias with B across all iterations. Then for the loop in >> > + the true body, we can assert that *B is a loop invariant so that >> > + we can hoist the load of *B before the loop body. */ >> > + >> > +void foo (int* a, int* b) >> > +{ >> > + int i; >> > + for (i = 0; i < 100000; ++i) >> > + a[i] = *b + 1; >> > +} >> > + >> > + >> > +/* { dg-final { scan-tree-dump-times "hoist" 2 "vect" } } */ >> > +/* { dg-final { cleanup-tree-dump "vect" } } */ >> > diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c >> > index 574446a..f4fdec2 100644 >> > --- a/gcc/tree-vect-loop-manip.c >> > +++ b/gcc/tree-vect-loop-manip.c >> > @@ -2477,6 +2477,92 @@ vect_loop_versioning (loop_vec_info loop_vinfo, >> > adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi)); >> > } >> > >> > >> > Note that applying this kind of transform at this point invalidates >> > some of the earlier analysis the vectorizer performed (namely the >> > def-kind which now effectively gets vect_external_def from >> > vect_internal_def). In this case it doesn't seem to cause any >> > issues (we re-compute the def-kind everytime we need it (how wasteful)). >> > >> > + /* Extract load and store statements on pointers with zero-stride >> > + accesses. */ >> > + if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) >> > + { >> > + /* In the loop body, we iterate each statement to check if it is a >> > load >> > + or store. Then we check the DR_STEP of the data reference. If >> > + DR_STEP is zero, then we will hoist the load statement to the loop >> > + preheader, and move the store statement to the loop exit. */ >> > >> > We don't move the store yet. Micha has a patch pending that enables >> > vectorization of zero-step stores. >> > >> > + for (gimple_stmt_iterator si = gsi_start_bb (loop->header); >> > + !gsi_end_p (si);) >> > >> > While technically ok now (vectorized loops contain a single basic block) >> > please use LOOP_VINFO_BBS () to get at the vector of basic-blcoks >> > and iterate over them like other code does. >> >> >> Have done it. >> >> >> > >> > + { >> > + gimple stmt = gsi_stmt (si); >> > + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); >> > + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); >> > + >> > + if (dr && integer_zerop (DR_STEP (dr))) >> > + { >> > + if (DR_IS_READ (dr)) >> > + { >> > + if (dump_enabled_p ()) >> > + { >> > + dump_printf_loc >> > + (MSG_NOTE, vect_location, >> > + "hoist the statement to outside of the loop "); >> > >> > "hoisting out of the vectorized loop: " >> > >> > + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); >> > + dump_printf (MSG_NOTE, "\n"); >> > + } >> > + >> > + gsi_remove (&si, false); >> > + gsi_insert_on_edge_immediate (loop_preheader_edge >> > (loop), stmt); >> > >> > Note that this will result in a bogus VUSE on the stmt at this point which >> > will be only fixed because of implementation details of loop versioning. >> > Either get the correct VUSE from the loop header virtual PHI node >> > preheader edge (if there is none then the current VUSE is the correct one >> > to use) or clear it. >> >> >> I just cleared the VUSE since I noticed that after the vectorization >> pass the correct VUSE is reassigned to the load. >> >> >> > >> > + } >> > + /* TODO: We also consider vectorizing loops containing >> > zero-step >> > + data refs as writes. For example: >> > + >> > + int a[N], *s; >> > + for (i = 0; i < N; i++) >> > + *s += a[i]; >> > + >> > + In this case the write to *s can be also moved after the >> > + loop. */ >> > >> > Note that you then invalidate even more vectorizer analysis - you >> > basically introduce a scalar reduction (you have to add a PHI node). >> > Which means that the transform has to happen elsewhere. >> > >> > As Jakub now tries with if-conversion this would also be a candidate >> > for applying the loop versioning before even starting the rest of the >> > vectorizer analysis code. >> > >> > That said, I'd remove the TODO at this point because it's clearly not >> > possible to implement just here ;) >> >> >> Yes. This comment is removed. >> >> >> > >> > + continue; >> > + } >> > + else if (!dr) >> > + { >> > + bool hoist = true; >> > + for (size_t i = 0; i < gimple_num_ops (stmt); i++) >> > >> > You are checking all kinds of statements, including assignments >> > of which you are also checking the LHS ... restricting to >> > assignments you can start walking at i = 1. >> >> >> Done. >> >> >> > >> > + { >> > + tree op = gimple_op (stmt, i); >> > + if (TREE_CODE (op) == INTEGER_CST >> > + || TREE_CODE (op) == REAL_CST) >> > >> > There are other constants as well - just check >> > >> > if (is_gimple_min_invariant (op)) >> > >> > + continue; >> > + if (TREE_CODE (op) == SSA_NAME) >> > + { >> > + gimple def = SSA_NAME_DEF_STMT (op); >> > + if (def == stmt >> > >> > with starting at op 1 you can drop this. >> > >> > + || gimple_nop_p (def) >> > + || !flow_bb_inside_loop_p (loop, gimple_bb (def))) >> > + continue; >> > + } >> > >> > Note that you fail to hoist if-converted code this way because >> > op1 of >> > >> > name_1 = a_2 < b_4 ? x_5 : y_6; >> > >> > is 'a_2 < b_4', a tree expression and not an SSA name (ugh). Maybe >> > we don't care (it's just a missed optimization), but if you are >> > restricting yourself to hoisting assignments without a data-ref >> > then you can walk over SSA uses on the stmt (instead of over gimple >> > ops) with >> > >> > FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_USE) >> > >> > which would automagically take care of that case. >> >> >> I used FOR_EACH_SSA_TREE_OPERAND() here, and then I don't have to deal >> with different constant types. >> >> >> > >> > Can you add a testcase which involves invariant if-conversion? >> > Can you add a testcase with just an invariant store to make sure >> > we don't wreck it? Can you add a testcase with an invariant store >> > and load (the reduction case), too? >> >> >> The if-conversion case is added. But the current vectorizer will not >> vectorize loops with stores to zero-stride memory access >> (vect_analyze_loop() fails in this case). Are you sure to add the >> testcase with this? (I still added two tests for those two cases). >> >> >> > >> > + hoist = false; >> > + break; >> > + } >> > >> > For example you'll hoist all labels this way (no ops), as well as the >> > loop exit GIMPLE_COND in case it's operands were loop invariant, >> > breaking the CFG ... so please add && is_gimple_assign () to the >> > if (!dr) check ;) >> >> >> Done. >> >> >> I appreciate your detailed comments! The new patch is pasted below >> (since tabs cannot show here I also attached a text file with the >> patch including tabs). > > (supposedly messed up on mine or your end somehow) > > It just occured to me that you have to verify that you can hoist > the load, as for example with > > void test1 (int* a, int* b) > { > int i, j; > for (j = 0; j < 100; ++j) > for (i = 0; i < 100000; ++i) > a[i] = b[j+1] + 1; > } > > DR_STEP will be zero, but if you build with -fno-tree-loop-im > -fno-tree-pre the stmt computing j+1 will still be in the i loop > and thus you would need to move that as well. > > A way out is to double-check all SSA uses on the load as well, > like you do on other assignments. That is, combine both > if arms under a single > > if (is_gimple_assign (stmt) > && (!dr > || (DR_IS_READ (dr) && integer_zerop (DR_STEP (dr)))) > > Thanks, > Richard. > > >> >> thanks, >> Cong >> >> >> >> >> diff --git a/gcc/ChangeLog b/gcc/ChangeLog >> index 8a38316..2637309 100644 >> --- a/gcc/ChangeLog >> +++ b/gcc/ChangeLog >> @@ -1,3 +1,8 @@ >> +2013-10-15 Cong Hou <co...@google.com> >> + >> + * tree-vect-loop-manip.c (vect_loop_versioning): Hoist loop invariant >> + statement that contains data refs with zero-step. >> + >> 2013-10-14 David Malcolm <dmalc...@redhat.com> >> >> * dumpfile.h (gcc::dump_manager): New class, to hold state >> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog >> index 075d071..9d0f4a5 100644 >> --- a/gcc/testsuite/ChangeLog >> +++ b/gcc/testsuite/ChangeLog >> @@ -1,3 +1,7 @@ >> +2013-10-15 Cong Hou <co...@google.com> >> + >> + * gcc.dg/vect/pr58508.c: New test. >> + >> 2013-10-14 Tobias Burnus <bur...@net-b.de> >> >> PR fortran/58658 >> diff --git a/gcc/testsuite/gcc.dg/vect/pr58508.c >> b/gcc/testsuite/gcc.dg/vect/pr58508.c >> new file mode 100644 >> index 0000000..a3f6a06 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/vect/pr58508.c >> @@ -0,0 +1,60 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ >> + >> + >> +/* The GCC vectorizer generates loop versioning for the following loop >> + since there may exist aliasing between A and B. The predicate checks >> + if A may alias with B across all iterations. Then for the loop in >> + the true body, we can assert that *B is a loop invariant so that >> + we can hoist the load of *B before the loop body. */ >> + >> +void test1 (int* a, int* b) >> +{ >> + int i; >> + for (i = 0; i < 100000; ++i) >> + a[i] = *b + 1; >> +} >> + >> + >> +/* A test case with ifcvt transformation. */ >> + >> +void test2 (int* a, int* b) >> +{ >> + int i, t; >> + for (i = 0; i < 10000; ++i) >> + { >> + if (*b > 0) >> + t = *b * 2; >> + else >> + t = *b / 2; >> + a[i] = t; >> + } >> +} >> + >> +/* A test case in which the store in the loop can be moved outside >> + in the versioned loop with alias checks. Note this loop won't >> + be vectorized. */ >> + >> +void test3 (int* a, int* b) >> +{ >> + int i; >> + for (i = 0; i < 100000; ++i) >> + *a += b[i]; >> +} >> + >> +/* A test case in which the load and store in the loop to b >> + can be moved outside in the versioned loop with alias checks. >> + Note this loop won't be vectorized. */ >> + >> +void test4 (int* a, int* b) >> +{ >> + int i; >> + for (i = 0; i < 100000; ++i) >> + { >> + *b += a[i]; >> + a[i] = *b; >> + } >> +} >> + >> +/* { dg-final { scan-tree-dump-times "hoist" 6 "vect" } } */ >> +/* { dg-final { cleanup-tree-dump "vect" } } */ >> diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c >> index 574446a..ff0403b 100644 >> --- a/gcc/tree-vect-loop-manip.c >> +++ b/gcc/tree-vect-loop-manip.c >> @@ -2477,6 +2477,88 @@ vect_loop_versioning (loop_vec_info loop_vinfo, >> adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi)); >> } >> >> + >> + /* Extract load statements on memrefs with zero-stride accesses. */ >> + >> + if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) >> + { >> + /* In the loop body, we iterate each statement to check if it is a >> load. >> + Then we check the DR_STEP of the data reference. If DR_STEP is zero, >> + then we will hoist the load statement to the loop preheader. */ >> + >> + basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); >> + int nbbs = loop->num_nodes; >> + >> + for (int i = 0; i < nbbs; ++i) >> + { >> + for (gimple_stmt_iterator si = gsi_start_bb (bbs[i]); >> + !gsi_end_p (si);) >> + { >> + gimple stmt = gsi_stmt (si); >> + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); >> + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); >> + >> + if (dr && integer_zerop (DR_STEP (dr))) >> + { >> + if (DR_IS_READ (dr)) >> + { >> + if (dump_enabled_p ()) >> + { >> + dump_printf_loc >> + (MSG_NOTE, vect_location, >> + "hoisting out of the vectorized loop: "); >> + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); >> + dump_printf (MSG_NOTE, "\n"); >> + } >> + >> + gimple_set_vuse (stmt, NULL); >> + gsi_remove (&si, false); >> + gsi_insert_on_edge_immediate (loop_preheader_edge (loop), >> + stmt); >> + } >> + continue; >> + } >> + else if (!dr && is_gimple_assign (stmt)) >> + { >> + bool hoist = true; >> + ssa_op_iter iter; >> + tree var; >> + >> + /* We hoist a statement if all SSA uses in it are defined >> + outside of the loop. */ >> + FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE) >> + { >> + gimple def = SSA_NAME_DEF_STMT (var); >> + if (!gimple_nop_p (def) >> + && flow_bb_inside_loop_p (loop, gimple_bb (def))) >> + { >> + hoist = false; >> + break; >> + } >> + } >> + >> + if (hoist) >> + { >> + gsi_remove (&si, false); >> + gsi_insert_on_edge_immediate (loop_preheader_edge (loop), >> + stmt); >> + >> + if (dump_enabled_p ()) >> + { >> + dump_printf_loc >> + (MSG_NOTE, vect_location, >> + "hoisting out of the vectorized loop: "); >> + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); >> + dump_printf (MSG_NOTE, "\n"); >> + } >> + continue; >> + } >> + } >> + gsi_next (&si); >> + } >> + } >> + } >> + >> /* End loop-exit-fixes after versioning. */ >> >> if (cond_expr_stmt_list) >> >> >> >> >> >> > >> > + if (hoist) >> > + { >> > + gsi_remove (&si, false); >> > + gsi_insert_on_edge_immediate (loop_preheader_edge (loop), >> > stmt); >> > + >> > + if (dump_enabled_p ()) >> > + { >> > + dump_printf_loc >> > + (MSG_NOTE, vect_location, >> > + "hoist the statement to outside of the loop "); >> > + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); >> > + dump_printf (MSG_NOTE, "\n"); >> > + } >> > + continue; >> > + } >> > + } >> > + gsi_next (&si); >> > + } >> > + } >> > + >> > /* End loop-exit-fixes after versioning. */ >> > >> > if (cond_expr_stmt_list) >> > >> >> >> >> thanks, >> >> Cong >> >> >> >> >> >> On Tue, Oct 15, 2013 at 12:33 PM, Jeff Law <l...@redhat.com> wrote: >> >> > On 10/14/13 17:31, Cong Hou wrote: >> >> >> >> >> >> Any comment on this patch? >> >> > >> >> > Richi replied in the BZ you opened. >> >> > >> >> > http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58508 >> >> > >> >> > Essentially he said emit the load on the edge rather than in the block >> >> > itself. >> >> > jeff >> >> > >> > > -- > Richard Biener <rguent...@suse.de> > SUSE / SUSE Labs > SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 > GF: Jeff Hawn, Jennifer Guild, Felix Imend
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8a38316..2637309 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2013-10-15 Cong Hou <co...@google.com> + + * tree-vect-loop-manip.c (vect_loop_versioning): Hoist loop invariant + statement that contains data refs with zero-step. + 2013-10-14 David Malcolm <dmalc...@redhat.com> * dumpfile.h (gcc::dump_manager): New class, to hold state diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 075d071..9d0f4a5 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2013-10-15 Cong Hou <co...@google.com> + + * gcc.dg/vect/pr58508.c: New test. + 2013-10-14 Tobias Burnus <bur...@net-b.de> PR fortran/58658 diff --git a/gcc/testsuite/gcc.dg/vect/pr58508.c b/gcc/testsuite/gcc.dg/vect/pr58508.c new file mode 100644 index 0000000..6484a65 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr58508.c @@ -0,0 +1,70 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ + + +/* The GCC vectorizer generates loop versioning for the following loop + since there may exist aliasing between A and B. The predicate checks + if A may alias with B across all iterations. Then for the loop in + the true body, we can assert that *B is a loop invariant so that + we can hoist the load of *B before the loop body. */ + +void test1 (int* a, int* b) +{ + int i; + for (i = 0; i < 100000; ++i) + a[i] = *b + 1; +} + +/* A test case with nested loops. The load of b[j+1] in the inner + loop should be hoisted. */ + +void test2 (int* a, int* b) +{ + int i, j; + for (j = 0; j < 100000; ++j) + for (i = 0; i < 100000; ++i) + a[i] = b[j+1] + 1; +} + +/* A test case with ifcvt transformation. */ + +void test3 (int* a, int* b) +{ + int i, t; + for (i = 0; i < 10000; ++i) + { + if (*b > 0) + t = *b * 2; + else + t = *b / 2; + a[i] = t; + } +} + +/* A test case in which the store in the loop can be moved outside + in the versioned loop with alias checks. Note this loop won't + be vectorized. */ + +void test4 (int* a, int* b) +{ + int i; + for (i = 0; i < 100000; ++i) + *a += b[i]; +} + +/* A test case in which the load and store in the loop to b + can be moved outside in the versioned loop with alias checks. + Note this loop won't be vectorized. */ + +void test5 (int* a, int* b) +{ + int i; + for (i = 0; i < 100000; ++i) + { + *b += a[i]; + a[i] = *b; + } +} + +/* { dg-final { scan-tree-dump-times "hoist" 8 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index 574446a..1cc563c 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -2477,6 +2477,73 @@ vect_loop_versioning (loop_vec_info loop_vinfo, adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi)); } + + /* Extract load statements on memrefs with zero-stride accesses. */ + + if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) + { + /* In the loop body, we iterate each statement to check if it is a load. + Then we check the DR_STEP of the data reference. If DR_STEP is zero, + then we will hoist the load statement to the loop preheader. */ + + basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); + int nbbs = loop->num_nodes; + + for (int i = 0; i < nbbs; ++i) + { + for (gimple_stmt_iterator si = gsi_start_bb (bbs[i]); + !gsi_end_p (si);) + { + gimple stmt = gsi_stmt (si); + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); + + if (is_gimple_assign (stmt) + && (!dr + || (DR_IS_READ (dr) && integer_zerop (DR_STEP (dr))))) + { + bool hoist = true; + ssa_op_iter iter; + tree var; + + /* We hoist a statement if all SSA uses in it are defined + outside of the loop. */ + FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE) + { + gimple def = SSA_NAME_DEF_STMT (var); + if (!gimple_nop_p (def) + && flow_bb_inside_loop_p (loop, gimple_bb (def))) + { + hoist = false; + break; + } + } + + if (hoist) + { + if (dr) + gimple_set_vuse (stmt, NULL); + + gsi_remove (&si, false); + gsi_insert_on_edge_immediate (loop_preheader_edge (loop), + stmt); + + if (dump_enabled_p ()) + { + dump_printf_loc + (MSG_NOTE, vect_location, + "hoisting out of the vectorized loop: "); + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); + dump_printf (MSG_NOTE, "\n"); + } + continue; + } + } + gsi_next (&si); + } + } + } + /* End loop-exit-fixes after versioning. */ if (cond_expr_stmt_list)