> > + bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index); > > + bitmap_set_bit (exit_bbs, loop->latch->index); > > treating the latch as exit is probably premature optimization (yes, it's > empty). > > > + > > + do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs); > > + > > + BITMAP_FREE (exit_bbs); > > ... deallocation can go. Note I wonder whether, if we are already spinning up > VN, we should include the preheader in the operation? > We regularly end up emitting redundant vector initializers that could be > cleaned up earlier this way.
I've change it to include the preheader but it looks like this breaks bootstrap on both x86 and AArch64. On x86 the following testcase double matmul_c8_vanilla_bbase_0; double *matmul_c8_vanilla_dest; matmul_c8_vanilla_x; matmul_c8_vanilla() { for (; matmul_c8_vanilla_x; matmul_c8_vanilla_x++) matmul_c8_vanilla_dest[matmul_c8_vanilla_x] += matmul_c8_vanilla_bbase_0; } ICEs with -std=gnu11 -ffast-math -ftree-vectorize -O2 with: internal compiler error: tree check: expected ssa_name, have var_decl in SSA_VAL, at tree-ssa-sccvn.c:535 0x80731c tree_check_failed(tree_node const*, char const*, int, char const*, ...) ../gcc-dsg/gcc/tree.c:8689 0x7ebda2 tree_check(tree_node*, char const*, int, char const*, tree_code) ../gcc-dsg/gcc/tree.h:3433 0x7ebda2 SSA_VAL(tree_node*, bool*) ../gcc-dsg/gcc/tree-ssa-sccvn.c:535 0x7ebda2 vuse_ssa_val ../gcc-dsg/gcc/tree-ssa-sccvn.c:553 0x7ebda2 vn_reference_lookup(tree_node*, tree_node*, vn_lookup_kind, vn_reference_s**, bool, tree_node**, tree_node*) ../gcc-dsg/gcc/tree-ssa-sccvn.c:3664 0x10d8ca5 visit_reference_op_load ../gcc-dsg/gcc/tree-ssa-sccvn.c:5166 0x10d8ca5 visit_stmt ../gcc-dsg/gcc/tree-ssa-sccvn.c:5615 0x10d976c process_bb ../gcc-dsg/gcc/tree-ssa-sccvn.c:7344 0x10dafe5 do_rpo_vn ../gcc-dsg/gcc/tree-ssa-sccvn.c:7942 0x10dc828 do_rpo_vn(function*, edge_def*, bitmap_head*) ../gcc-dsg/gcc/tree-ssa-sccvn.c:8039 0x119c39c vectorize_loops() ../gcc-dsg/gcc/tree-vectorizer.c:1304 on AArch64 this one ICEs with -ffast-math -ftree-vectorize -O2 _Complex *a; _Complex b; c, d; fn1() { _Complex e; for (; c; ++c) e = d * a[c]; b = e; } With the message internal compiler error: tree check: expected ssa_name, have var_decl in VN_INFO, at tree-ssa-sccvn.c:451 0x734073 tree_check_failed(tree_node const*, char const*, int, char const*, ...) ../../gcc-fsf/gcc/tree.c:8691 0x10e2e2f tree_check(tree_node*, char const*, int, char const*, tree_code) ../../gcc-fsf/gcc/tree.h:3433 0x10e2e2f VN_INFO(tree_node*) ../../gcc-fsf/gcc/tree-ssa-sccvn.c:451 0x10ed223 process_bb ../../gcc-fsf/gcc/tree-ssa-sccvn.c:7331 0x10eea43 do_rpo_vn ../../gcc-fsf/gcc/tree-ssa-sccvn.c:7944 0x10efe2b do_rpo_vn(function*, edge_def*, bitmap_head*) ../../gcc-fsf/gcc/tree-ssa-sccvn.c:8039 0x11c436b vectorize_loops() ../../gcc-fsf/gcc/tree-vectorizer.c:1304 Any ideas? Thanks, Tamar > > Otherwise the change looks OK. > --- inline copy of patch --- diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index edb7538a67f00cd80a608ee82510cf437fe88083..029d59016c9652f87d80fc5500f89532c79a66d0 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -81,7 +81,7 @@ along with GCC; see the file COPYING3. If not see #include "gimple-pretty-print.h" #include "opt-problem.h" #include "internal-fn.h" - +#include "tree-ssa-sccvn.h" /* Loop or bb location, with hotness information. */ dump_user_location_t vect_location; @@ -1298,6 +1298,17 @@ vectorize_loops (void) if (has_mask_store && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE)) optimize_mask_stores (loop); + + auto_bitmap exit_bbs; + /* Perform local CSE, this esp. helps because we emit code for + predicates that need to be shared for optimal predicate usage. + However reassoc will re-order them and prevent CSE from working + as it should. CSE only the loop body, not the entry. */ + bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index); + + edge entry = EDGE_PRED (loop_preheader_edge (loop)->src, 0); + do_rpo_vn (cfun, entry, exit_bbs); + loop->aux = NULL; }
rb15007 (2).patch
Description: rb15007 (2).patch