> > +      bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
> > +      bitmap_set_bit (exit_bbs, loop->latch->index);
> 
> treating the latch as exit is probably premature optimization (yes, it's 
> empty).
> 
> > +
> > +      do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
> > +
> > +      BITMAP_FREE (exit_bbs);
> 
> ... deallocation can go.  Note I wonder whether, if we are already spinning up
> VN, we should include the preheader in the operation?
> We regularly end up emitting redundant vector initializers that could be
> cleaned up earlier this way.

I've changed it to include the preheader, but it looks like this breaks
bootstrap on both x86 and AArch64.

On x86 the following testcase

double matmul_c8_vanilla_bbase_0;
double *matmul_c8_vanilla_dest;
matmul_c8_vanilla_x;
matmul_c8_vanilla() {
  for (; matmul_c8_vanilla_x; matmul_c8_vanilla_x++)
    matmul_c8_vanilla_dest[matmul_c8_vanilla_x] += matmul_c8_vanilla_bbase_0;
}

ICEs with -std=gnu11 -ffast-math -ftree-vectorize -O2, giving:

internal compiler error: tree check: expected ssa_name, have var_decl in SSA_VAL, at tree-ssa-sccvn.c:535
0x80731c tree_check_failed(tree_node const*, char const*, int, char const*, ...)
        ../gcc-dsg/gcc/tree.c:8689
0x7ebda2 tree_check(tree_node*, char const*, int, char const*, tree_code)
        ../gcc-dsg/gcc/tree.h:3433
0x7ebda2 SSA_VAL(tree_node*, bool*)
        ../gcc-dsg/gcc/tree-ssa-sccvn.c:535
0x7ebda2 vuse_ssa_val
        ../gcc-dsg/gcc/tree-ssa-sccvn.c:553
0x7ebda2 vn_reference_lookup(tree_node*, tree_node*, vn_lookup_kind, vn_reference_s**, bool, tree_node**, tree_node*)
        ../gcc-dsg/gcc/tree-ssa-sccvn.c:3664
0x10d8ca5 visit_reference_op_load
        ../gcc-dsg/gcc/tree-ssa-sccvn.c:5166
0x10d8ca5 visit_stmt
        ../gcc-dsg/gcc/tree-ssa-sccvn.c:5615
0x10d976c process_bb
        ../gcc-dsg/gcc/tree-ssa-sccvn.c:7344
0x10dafe5 do_rpo_vn
        ../gcc-dsg/gcc/tree-ssa-sccvn.c:7942
0x10dc828 do_rpo_vn(function*, edge_def*, bitmap_head*)
        ../gcc-dsg/gcc/tree-ssa-sccvn.c:8039
0x119c39c vectorize_loops()
        ../gcc-dsg/gcc/tree-vectorizer.c:1304

On AArch64, this one ICEs with -ffast-math -ftree-vectorize -O2:

_Complex *a;
_Complex b;
c, d;
fn1() {
  _Complex e;
  for (; c; ++c)
    e = d * a[c];
  b = e;
}

with the message:

internal compiler error: tree check: expected ssa_name, have var_decl in VN_INFO, at tree-ssa-sccvn.c:451
0x734073 tree_check_failed(tree_node const*, char const*, int, char const*, ...)
        ../../gcc-fsf/gcc/tree.c:8691
0x10e2e2f tree_check(tree_node*, char const*, int, char const*, tree_code)
        ../../gcc-fsf/gcc/tree.h:3433
0x10e2e2f VN_INFO(tree_node*)
        ../../gcc-fsf/gcc/tree-ssa-sccvn.c:451
0x10ed223 process_bb
        ../../gcc-fsf/gcc/tree-ssa-sccvn.c:7331
0x10eea43 do_rpo_vn
        ../../gcc-fsf/gcc/tree-ssa-sccvn.c:7944
0x10efe2b do_rpo_vn(function*, edge_def*, bitmap_head*)
        ../../gcc-fsf/gcc/tree-ssa-sccvn.c:8039
0x11c436b vectorize_loops()
        ../../gcc-fsf/gcc/tree-vectorizer.c:1304

Any ideas?

Thanks,
Tamar

> 
> Otherwise the change looks OK.
> 

--- inline copy of patch ---

diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index edb7538a67f00cd80a608ee82510cf437fe88083..029d59016c9652f87d80fc5500f89532c79a66d0 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -81,7 +81,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-pretty-print.h"
 #include "opt-problem.h"
 #include "internal-fn.h"
-
+#include "tree-ssa-sccvn.h"
 
 /* Loop or bb location, with hotness information.  */
 dump_user_location_t vect_location;
@@ -1298,6 +1298,17 @@ vectorize_loops (void)
       if (has_mask_store
          && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
        optimize_mask_stores (loop);
+
+      auto_bitmap exit_bbs;
+      /* Perform local CSE, this esp. helps because we emit code for
+        predicates that need to be shared for optimal predicate usage.
+        However reassoc will re-order them and prevent CSE from working
+        as it should.  CSE only the loop body, not the entry.  */
+      bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
+
+      edge entry = EDGE_PRED (loop_preheader_edge (loop)->src, 0);
+      do_rpo_vn (cfun, entry, exit_bbs);
+
       loop->aux = NULL;
     }

Attachment: rb15007 (2).patch
Description: rb15007 (2).patch

Reply via email to