[Bug tree-optimization/98176] Loop invariant memory could not be hoisted when nonpure_call in loop body

wwwhhhyyy333 at gmail dot com via Gcc-bugs Tue, 15 Dec 2020 01:14:01 -0800

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98176


--- Comment #7 from Hongyu Wang <wwwhhhyyy333 at gmail dot com> ---
(In reply to Richard Biener from comment #5)
> Yes.
> 
> For a LIM testcase an example with a memcpy might be more practically
> relevant.
> 
> For refactoring I'd start with classifying the unanalyzable refs as
> separate ref ID, marking it with another bit like ref_unanalyzed in
> in_mem_ref and asserting there's a single access of such refs.
> The mem_refs_may_alias_p code then needs to use stmt-based alias
> queries instead of refs_may_alias_p_1 using accesses_in_loop[0]->stmt.
> 
> And code testing for UNANALYZABLE_MEM_ID now needs to look at the
> ref_unanalyzed flag to not consider those refs for transforms.
> 
> Note this may blow up the memory requirements for testcases with lots
> of "unanalyzable" refs.
> 
> The nonpure-call code is more difficult to improve, even sincos can not
> return
> when the access to s or c traps.  Analyzing the arguments might help here.
> If you disregard that detail I think all ECF_LEAF|ECF_NOTHROW functions
> return normally.

Thanks for the suggestion. I did some refactoring accordingly, and this case
can now be vectorized.

diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index 92e5a8dd774..3e3e81bc36f 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -119,6 +119,8 @@ public:
                                   (its index in memory_accesses.refs_list)  */
   unsigned ref_canonical : 1;   /* Whether mem.ref was canonicalized.  */
   unsigned ref_decomposed : 1;  /* Whether the ref was hashed from mem.  */
+  unsigned ref_unanalyzed : 1; /* Whether the ref was unanalyzed memory.  */
+
   hashval_t hash;              /* Its hash value.  */

   /* The memory access itself and associated caching of alias-oracle
@@ -260,7 +262,14 @@ static bool refs_independent_p (im_mem_ref *, im_mem_ref
*, bool = true);
 #define UNANALYZABLE_MEM_ID 0

 /* Whether the reference was analyzable.  */
-#define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
+#define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID          \
+                            && !(REF)->ref_unanalyzed)
+
+#define REF_ID_UNANALYZABLE(id)                                               
\
+  (id == UNANALYZABLE_MEM_ID                                           \
+   || ((memory_accesses.refs_list[id])                                 \
+       && (memory_accesses.refs_list[id]->ref_unanalyzed)) \
+   )

 static struct lim_aux_data *
 init_lim_data (gimple *stmt)
@@ -829,7 +838,8 @@ set_profitable_level (gimple *stmt)
   set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data
(stmt)->max_loop);
 }

-/* Returns true if STMT is a call that has side effects.  */
+/* Returns true if STMT is a call that has side effects, or it is
+   not a function call with ECF_LEAF | ECF_NOTHROW.  */

 static bool
 nonpure_call_p (gimple *stmt)
@@ -837,6 +847,11 @@ nonpure_call_p (gimple *stmt)
   if (gimple_code (stmt) != GIMPLE_CALL)
     return false;

+  /* Simplified here, better to analyze call parameter.  */
+  int flags = gimple_call_flags (stmt);
+  if (flags & (ECF_LEAF | ECF_NOTHROW))
+    return false;
+
   return gimple_has_side_effects (stmt);
 }

@@ -1377,6 +1392,7 @@ mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
   ref->id = id;
   ref->ref_canonical = false;
   ref->ref_decomposed = false;
+  ref->ref_unanalyzed = false;
   ref->hash = hash;
   ref->stored = NULL;
   ref->loaded = NULL;
@@ -1461,9 +1477,13 @@ gather_mem_refs_stmt (class loop *loop, gimple *stmt)
   mem = simple_mem_ref_in_stmt (stmt, &is_stored);
   if (!mem)
     {
-      /* We use the shared mem_ref for all unanalyzable refs.  */
-      id = UNANALYZABLE_MEM_ID;
-      ref = memory_accesses.refs_list[id];
+      /* Mark unanaylzable refs with different id and skip analysis. */
+      id = memory_accesses.refs_list.length ();
+      ref = mem_ref_alloc (NULL, 0, id);
+      ref->ref_unanalyzed = true;
+      memory_accesses.refs_list.safe_push (ref);
+      record_mem_ref_loc (ref, stmt, NULL);
+
       if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
@@ -1576,7 +1596,7 @@ gather_mem_refs_stmt (class loop *loop, gimple *stmt)
       mark_ref_stored (ref, loop);
     }
   /* A not simple memory op is also a read when it is a write.  */
-  if (!is_stored || id == UNANALYZABLE_MEM_ID)
+  if (!is_stored || REF_ID_UNANALYZABLE (id))
     {
       bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num],
ref->id);
       mark_ref_loaded (ref, loop);
@@ -1701,6 +1721,31 @@ mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref
*mem2,
   poly_widest_int size1, size2;
   aff_tree off1, off2;

+  /* For refs marked as unanalyzed, use stmt_based alias analysis
+     and returns false when one mem_ref used by this unanalyzed stmt*/
+  if (mem1->ref_unanalyzed
+      || mem2->ref_unanalyzed)
+    {
+      if (mem1->ref_unanalyzed
+         && !mem2->ref_unanalyzed)
+       {
+         gcc_assert (mem1->accesses_in_loop.length() == 1);
+         gimple *stmt = mem1->accesses_in_loop[0].stmt;
+         if (ref_maybe_used_by_stmt_p (stmt, &mem2->mem, tbaa_p))
+           return true;
+       }
+      else if(!mem1->ref_unanalyzed)
+       {
+         gcc_assert (mem2->accesses_in_loop.length() == 1);
+         gimple *stmt = mem2->accesses_in_loop[0].stmt;
+         if (ref_maybe_used_by_stmt_p (stmt, &mem1->mem, tbaa_p))
+           return true;
+       }
+      else
+       return true;
+      return false;
+    }
+
   /* Perform basic offset and type-based disambiguation.  */
   if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
     return false;
@@ -2423,7 +2468,7 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree
vdef,
        }
       lim_aux_data *data = get_lim_data (def);
       gcc_assert (data);
-      if (data->ref == UNANALYZABLE_MEM_ID)
+      if (REF_ID_UNANALYZABLE (data->ref) )
        return -1;
       /* One of the stores we want to apply SM to and we've not yet seen.  */
       else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
-- 

Does it look correct?

Also, this creates too many unanalyzed ref IDs when I try to compile a big
program... Is there any way to avoid the extra memory usage?

[Bug tree-optimization/98176] Loop invariant memory could not be hoisted when nonpure_call in loop body

Reply via email to