On Wed, 3 Sep 2014, Richard Biener wrote:

> 
> Ok, so with recent activity in that mgrid bug (PR55334) I tried
> to remember what solution we thought of after determining that
> ADD_RESTRICT is a no-go.
> 
> The following very prototypish patch implements the idea of
> computing known non-dependences and maintaining them over
> the compilation (mainly inlining / cloning for PR55334).
> 
> So the patch piggy-backs on PTA (bad - PTA doesn't compute
> must-aliases, so it will work only for a very limited set
> of testcases now - but at least it might work for
> non-register restrict stuff).
> 
> The representation of "non-dependences" is the most interesting
> bit of course.  We partition memory references into different
> cliques in which all references are independent.  And we
> split that clique again into sets of references based on the
> same pointer.
> 
> That allows us to disambiguate equal clique but distinct pointer
> memory references.
> 
> For restrict you'd put all(!) memory references in the BLOCK
> where the restrict pointer is live into the same clique
> and assign a distinct ptr based on which restrict pointer this
> is based on.  You make all references not based on any
> restrict pointer have ptr == 0 (not implemented in the prototype
> patch - they don't get a clique assigned).
> 
> The patch simplifies this by taking function scope as the
> only BLOCK to consider, thus inside a function the clique
> will be unique (before inlining).
> 
> I can see issues arising with assigning numbers in the frontend
> based on real BLOCKs and
>   
>   {
>     int * restrict q;
>     {
>       int * restrict p;
>       *p = *q;
>     }
>     {
>       int * restrict r;
>       *r = *q;
>     }
> 
> that is, non-dependences based on pointers coming from different
> nested scopes.  The FE in this case would need to "duplicate"
> the ptr value for 'q' to not make *p and *r falsely a
> non-dependence.  But I think we're not planning to assign
> this in the C frontend family (maybe the Fortran FE though).
> 
> To preserve correctness cliques from inlined functions need to
> be remapped to an unused clique number so struct function
> gets a max_clique number.  (remapping not implemented in
> the prototype)
> 
> Any correctness / missed-optimization holes to punch?  That is,
> given a set of non-dependent reference pairs, can you assign
> that clique, ptr values in a correct and optimal way?
> (it somehow assumes transitivity, if the set is *a, *b; *b, *c;
> then *a, *c are also non-dependent)
> 
> It all depends on a conservative but agressive must-be-based-on
> analysis of course.  You'd meet that with the conservative
> may-be-based-on analysis from PTA and compute the proper
> non-dependences from that.
> 
> Comments welcome.
> 
> Patch tested on
> 
> static inline void foo (int * __restrict__ p, int * __restrict__ q)
> {
>   int i;
>   for (i = 0; i < 1024; ++i)
>     p[i] = q[i];
> }
> 
> void bar (int *p, int *q)
> {
>   foo (p, q);
> }
> 
> where it still vectorizes the loop in bar without alias check.

Updated patch with a tiny fix to manage fixing PR55334 (IPA-CP
issue with mgrid) and renaming all-over-the-place.

Still lacks the clique remapping code in the inliner which is
required for correctness and still lacks code to deal with
disambiguating restrct vs. non-restrict accesses.

Improves mgrid performance by 10%, still fixes the above tiny
testcase but otherwise untested.

Richard.

2014-09-05  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/55334

Index: trunk/gcc/function.h
===================================================================
*** trunk.orig/gcc/function.h   2014-09-05 14:05:43.799777783 +0200
--- trunk/gcc/function.h        2014-09-05 14:10:30.961758013 +0200
*************** struct GTY(()) function {
*** 593,598 ****
--- 593,601 ----
       a string describing the reason for failure.  */
    const char * GTY((skip)) cannot_be_copied_reason;
  
+   /* Last assigned dependence info clique.  */
+   unsigned short last_clique;
+ 
    /* Collected bit flags.  */
  
    /* Number of units of general registers that need saving in stdarg
Index: trunk/gcc/tree-inline.c
===================================================================
*** trunk.orig/gcc/tree-inline.c        2014-09-05 14:05:43.799777783 +0200
--- trunk/gcc/tree-inline.c     2014-09-05 14:10:10.303759435 +0200
*************** remap_gimple_op_r (tree *tp, int *walk_s
*** 929,934 ****
--- 929,938 ----
          TREE_THIS_VOLATILE (*tp) = TREE_THIS_VOLATILE (old);
          TREE_SIDE_EFFECTS (*tp) = TREE_SIDE_EFFECTS (old);
          TREE_NO_WARNING (*tp) = TREE_NO_WARNING (old);
+         /* ???  Properly remap the clique to a new one.  */
+         if (MR_DEPENDENCE_CLIQUE (old) != 0)
+           MR_DEPENDENCE_CLIQUE (*tp) = MR_DEPENDENCE_CLIQUE (old);
+         MR_DEPENDENCE_BASE (*tp) = MR_DEPENDENCE_BASE (old);
          /* We cannot propagate the TREE_THIS_NOTRAP flag if we have
             remapped a parameter as the property might be valid only
             for the parameter itself.  */
*************** copy_tree_body_r (tree *tp, int *walk_su
*** 1180,1185 ****
--- 1184,1193 ----
          TREE_THIS_VOLATILE (*tp) = TREE_THIS_VOLATILE (old);
          TREE_SIDE_EFFECTS (*tp) = TREE_SIDE_EFFECTS (old);
          TREE_NO_WARNING (*tp) = TREE_NO_WARNING (old);
+         /* ???  Properly remap the clique to a new one.  */
+         if (MR_DEPENDENCE_CLIQUE (old) != 0)
+           MR_DEPENDENCE_CLIQUE (*tp) = MR_DEPENDENCE_CLIQUE (old);
+         MR_DEPENDENCE_BASE (*tp) = MR_DEPENDENCE_BASE (old);
          /* We cannot propagate the TREE_THIS_NOTRAP flag if we have
             remapped a parameter as the property might be valid only
             for the parameter itself.  */
Index: trunk/gcc/tree-pretty-print.c
===================================================================
*** trunk.orig/gcc/tree-pretty-print.c  2014-09-05 14:05:43.799777783 +0200
--- trunk/gcc/tree-pretty-print.c       2014-09-05 14:13:42.869744800 +0200
*************** dump_generic_node (pretty_printer *buffe
*** 1081,1087 ****
            /* Same value types ignoring qualifiers.  */
            && (TYPE_MAIN_VARIANT (TREE_TYPE (node))
                == TYPE_MAIN_VARIANT
!                   (TREE_TYPE (TREE_TYPE (TREE_OPERAND (node, 1))))))
          {
            if (TREE_CODE (TREE_OPERAND (node, 0)) != ADDR_EXPR)
              {
--- 1081,1088 ----
            /* Same value types ignoring qualifiers.  */
            && (TYPE_MAIN_VARIANT (TREE_TYPE (node))
                == TYPE_MAIN_VARIANT
!                   (TREE_TYPE (TREE_TYPE (TREE_OPERAND (node, 1)))))
!           && MR_DEPENDENCE_CLIQUE (node) == 0)
          {
            if (TREE_CODE (TREE_OPERAND (node, 0)) != ADDR_EXPR)
              {
*************** dump_generic_node (pretty_printer *buffe
*** 1112,1117 ****
--- 1113,1125 ----
                dump_generic_node (buffer, TREE_OPERAND (node, 1),
                                   spc, flags, false);
              }
+           if (MR_DEPENDENCE_CLIQUE (node) != 0)
+             {
+               pp_string (buffer, " clique ");
+               pp_unsigned_wide_integer (buffer, MR_DEPENDENCE_CLIQUE (node));
+               pp_string (buffer, " base ");
+               pp_unsigned_wide_integer (buffer, MR_DEPENDENCE_BASE (node));
+             }
            pp_right_bracket (buffer);
          }
        break;
*************** dump_generic_node (pretty_printer *buffe
*** 1450,1456 ****
                  /* Same value types ignoring qualifiers.  */
                  && (TYPE_MAIN_VARIANT (TREE_TYPE (op0))
                      == TYPE_MAIN_VARIANT
!                         (TREE_TYPE (TREE_TYPE (TREE_OPERAND (op0, 1))))))))
        {
          op0 = TREE_OPERAND (op0, 0);
          str = "->";
--- 1458,1465 ----
                  /* Same value types ignoring qualifiers.  */
                  && (TYPE_MAIN_VARIANT (TREE_TYPE (op0))
                      == TYPE_MAIN_VARIANT
!                         (TREE_TYPE (TREE_TYPE (TREE_OPERAND (op0, 1)))))
!                 && MR_DEPENDENCE_CLIQUE (op0) == 0)))
        {
          op0 = TREE_OPERAND (op0, 0);
          str = "->";
Index: trunk/gcc/tree-ssa-alias.c
===================================================================
*** trunk.orig/gcc/tree-ssa-alias.c     2014-09-05 14:05:43.799777783 +0200
--- trunk/gcc/tree-ssa-alias.c  2014-09-05 14:10:10.304759435 +0200
*************** refs_may_alias_p_1 (ao_ref *ref1, ao_ref
*** 1371,1376 ****
--- 1371,1404 ----
      return decl_refs_may_alias_p (ref1->ref, base1, offset1, max_size1,
                                  ref2->ref, base2, offset2, max_size2);
  
+   /* Handle restrict based accesses.
+      ???  ao_ref_base strips inner MEM_REF [&decl], recover from that
+      here.  */
+   tree rbase1 = base1;
+   tree rbase2 = base2;
+   if (var1_p)
+     {
+       rbase1 = ref1->ref;
+       if (rbase1)
+       while (handled_component_p (rbase1))
+         rbase1 = TREE_OPERAND (rbase1, 0);
+     }
+   if (var2_p)
+     {
+       rbase2 = ref2->ref;
+       if (rbase2)
+       while (handled_component_p (rbase2))
+         rbase2 = TREE_OPERAND (rbase2, 0);
+     }
+   if (rbase1 && rbase2
+       && (TREE_CODE (base1) == MEM_REF || TREE_CODE (base1) == TARGET_MEM_REF)
+       && (TREE_CODE (base2) == MEM_REF || TREE_CODE (base2) == TARGET_MEM_REF)
+       /* If the accesses are in the same restrict clique... */
+       && MR_DEPENDENCE_CLIQUE (base1) == MR_DEPENDENCE_CLIQUE (base2)
+       /* But based on different pointers they do not alias.  */
+       && MR_DEPENDENCE_BASE (base1) != MR_DEPENDENCE_BASE (base2))
+     return false;
+ 
    ind1_p = (TREE_CODE (base1) == MEM_REF
            || TREE_CODE (base1) == TARGET_MEM_REF);
    ind2_p = (TREE_CODE (base2) == MEM_REF
Index: trunk/gcc/tree-ssa-structalias.c
===================================================================
*** trunk.orig/gcc/tree-ssa-structalias.c       2014-09-05 14:05:43.799777783 
+0200
--- trunk/gcc/tree-ssa-structalias.c    2014-09-05 14:25:22.078696660 +0200
***************
*** 52,57 ****
--- 52,60 ----
  #include "splay-tree.h"
  #include "params.h"
  #include "alias.h"
+ #include "tree-phinodes.h"
+ #include "ssa-iterators.h"
+ #include "tree-pretty-print.h"
  
  /* The idea behind this analyzer is to generate set constraints from the
     program, then solve the resulting constraints in order to generate the
*************** struct variable_info
*** 272,283 ****
--- 275,293 ----
    /* True if this field has only restrict qualified pointers.  */
    unsigned int only_restrict_pointers : 1;
  
+   /* True if this represents a heap var created for a restrict qualified
+      pointer.  */
+   unsigned int is_restrict_var : 1;
+ 
    /* True if this represents a global variable.  */
    unsigned int is_global_var : 1;
  
    /* True if this represents a IPA function info.  */
    unsigned int is_fn_info : 1;
  
+   /* ???  Store somewhere better.  */
+   unsigned short ruid;
+ 
    /* The ID of the variable for the next field in this structure
       or zero for the last field in this structure.  */
    unsigned next;
*************** new_var_info (tree t, const char *name)
*** 369,374 ****
--- 379,385 ----
    ret->is_heap_var = false;
    ret->may_have_pointers = true;
    ret->only_restrict_pointers = false;
+   ret->is_restrict_var = false;
    ret->is_global_var = (t == NULL_TREE);
    ret->is_fn_info = false;
    if (t && DECL_P (t))
*************** static varinfo_t
*** 3773,3778 ****
--- 3784,3790 ----
  make_constraint_from_restrict (varinfo_t lhs, const char *name)
  {
    varinfo_t vi = make_heapvar (name);
+   vi->is_restrict_var = 1;
    vi->is_global_var = 1;
    vi->may_have_pointers = 1;
    make_constraint_from (lhs, vi->id);
*************** intra_create_variable_infos (struct func
*** 5845,5850 ****
--- 5857,5863 ----
          tree heapvar = build_fake_var_decl (TREE_TYPE (TREE_TYPE (t)));
          DECL_EXTERNAL (heapvar) = 1;
          vi = create_variable_info_for_1 (heapvar, "PARM_NOALIAS");
+         vi->is_restrict_var = 1;
          insert_vi_for_tree (heapvar, vi);
          lhsc.var = p->id;
          lhsc.type = SCALAR;
*************** compute_may_aliases (void)
*** 6948,6953 ****
--- 6961,7061 ----
    if (dump_file)
      dump_alias_info (dump_file);
  
+     {
+       unsigned short clique = 0;
+       unsigned short last_ruid = 0;
+       for (unsigned i = 0; i < num_ssa_names; ++i)
+       {
+         tree ptr = ssa_name (i);
+         if (!ptr || !POINTER_TYPE_P (TREE_TYPE (ptr)))
+           continue;
+ 
+         /* Avoid all this when ptr is not dereferenced?  */
+         tree p = ptr;
+         if (SSA_NAME_IS_DEFAULT_DEF (ptr)
+             && (TREE_CODE (SSA_NAME_VAR (ptr)) == PARM_DECL
+                 || TREE_CODE (SSA_NAME_VAR (ptr)) == RESULT_DECL))
+           p = SSA_NAME_VAR (ptr);
+ 
+         varinfo_t vi = get_varinfo (find (lookup_vi_for_tree (p)->id));
+         bitmap_iterator bi;
+         unsigned j;
+         varinfo_t restrict_var = NULL;
+         EXECUTE_IF_SET_IN_BITMAP (vi->solution, 0, j, bi)
+           {
+             varinfo_t oi = get_varinfo (j);
+             if (oi->is_restrict_var)
+               {
+                 if (restrict_var)
+                   {
+                     if (dump_file && (dump_flags & TDF_DETAILS))
+                       {
+                         fprintf (dump_file, "found restrict pointed-to "
+                                  "for ");
+                         print_generic_expr (dump_file, ptr, 0);
+                         fprintf (dump_file, " but not exclusively\n");
+                       }
+                     restrict_var = NULL;
+                     break;
+                   }
+                 restrict_var = oi;
+               }
+             /* NULL is the only other valid points-to entry.  */
+             else if (oi->id != nothing_id)
+               {
+                 restrict_var = NULL;
+                 break;
+               }
+           }
+         /* Ok, found that ptr must(!) point to a single(!) restrict
+            variable.  */
+         /* ???  PTA isn't really a proper propagation engine to compute
+            this property.
+            ???  We can handle merging of two restricts
+            by unifying them.  */
+         if (restrict_var)
+           {
+             if (clique == 0)
+               clique = ++cfun->last_clique;
+             if (restrict_var->ruid == 0)
+               restrict_var->ruid = ++last_ruid;
+ 
+             /* Now look at possible dereferences of ptr.  */
+             imm_use_iterator ui;
+             gimple use_stmt;
+             FOR_EACH_IMM_USE_STMT (use_stmt, ui, ptr)
+               {
+                 /* ???  Calls and asms.  */
+                 if (!gimple_assign_single_p (use_stmt))
+                   continue;
+                 tree *ref = gimple_assign_lhs_ptr (use_stmt);
+                 while (handled_component_p (*ref))
+                   ref = &TREE_OPERAND (*ref, 0);
+                 if (TREE_CODE (*ref) == MEM_REF
+                     && TREE_OPERAND (*ref, 0) == ptr)
+                   {
+                     MR_DEPENDENCE_CLIQUE (*ref) = clique;
+                     MR_DEPENDENCE_BASE (*ref) = restrict_var->ruid;
+                   }
+                 ref = gimple_assign_rhs1_ptr (use_stmt);
+                 while (handled_component_p (*ref))
+                   ref = &TREE_OPERAND (*ref, 0);
+                 if (TREE_CODE (*ref) == MEM_REF
+                     && TREE_OPERAND (*ref, 0) == ptr)
+                   {
+                     MR_DEPENDENCE_CLIQUE (*ref) = clique;
+                     MR_DEPENDENCE_BASE (*ref) = restrict_var->ruid;
+                   }
+               }
+ 
+             /* ???  Accesses not based on a restrict pointer should
+                all get into the clique but share a single ptr ID.
+                That way they get disabiguated against restrict
+                accesses but not against each other.  */
+           }
+       }
+     }
+ 
    /* Deallocate memory used by aliasing data structures and the internal
       points-to solution.  */
    delete_points_to_sets ();
Index: trunk/gcc/tree-data-ref.c
===================================================================
*** trunk.orig/gcc/tree-data-ref.c      2014-09-05 14:05:43.799777783 +0200
--- trunk/gcc/tree-data-ref.c   2014-09-05 14:10:10.306759435 +0200
*************** dr_analyze_indices (struct data_referenc
*** 980,988 ****
--- 980,991 ----
             guaranteed.
             As a band-aid, mark the access so we can special-case
             it in dr_may_alias_p.  */
+         tree old = ref;
          ref = fold_build2_loc (EXPR_LOCATION (ref),
                                 MEM_REF, TREE_TYPE (ref),
                                 base, memoff);
+         MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
+         MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
          access_fns.safe_push (access_fn);
        }
      }
*************** dr_may_alias_p (const struct data_refere
*** 1389,1394 ****
--- 1392,1403 ----
        return false;
      }
  
+   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
+       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == 
TARGET_MEM_REF)
+       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
+       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
+     return false;
+ 
    /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
       do not know the size of the base-object.  So we cannot do any
       offset/overlap based analysis but have to rely on points-to
Index: trunk/gcc/tree-core.h
===================================================================
*** trunk.orig/gcc/tree-core.h  2014-09-05 14:05:43.799777783 +0200
--- trunk/gcc/tree-core.h       2014-09-05 14:10:10.306759435 +0200
*************** struct GTY(()) tree_base {
*** 802,807 ****
--- 802,817 ----
  
      /* Internal function code.  */
      enum internal_fn ifn;
+ 
+     /* The following two fields are used for MEM_REF and TARGET_MEM_REF
+        expression trees and specify known data non-dependences.  For
+        two memory references in a function they are known to not
+        alias if dependence_info.clique are equal and dependence_info.base
+        are distinct.  */
+     struct {
+       unsigned short clique;
+       unsigned short base;
+     } dependence_info;
    } GTY((skip(""))) u;
  };
  
Index: trunk/gcc/tree.h
===================================================================
*** trunk.orig/gcc/tree.h       2014-09-05 14:05:43.799777783 +0200
--- trunk/gcc/tree.h    2014-09-05 14:10:10.307759435 +0200
*************** extern void protected_set_expr_location
*** 1081,1086 ****
--- 1081,1091 ----
  #define TMR_STEP(NODE) (TREE_OPERAND (TARGET_MEM_REF_CHECK (NODE), 3))
  #define TMR_INDEX2(NODE) (TREE_OPERAND (TARGET_MEM_REF_CHECK (NODE), 4))
  
+ #define MR_DEPENDENCE_CLIQUE(NODE) \
+   (TREE_CHECK2 (NODE, MEM_REF, TARGET_MEM_REF)->base.u.dependence_info.clique)
+ #define MR_DEPENDENCE_BASE(NODE) \
+   (TREE_CHECK2 (NODE, MEM_REF, TARGET_MEM_REF)->base.u.dependence_info.base)
+ 
  /* The operands of a BIND_EXPR.  */
  #define BIND_EXPR_VARS(NODE) (TREE_OPERAND (BIND_EXPR_CHECK (NODE), 0))
  #define BIND_EXPR_BODY(NODE) (TREE_OPERAND (BIND_EXPR_CHECK (NODE), 1))

Reply via email to