This attempts to fix PR60276 - the fact that the vectorizer dependence
analysis is run too early and that it invalidates assumptions it
makes there later.  The specific issue in question arises when
the vectorizer needs to effectively unroll the loop and by
performing all vectorized loads first and vectorized stores last
the idea that it can ignore known dependences with negative
distance doesn't work out if that distance is too short.

The following is the shortest (and eventually backportable) change
I could come up with - record the negative distance during
dependence analysis and re-validate it when decisions about
stmt copying and group sizes are fixed.

Bootstrapped and tested on x86_64-unknown-linux-gnu - does this look
ok?

Thanks,
Richard.

2014-02-21  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/60276
        * tree-vectorizer.h (struct _stmt_vec_info): Add min_neg_dist field.
        (STMT_VINFO_MIN_NEG_DIST): New macro.
        * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Record
        STMT_VINFO_MIN_NEG_DIST.
        * tree-vect-stmts.c (vectorizable_load): Verify if assumptions
        made for negative dependence distances still hold.

        * gcc.dg/vect/pr60276.c: New testcase.

Index: gcc/tree-vectorizer.h
===================================================================
*** gcc/tree-vectorizer.h       (revision 207938)
--- gcc/tree-vectorizer.h       (working copy)
*************** typedef struct _stmt_vec_info {
*** 622,627 ****
--- 622,631 ----
       is 1.  */
    unsigned int gap;
  
+   /* The minimum negative dependence distance this stmt participates in
+      or zero if none.  */
+   unsigned int min_neg_dist;
+ 
    /* Not all stmts in the loop need to be vectorized. e.g, the increment
       of the loop induction variable and computation of array indexes. relevant
       indicates whether the stmt needs to be vectorized.  */
*************** typedef struct _stmt_vec_info {
*** 677,682 ****
--- 681,687 ----
  #define STMT_VINFO_GROUP_SAME_DR_STMT(S)   (S)->same_dr_stmt
  #define STMT_VINFO_GROUPED_ACCESS(S)      ((S)->first_element != NULL && 
(S)->data_ref_info)
  #define STMT_VINFO_LOOP_PHI_EVOLUTION_PART(S) (S)->loop_phi_evolution_part
+ #define STMT_VINFO_MIN_NEG_DIST(S)    (S)->min_neg_dist
  
  #define GROUP_FIRST_ELEMENT(S)          (S)->first_element
  #define GROUP_NEXT_ELEMENT(S)           (S)->next_element
Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c   (revision 207938)
--- gcc/tree-vect-data-refs.c   (working copy)
*************** vect_analyze_data_ref_dependence (struct
*** 403,408 ****
--- 425,437 ----
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "dependence distance negative.\n");
+         /* Record a negative dependence distance to later limit the
+            amount of stmt copying / unrolling we can perform.
+            Only need to handle read-after-write dependence.  */
+         if (DR_IS_READ (drb)
+             && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
+                 || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > dist))
+           STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
          continue;
        }
  
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c       (revision 207938)
--- gcc/tree-vect-stmts.c       (working copy)
*************** vectorizable_load (gimple stmt, gimple_s
*** 5629,5634 ****
--- 5629,5648 ----
        return false;
      }
  
+   /* Invalidate assumptions made by dependence analysis when vectorization
+      on the unrolled body effectively re-orders stmts.  */
+   if (ncopies > 1
+       && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
+       && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+         > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
+     {
+       if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "cannot perform implicit CSE when unrolling "
+                        "with negative dependence distance\n");
+       return false;
+     }
+ 
    if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
      return false;
  
*************** vectorizable_load (gimple stmt, gimple_s
*** 5686,5691 ****
--- 5700,5719 ----
          else if (!vect_grouped_load_supported (vectype, group_size))
            return false;
        }
+ 
+       /* Invalidate assumptions made by dependence analysis when vectorization
+        on the unrolled body effectively re-orders stmts.  */
+       if (!PURE_SLP_STMT (stmt_info)
+         && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
+         && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+             > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "cannot perform implicit CSE when performing "
+                            "group loads with negative dependence distance\n");
+         return false;
+       }
      }
  
  
Index: gcc/testsuite/gcc.dg/vect/pr60276.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/pr60276.c (revision 0)
--- gcc/testsuite/gcc.dg/vect/pr60276.c (working copy)
***************
*** 0 ****
--- 1,52 ----
+ /* { dg-do run } */
+ 
+ extern void abort (void);
+ 
+ static void 
+ foo (int *out, const int *lp, unsigned samples)
+ {
+   int x, target;
+   for (x = 0, target = 0; x < (int)samples; x += 2, target++)
+     {
+       out[x] = lp[target];
+       out[x - 1] = out[x - 2] + out[x];
+     }
+ }
+ 
+ static void 
+ foo_novec (int *out, const int *lp, unsigned samples)
+ {
+   int x, target;
+   for (x = 0, target = 0; x < (int)samples; x += 2, target++)
+     {
+       out[x] = lp[target];
+       out[x - 1] = out[x - 2] + out[x];
+       __asm__ volatile ("" : : : "memory");
+     }
+ }
+ 
+ int main(void)
+ {
+   const int lp[25] = {
+       0, 2, 4, 6, 8,
+       10, 12, 14, 16,
+       18, 20, 22, 24,
+       26, 28, 30, 32,
+       34, 36, 38, 40,
+       42, 44, 46, 48,
+   };
+   int out[49] = {0};
+   int out2[49] = {0};
+   int s;
+ 
+   foo (out + 2, lp + 1, 48);
+   foo_novec (out2 + 2, lp + 1, 48);
+ 
+   for (s = 0; s < 49; s++)
+     if (out[s] != out2[s])
+       abort ();
+ 
+   return 0;
+ }
+ 
+ /* { dg-final { cleanup-tree-dump "vect" } } */

Reply via email to