This is the last of the TLC patches for loop distribution.  It moves
more loop-distribution-specific code from tree-data-ref.c to
tree-loop-distribution.c, thereby simplifying tree-data-ref.c.  It should
now be reasonably easy and efficient to add more pattern recognition
(I was even thinking about BLAS-like loops, though to be more useful
loop distribution first has to be extended to work on loop nests
instead of only innermost loops).
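
For illustration only (not part of the patch), this is the kind of
innermost loop the existing pattern detection targets and that
-ftree-loop-distribute-patterns turns into a memset call; the function
and array names are made up:

  void
  clear (char *a, int n)
  {
    int i;
    for (i = 0; i < n; i++)
      a[i] = 0;
  }

  /* After distribution the loop is replaced by something equivalent
     to __builtin_memset (a, 0, n).  */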

Bootstrap and regtest running on x86_64-unknown-linux-gnu (as always
with loop-distribution and pattern recognition enabled).

Richard.

2012-06-04  Richard Guenther  <rguent...@suse.de>

        * tree-data-ref.c (stores_from_loop): Remove.
        (stmt_with_adjacent_zero_store_dr_p): Likewise.
        (stores_zero_from_loop): Likewise.
        * tree-data-ref.h (stores_from_loop, stores_zero_from_loop,
        stmt_with_adjacent_zero_store_dr_p, stride_of_unit_type_p): Remove.
        (adjacent_store_dr_p): New function.
        * tree-loop-distribution.c (generate_memset_builtin): Pass
        the RDG, use the already available data-reference.
        (generate_code_for_partition): Pass down RDG.
        (classify_partition): Inline parts of the former
        stmt_with_adjacent_zero_store_dr_p here and use adjacent_store_dr_p.
        (ldist_gen): Remember if there was any detected builtin and
        do less work if not and flag_tree_loop_distribution is not set.
        (tree_loop_distribution): Inline and fuse stores_from_loop
        and stores_zero_from_loop here.
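
To illustrate what the inlined classification now accepts (this sketch is
not part of the patch; the arrays and the bit-field struct are made up),
a partition's single store qualifies as PKIND_MEMSET only if its RHS is
an integer or real zero, it is not a bit-field store, and its DR_STEP
equals the element size:

  char *c; double *d;
  struct { int x : 3; } *p;

  for (i = 0; i < n; i++)
    c[i] = 0;        /* accepted: zero store, step == sizeof (char)  */
  for (i = 0; i < n; i++)
    d[i] = 0.0;      /* accepted: real_zerop RHS, step == sizeof (double)  */
  for (i = 0; i < n; i++)
    d[2 * i] = 0.0;  /* rejected: step is twice the unit type size  */
  for (i = 0; i < n; i++)
    p[i].x = 0;      /* rejected: bit-field store  */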

Index: trunk/gcc/tree-data-ref.c
===================================================================
*** trunk.orig/gcc/tree-data-ref.c      2012-06-04 13:47:31.000000000 +0200
--- trunk/gcc/tree-data-ref.c   2012-06-04 14:29:16.648737016 +0200
*************** free_rdg (struct graph *rdg)
*** 5174,5261 ****
    free_graph (rdg);
  }
  
- /* Initialize STMTS with all the statements of LOOP that contain a
-    store to memory.  */
- 
- void
- stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
- {
-   unsigned int i;
-   basic_block *bbs = get_loop_body_in_dom_order (loop);
- 
-   for (i = 0; i < loop->num_nodes; i++)
-     {
-       basic_block bb = bbs[i];
-       gimple_stmt_iterator bsi;
- 
-       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
-       if (gimple_vdef (gsi_stmt (bsi)))
-         VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
-     }
- 
-   free (bbs);
- }
- 
- /* Returns true when the statement at STMT is of the form "A[i] = 0"
-    that contains a data reference on its LHS with a stride of the same
-    size as its unit type.  */
- 
- bool
- stmt_with_adjacent_zero_store_dr_p (gimple stmt)
- {
-   tree lhs, rhs;
-   bool res;
-   struct data_reference *dr;
- 
-   if (!stmt
-       || !gimple_vdef (stmt)
-       || !gimple_assign_single_p (stmt))
-     return false;
- 
-   lhs = gimple_assign_lhs (stmt);
-   rhs = gimple_assign_rhs1 (stmt);
- 
-   /* If this is a bitfield store bail out.  */
-   if (TREE_CODE (lhs) == COMPONENT_REF
-       && DECL_BIT_FIELD (TREE_OPERAND (lhs, 1)))
-     return false;
- 
-   if (!(integer_zerop (rhs) || real_zerop (rhs)))
-     return false;
- 
-   dr = XCNEW (struct data_reference);
- 
-   DR_STMT (dr) = stmt;
-   DR_REF (dr) = lhs;
- 
-   res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
-     && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (lhs));
- 
-   free_data_ref (dr);
-   return res;
- }
- 
- /* Initialize STMTS with all the statements of LOOP that contain a
-    store to memory of the form "A[i] = 0".  */
- 
- void
- stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
- {
-   unsigned int i;
-   basic_block bb;
-   gimple_stmt_iterator si;
-   gimple stmt;
-   basic_block *bbs = get_loop_body_in_dom_order (loop);
- 
-   for (i = 0; i < loop->num_nodes; i++)
-     for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
-       if ((stmt = gsi_stmt (si))
-         && stmt_with_adjacent_zero_store_dr_p (stmt))
-       VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
- 
-   free (bbs);
- }
- 
  /* Determines whether the statement from vertex V of the RDG has a
     definition used outside the loop that contains this statement.  */
  
--- 5174,5179 ----
Index: trunk/gcc/tree-data-ref.h
===================================================================
*** trunk.orig/gcc/tree-data-ref.h      2012-06-04 13:47:31.000000000 +0200
--- trunk/gcc/tree-data-ref.h   2012-06-04 14:29:35.097736096 +0200
*************** index_in_loop_nest (int var, VEC (loop_p
*** 609,629 ****
    return var_index;
  }
  
- void stores_from_loop (struct loop *, VEC (gimple, heap) **);
- void stores_zero_from_loop (struct loop *, VEC (gimple, heap) **);
  bool rdg_defs_used_in_other_loops_p (struct graph *, int);
- bool stmt_with_adjacent_zero_store_dr_p (gimple);
  
! /* Returns true when STRIDE is equal in absolute value to the size of
!    the unit type of TYPE.  */
  
  static inline bool
! stride_of_unit_type_p (tree stride, tree type)
  {
!   return (TREE_CODE (stride) == INTEGER_CST
!         && tree_int_cst_equal (fold_unary (ABS_EXPR, TREE_TYPE (stride),
!                                            stride),
!                                TYPE_SIZE_UNIT (type)));
  }
  
  /* In tree-data-ref.c  */
--- 609,637 ----
    return var_index;
  }
  
  bool rdg_defs_used_in_other_loops_p (struct graph *, int);
  
! /* Returns true when the data reference DR has the form "A[i] = ..."
!    with a stride equal to its unit type size.  */
  
  static inline bool
! adjacent_store_dr_p (struct data_reference *dr)
  {
!   if (!DR_IS_WRITE (dr))
!     return false;
! 
!   /* If this is a bitfield store bail out.  */
!   if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
!       && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
!     return false;
! 
!   if (!DR_STEP (dr)
!       || TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
!     return false;
! 
!   return tree_int_cst_equal (fold_unary (ABS_EXPR, TREE_TYPE (DR_STEP (dr)),
!                                        DR_STEP (dr)),
!                            TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  }
  
  /* In tree-data-ref.c  */
Index: trunk/gcc/tree-loop-distribution.c
===================================================================
*** trunk.orig/gcc/tree-loop-distribution.c     2012-06-04 13:47:31.000000000 +0200
--- trunk/gcc/tree-loop-distribution.c  2012-06-04 14:32:54.637729199 +0200
*************** build_size_arg_loc (location_t loc, tree
*** 323,329 ****
  /* Generate a call to memset for PARTITION in LOOP.  */
  
  static void
! generate_memset_builtin (struct loop *loop, partition_t partition)
  {
    gimple_stmt_iterator gsi;
    gimple stmt, fn_call;
--- 323,330 ----
  /* Generate a call to memset for PARTITION in LOOP.  */
  
  static void
! generate_memset_builtin (struct loop *loop, struct graph *rdg,
!                        partition_t partition)
  {
    gimple_stmt_iterator gsi;
    gimple stmt, fn_call;
*************** generate_memset_builtin (struct loop *lo
*** 331,337 ****
    gimple_seq stmt_list = NULL, stmts;
    struct data_reference *dr = XCNEW (struct data_reference);
    location_t loc;
-   bool res;
  
    stmt = partition->main_stmt;
    loc = gimple_location (stmt);
--- 332,337 ----
*************** generate_memset_builtin (struct loop *lo
*** 344,354 ****
    /* The new statements will be placed before LOOP.  */
    gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
  
!   DR_STMT (dr) = stmt;
!   DR_REF (dr) = op0;
!   res = dr_analyze_innermost (dr, loop_containing_stmt (stmt));
!   gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
! 
    nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
    addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
    addr_base = fold_convert_loc (loc, sizetype, addr_base);
--- 344,351 ----
    /* The new statements will be placed before LOOP.  */
    gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
  
!   dr = VEC_index (data_reference_p,
!                 RDG_DATAREFS (rdg, rdg_vertex_for_stmt (rdg, stmt)), 0);
    nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
    addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
    addr_base = fold_convert_loc (loc, sizetype, addr_base);
*************** generate_memset_builtin (struct loop *lo
*** 374,381 ****
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "generated memset zero\n");
- 
-   free_data_ref (dr);
  }
  
  /* Remove and destroy the loop LOOP.  */
--- 371,376 ----
*************** destroy_loop (struct loop *loop)
*** 429,441 ****
  /* Generates code for PARTITION.  */
  
  static void
! generate_code_for_partition (struct loop *loop, partition_t partition,
!                            bool copy_p)
  {
    switch (partition->kind)
      {
      case PKIND_MEMSET:
!       generate_memset_builtin (loop, partition);
        /* If this is the last partition for which we generate code, we have
         to destroy the loop.  */
        if (!copy_p)
--- 424,436 ----
  /* Generates code for PARTITION.  */
  
  static void
! generate_code_for_partition (struct loop *loop, struct graph *rdg,
!                            partition_t partition, bool copy_p)
  {
    switch (partition->kind)
      {
      case PKIND_MEMSET:
!       generate_memset_builtin (loop, rdg, partition);
        /* If this is the last partition for which we generate code, we have
         to destroy the loop.  */
        if (!copy_p)
*************** classify_partition (loop_p loop, struct
*** 865,880 ****
        if (gimple_assign_single_p (stmt)
          && !is_gimple_reg (gimple_assign_lhs (stmt)))
        {
          if (partition->main_stmt != NULL)
            return;
          partition->main_stmt = stmt;
        }
        else
        return;
      }
  
!   if (partition->main_stmt != NULL
!       && stmt_with_adjacent_zero_store_dr_p (partition->main_stmt))
      partition->kind = PKIND_MEMSET;
  }
  
--- 860,883 ----
        if (gimple_assign_single_p (stmt)
          && !is_gimple_reg (gimple_assign_lhs (stmt)))
        {
+         tree rhs;
          if (partition->main_stmt != NULL)
            return;
          partition->main_stmt = stmt;
+         rhs = gimple_assign_rhs1 (stmt);
+         if (!(integer_zerop (rhs) || real_zerop (rhs)))
+           return;
+         if (VEC_length (data_reference_p, RDG_DATAREFS (rdg, i)) != 1)
+           return;
+         if (!adjacent_store_dr_p (VEC_index (data_reference_p,
+                                              RDG_DATAREFS (rdg, i), 0)))
+           return;
        }
        else
        return;
      }
  
!   if (partition->main_stmt != NULL)
      partition->kind = PKIND_MEMSET;
  }
  
*************** ldist_gen (struct loop *loop, struct gra
*** 1095,1100 ****
--- 1098,1104 ----
    VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3);
    partition_t partition;
    bitmap processed = BITMAP_ALLOC (NULL);
+   bool any_builtin;
  
    remaining_stmts = BITMAP_ALLOC (NULL);
    upstream_mem_writes = BITMAP_ALLOC (NULL);
*************** ldist_gen (struct loop *loop, struct gra
*** 1129,1136 ****
                        processed);
    BITMAP_FREE (processed);
  
    FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
!     classify_partition (loop, rdg, partition);
  
    /* If we are only distributing patterns fuse all partitions that
       were not properly classified as builtins.  Else fuse partitions
--- 1133,1144 ----
                        processed);
    BITMAP_FREE (processed);
  
+   any_builtin = false;
    FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
!     {
!       classify_partition (loop, rdg, partition);
!       any_builtin |= partition_builtin_p (partition);
!     }
  
    /* If we are only distributing patterns fuse all partitions that
       were not properly classified as builtins.  Else fuse partitions
*************** ldist_gen (struct loop *loop, struct gra
*** 1138,1143 ****
--- 1146,1157 ----
    if (!flag_tree_loop_distribution)
      {
        partition_t into;
+       /* If we did not detect any builtin simply bail out.  */
+       if (!any_builtin)
+       {
+         nbp = 0;
+         goto ldist_done;
+       }
        for (i = 0; VEC_iterate (partition_t, partitions, i, into); ++i)
        if (!partition_builtin_p (into))
          break;
*************** ldist_gen (struct loop *loop, struct gra
*** 1197,1203 ****
      dump_rdg_partitions (dump_file, partitions);
  
    FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
!     generate_code_for_partition (loop, partition, i < nbp - 1);
  
   ldist_done:
  
--- 1211,1217 ----
      dump_rdg_partitions (dump_file, partitions);
  
    FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
!     generate_code_for_partition (loop, rdg, partition, i < nbp - 1);
  
   ldist_done:
  
*************** tree_loop_distribution (void)
*** 1301,1308 ****
--- 1315,1324 ----
    FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
      {
        VEC (gimple, heap) *work_list = NULL;
+       basic_block *bbs;
        int num = loop->num;
        int nb_generated_loops = 0;
+       unsigned int i;
  
        /* If the loop doesn't have a single exit we will fail anyway,
         so do that early.  */
*************** tree_loop_distribution (void)
*** 1313,1325 ****
        if (loop->num_nodes > 2)
        continue;
  
!       /* -ftree-loop-distribution strictly distributes more but also
!          enables pattern detection.  For now simply distribute all stores
!        or memset like stores.  */
!       if (flag_tree_loop_distribution)
!       stores_from_loop (loop, &work_list);
!       else if (flag_tree_loop_distribute_patterns)
!       stores_zero_from_loop (loop, &work_list);
  
        if (VEC_length (gimple, work_list) > 0)
        nb_generated_loops = distribute_loop (loop, work_list);
--- 1329,1359 ----
        if (loop->num_nodes > 2)
        continue;
  
!       /* Initialize the worklist with stmts we seed the partitions with.  */
!       bbs = get_loop_body_in_dom_order (loop);
!       for (i = 0; i < loop->num_nodes; ++i)
!       {
!         gimple_stmt_iterator gsi;
!         for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
!           {
!             gimple stmt = gsi_stmt (gsi);
!             /* Only distribute stores for now.
!                ???  We should also try to distribute scalar reductions,
!                thus SSA defs that have scalar uses outside of the loop.  */
!             if (!gimple_assign_single_p (stmt)
!                 || is_gimple_reg (gimple_assign_lhs (stmt)))
!               continue;
! 
!             /* If we are only performing pattern detection restrict
!                what we try to distribute to stores from constants.  */
!             if (!flag_tree_loop_distribution
!                 && !is_gimple_min_invariant (gimple_assign_rhs1 (stmt)))
!               continue;
! 
!             VEC_safe_push (gimple, heap, work_list, stmt);
!           }
!       }
!       free (bbs);
  
        if (VEC_length (gimple, work_list) > 0)
        nb_generated_loops = distribute_loop (loop, work_list);
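
For testing individual cases the new classification can be exercised via
the ldist dump; assuming a file t.c containing the zeroing loop from
above, something like

  gcc -O2 -ftree-loop-distribute-patterns -fdump-tree-ldist-details -c t.c

should show "generated memset zero" in the resulting ldist dump file when
the pattern is recognized.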
