This adds memcpy/memmove recognition to loop distribution (and
cleans it up some more).  Remaining issues are similar to the
memset case and are not handled here (and I just noticed we
generate memset/memcpy even with -fno-builtin ...).
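
For illustration (not part of the patch), the kind of loop the new
detection targets is a single adjacent load feeding a single adjacent
store; a sketch is below (names are made up, the actually covered
patterns are in the new testcase):

  void
  copy (char *dst, const char *src, unsigned int n)
  {
    unsigned int i;
    for (i = 0; i < n; ++i)
      dst[i] = src[i];  /* Distributed to a memcpy call, or memmove
                           if dst and src may overlap.  */
  }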

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2012-06-05  Richard Guenther  <rguent...@suse.de>

        PR tree-optimization/53081
        * tree-data-ref.h (adjacent_store_dr_p): Rename to ...
        (adjacent_dr_p): ... this and make it work for reads, too.
        * tree-loop-distribution.c (enum partition_kind): Add PKIND_MEMCPY.
        (struct partition_s): Change main_stmt to main_dr, add
        secondary_dr member.
        (build_size_arg_loc): Change to take a data-reference and do
        not gimplify here.
        (build_addr_arg_loc): New function split out from ...
        (generate_memset_builtin): ... here.  Use it and simplify.
        (generate_memcpy_builtin): New function.
        (generate_code_for_partition): Adjust.
        (classify_partition): Streamline pattern detection.  Detect
        memcpy.
        (ldist_gen): Adjust.
        (tree_loop_distribution): Adjust seed statements for memcpy
        recognition.

        * gcc.dg/tree-ssa/ldist-20.c: New testcase.

Index: gcc/tree-data-ref.h
===================================================================
*** gcc/tree-data-ref.h (revision 188233)
--- gcc/tree-data-ref.h (working copy)
*************** bool rdg_defs_used_in_other_loops_p (str
*** 615,625 ****
     with a stride equal to its unit type size.  */
  
  static inline bool
! adjacent_store_dr_p (struct data_reference *dr)
  {
-   if (!DR_IS_WRITE (dr))
-     return false;
- 
    /* If this is a bitfield store bail out.  */
    if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
        && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
--- 615,622 ----
     with a stride equal to its unit type size.  */
  
  static inline bool
! adjacent_dr_p (struct data_reference *dr)
  {
    /* If this is a bitfield store bail out.  */
    if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
        && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
Index: gcc/tree-loop-distribution.c
===================================================================
*** gcc/tree-loop-distribution.c        (revision 188233)
--- gcc/tree-loop-distribution.c        (working copy)
*************** along with GCC; see the file COPYING3.
*** 52,66 ****
  #include "tree-scalar-evolution.h"
  #include "tree-pass.h"
  
! enum partition_kind { PKIND_NORMAL, PKIND_MEMSET };
  
  typedef struct partition_s
  {
    bitmap stmts;
    bool has_writes;
    enum partition_kind kind;
!   /* Main statement a kind != PKIND_NORMAL partition is about.  */
!   gimple main_stmt;
  } *partition_t;
  
  DEF_VEC_P (partition_t);
--- 52,67 ----
  #include "tree-scalar-evolution.h"
  #include "tree-pass.h"
  
! enum partition_kind { PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY };
  
  typedef struct partition_s
  {
    bitmap stmts;
    bool has_writes;
    enum partition_kind kind;
!   /* data-references a kind != PKIND_NORMAL partition is about.  */
!   data_reference_p main_dr;
!   data_reference_p secondary_dr;
  } *partition_t;
  
  DEF_VEC_P (partition_t);
*************** generate_loops_for_partition (struct loo
*** 313,352 ****
    free (bbs);
  }
  
! /* Build the size argument for a memset call.  */
  
! static inline tree
! build_size_arg_loc (location_t loc, tree nb_iter, tree op,
!                   gimple_seq *stmt_list)
! {
!   gimple_seq stmts;
!   tree x = fold_build2_loc (loc, MULT_EXPR, size_type_node,
!                           fold_convert_loc (loc, size_type_node, nb_iter),
!                           fold_convert_loc (loc, size_type_node,
!                                             TYPE_SIZE_UNIT (TREE_TYPE (op))));
!   x = force_gimple_operand (x, &stmts, true, NULL);
!   gimple_seq_add_seq (stmt_list, stmts);
  
!   return x;
  }
  
  /* Generate a call to memset for PARTITION in LOOP.  */
  
  static void
! generate_memset_builtin (struct loop *loop, struct graph *rdg,
!                        partition_t partition)
  {
    gimple_stmt_iterator gsi;
    gimple stmt, fn_call;
!   tree op0, nb_iter, mem, fn, addr_base, nb_bytes;
!   gimple_seq stmt_list = NULL, stmts;
!   struct data_reference *dr = XCNEW (struct data_reference);
    location_t loc;
    tree val;
  
!   stmt = partition->main_stmt;
    loc = gimple_location (stmt);
-   op0 = gimple_assign_lhs (stmt);
    if (gimple_bb (stmt) == loop->latch)
      nb_iter = number_of_latch_executions (loop);
    else
--- 314,366 ----
    free (bbs);
  }
  
! /* Build the size argument for a memory operation call.  */
  
! static tree
! build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
! {
!   tree size;
!   size = fold_build2_loc (loc, MULT_EXPR, sizetype,
!                         fold_convert_loc (loc, sizetype, nb_iter),
!                         TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
!   return fold_convert_loc (loc, size_type_node, size);
! }
  
! /* Build an address argument for a memory operation call.  */
! 
! static tree
! build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
! {
!   tree addr_base;
! 
!   addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
!   addr_base = fold_convert_loc (loc, sizetype, addr_base);
! 
!   /* Test for a negative stride, iterating over every element.  */
!   if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
!     {
!       addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
!                                 fold_convert_loc (loc, sizetype, nb_bytes));
!       addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
!                                 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
!     }
! 
!   return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
  }
  
  /* Generate a call to memset for PARTITION in LOOP.  */
  
  static void
! generate_memset_builtin (struct loop *loop, partition_t partition)
  {
    gimple_stmt_iterator gsi;
    gimple stmt, fn_call;
!   tree nb_iter, mem, fn, nb_bytes;
    location_t loc;
    tree val;
  
!   stmt = DR_STMT (partition->main_dr);
    loc = gimple_location (stmt);
    if (gimple_bb (stmt) == loop->latch)
      nb_iter = number_of_latch_executions (loop);
    else
*************** generate_memset_builtin (struct loop *lo
*** 355,379 ****
    /* The new statements will be placed before LOOP.  */
    gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
  
!   dr = VEC_index (data_reference_p,
!                 RDG_DATAREFS (rdg, rdg_vertex_for_stmt (rdg, stmt)), 0);
!   nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
!   addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
!   addr_base = fold_convert_loc (loc, sizetype, addr_base);
! 
!   /* Test for a negative stride, iterating over every element.  */
!   if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
!     {
!       addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
!                                 fold_convert_loc (loc, sizetype, nb_bytes));
!       addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
!                                 TYPE_SIZE_UNIT (TREE_TYPE (op0)));
!     }
! 
!   addr_base = fold_build_pointer_plus_loc (loc,
!                                          DR_BASE_ADDRESS (dr), addr_base);
!   mem = force_gimple_operand (addr_base, &stmts, true, NULL);
!   gimple_seq_add_seq (&stmt_list, stmts);
  
    /* This exactly matches the pattern recognition in classify_partition.  */
    val = gimple_assign_rhs1 (stmt);
--- 369,380 ----
    /* The new statements will be placed before LOOP.  */
    gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
  
!   nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
!   nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
!                                      false, GSI_CONTINUE_LINKING);
!   mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
!   mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
!                                 false, GSI_CONTINUE_LINKING);
  
    /* This exactly matches the pattern recognition in classify_partition.  */
    val = gimple_assign_rhs1 (stmt);
*************** generate_memset_builtin (struct loop *lo
*** 393,407 ****
          tree tem = create_tmp_reg (integer_type_node, NULL);
          tem = make_ssa_name (tem, NULL);
          cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
!         gimple_seq_add_stmt (&stmt_list, cstmt);
          val = tem;
        }
      }
  
    fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
    fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
!   gimple_seq_add_stmt (&stmt_list, fn_call);
!   gsi_insert_seq_after (&gsi, stmt_list, GSI_CONTINUE_LINKING);
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
--- 394,407 ----
          tree tem = create_tmp_reg (integer_type_node, NULL);
          tem = make_ssa_name (tem, NULL);
          cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
!         gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
          val = tem;
        }
      }
  
    fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
    fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
!   gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
*************** generate_memset_builtin (struct loop *lo
*** 415,420 ****
--- 415,468 ----
      }
  }
  
+ /* Generate a call to memcpy for PARTITION in LOOP.  */
+ 
+ static void
+ generate_memcpy_builtin (struct loop *loop, partition_t partition)
+ {
+   gimple_stmt_iterator gsi;
+   gimple stmt, fn_call;
+   tree nb_iter, dest, src, fn, nb_bytes;
+   location_t loc;
+   enum built_in_function kind;
+ 
+   stmt = DR_STMT (partition->main_dr);
+   loc = gimple_location (stmt);
+   if (gimple_bb (stmt) == loop->latch)
+     nb_iter = number_of_latch_executions (loop);
+   else
+     nb_iter = number_of_exit_cond_executions (loop);
+ 
+   /* The new statements will be placed before LOOP.  */
+   gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
+ 
+   nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
+   nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
+                                      false, GSI_CONTINUE_LINKING);
+   dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
+   src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
+   if (ptr_derefs_may_alias_p (dest, src))
+     kind = BUILT_IN_MEMMOVE;
+   else
+     kind = BUILT_IN_MEMCPY;
+ 
+   dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
+                                  false, GSI_CONTINUE_LINKING);
+   src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
+                                 false, GSI_CONTINUE_LINKING);
+   fn = build_fold_addr_expr (builtin_decl_implicit (kind));
+   fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
+   gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
+ 
+   if (dump_file && (dump_flags & TDF_DETAILS))
+     {
+       if (kind == BUILT_IN_MEMCPY)
+       fprintf (dump_file, "generated memcpy\n");
+       else
+       fprintf (dump_file, "generated memmove\n");
+     }
+ }
+ 
  /* Remove and destroy the loop LOOP.  */
  
  static void
*************** destroy_loop (struct loop *loop)
*** 466,478 ****
  /* Generates code for PARTITION.  */
  
  static void
! generate_code_for_partition (struct loop *loop, struct graph *rdg,
                             partition_t partition, bool copy_p)
  {
    switch (partition->kind)
      {
      case PKIND_MEMSET:
!       generate_memset_builtin (loop, rdg, partition);
        /* If this is the last partition for which we generate code, we have
         to destroy the loop.  */
        if (!copy_p)
--- 514,534 ----
  /* Generates code for PARTITION.  */
  
  static void
! generate_code_for_partition (struct loop *loop,
                             partition_t partition, bool copy_p)
  {
    switch (partition->kind)
      {
      case PKIND_MEMSET:
!       generate_memset_builtin (loop, partition);
!       /* If this is the last partition for which we generate code, we have
!        to destroy the loop.  */
!       if (!copy_p)
!       destroy_loop (loop);
!       break;
! 
!     case PKIND_MEMCPY:
!       generate_memcpy_builtin (loop, partition);
        /* If this is the last partition for which we generate code, we have
         to destroy the loop.  */
        if (!copy_p)
*************** classify_partition (loop_p loop, struct
*** 849,857 ****
    bitmap_iterator bi;
    unsigned i;
    tree nb_iter;
  
    partition->kind = PKIND_NORMAL;
!   partition->main_stmt = NULL;
  
    if (!flag_tree_loop_distribute_patterns)
      return;
--- 905,915 ----
    bitmap_iterator bi;
    unsigned i;
    tree nb_iter;
+   data_reference_p single_load, single_store;
  
    partition->kind = PKIND_NORMAL;
!   partition->main_dr = NULL;
!   partition->secondary_dr = NULL;
  
    if (!flag_tree_loop_distribute_patterns)
      return;
*************** classify_partition (loop_p loop, struct
*** 880,889 ****
        }
      }
  
!   /* Detect memset.  */
    EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
      {
        gimple stmt = RDG_STMT (rdg, i);
  
        if (gimple_code (stmt) == GIMPLE_PHI)
        continue;
--- 938,951 ----
        }
      }
  
!   /* Detect memset and memcpy.  */
!   single_load = NULL;
!   single_store = NULL;
    EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
      {
        gimple stmt = RDG_STMT (rdg, i);
+       data_reference_p dr;
+       unsigned j;
  
        if (gimple_code (stmt) == GIMPLE_PHI)
        continue;
*************** classify_partition (loop_p loop, struct
*** 892,932 ****
        if (!gimple_vuse (stmt))
        continue;
  
!       /* Exactly one store.  */
!       if (gimple_assign_single_p (stmt)
!         && !is_gimple_reg (gimple_assign_lhs (stmt)))
        {
!         tree rhs;
!         if (partition->main_stmt != NULL)
!           return;
!         partition->main_stmt = stmt;
!         rhs = gimple_assign_rhs1 (stmt);
!         if (!(integer_zerop (rhs)
!               || integer_all_onesp (rhs)
!               || real_zerop (rhs)
!               || (TREE_CODE (rhs) == CONSTRUCTOR
!                   && !TREE_CLOBBER_P (rhs))
!               || (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
!                   && (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt)))
!                       == TYPE_MODE (unsigned_char_type_node)))))
!           return;
!         if (TREE_CODE (rhs) == SSA_NAME
!             && !SSA_NAME_IS_DEFAULT_DEF (rhs)
!             && flow_bb_inside_loop_p
!                  (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
!           return;
!         if (VEC_length (data_reference_p, RDG_DATAREFS (rdg, i)) != 1)
!           return;
!         if (!adjacent_store_dr_p (VEC_index (data_reference_p,
!                                              RDG_DATAREFS (rdg, i), 0)))
!           return;
        }
-       else
-       return;
      }
  
!   if (partition->main_stmt != NULL)
!     partition->kind = PKIND_MEMSET;
  }
  
  /* For a data reference REF, return the declaration of its base
--- 954,1021 ----
        if (!gimple_vuse (stmt))
        continue;
  
!       /* Otherwise just regular loads/stores.  */
!       if (!gimple_assign_single_p (stmt))
!       return;
! 
!       /* But exactly one store and/or load.  */
!       for (j = 0;
!          VEC_iterate (data_reference_p, RDG_DATAREFS (rdg, i), j, dr); ++j)
        {
!         if (DR_IS_READ (dr))
!           {
!             if (single_load != NULL)
!               return;
!             single_load = dr;
!           }
!         else
!           {
!             if (single_store != NULL)
!               return;
!             single_store = dr;
!           }
        }
      }
  
!   if (single_store && !single_load)
!     {
!       gimple stmt = DR_STMT (single_store);
!       tree rhs = gimple_assign_rhs1 (stmt);
!       if (!(integer_zerop (rhs)
!           || integer_all_onesp (rhs)
!           || real_zerop (rhs)
!           || (TREE_CODE (rhs) == CONSTRUCTOR
!               && !TREE_CLOBBER_P (rhs))
!           || (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
!               && (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt)))
!                   == TYPE_MODE (unsigned_char_type_node)))))
!       return;
!       if (TREE_CODE (rhs) == SSA_NAME
!         && !SSA_NAME_IS_DEFAULT_DEF (rhs)
!         && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
!       return;
!       if (!adjacent_dr_p (single_store))
!       return;
!       partition->kind = PKIND_MEMSET;
!       partition->main_dr = single_store;
!     }
!   else if (single_store && single_load)
!     {
!       gimple store = DR_STMT (single_store);
!       gimple load = DR_STMT (single_load);
!       /* Direct aggregate copy or via an SSA name temporary.  */
!       if (load != store
!         && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
!       return;
!       if (!adjacent_dr_p (single_store)
!         || !adjacent_dr_p (single_load)
!         || !operand_equal_p (DR_STEP (single_store),
!                              DR_STEP (single_load), 0))
!       return;
!       partition->kind = PKIND_MEMCPY;
!       partition->main_dr = single_store;
!       partition->secondary_dr = single_load;
!     }
  }
  
  /* For a data reference REF, return the declaration of its base
*************** ldist_gen (struct loop *loop, struct gra
*** 1259,1265 ****
      dump_rdg_partitions (dump_file, partitions);
  
    FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
!     generate_code_for_partition (loop, rdg, partition, i < nbp - 1);
  
   ldist_done:
  
--- 1348,1354 ----
      dump_rdg_partitions (dump_file, partitions);
  
    FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
!     generate_code_for_partition (loop, partition, i < nbp - 1);
  
   ldist_done:
  
*************** tree_loop_distribution (void)
*** 1392,1413 ****
                  || is_gimple_reg (gimple_assign_lhs (stmt)))
                continue;
  
-             /* If we are only performing pattern detection restrict
-                what we try to distribute to stores from constants.  */
-             if (!flag_tree_loop_distribution)
-               {
-                 tree rhs = gimple_assign_rhs1 (stmt);
-                 if (!is_gimple_min_invariant (rhs)
-                     && TREE_CODE (rhs) != CONSTRUCTOR
-                     && TREE_CODE (rhs) != SSA_NAME)
-                   continue;
-                 if (TREE_CODE (rhs) == SSA_NAME
-                     && !SSA_NAME_IS_DEFAULT_DEF (rhs)
-                     && flow_bb_inside_loop_p
-                          (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
-                   continue;
-               }
- 
              VEC_safe_push (gimple, heap, work_list, stmt);
            }
        }
--- 1481,1486 ----
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-20.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/ldist-20.c    (revision 0)
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-20.c    (revision 0)
***************
*** 0 ****
--- 1,37 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
+ 
+ void foo(char *);
+ void my_memcpy (void *q, unsigned int n)
+ {
+   unsigned i;
+   char p[1024];
+   for (i = 0; i < n; ++i)
+     ((char *)p)[i] = ((char *)q)[i];
+   foo(p);
+ }
+ 
+ struct S { int i; int j; };
+ 
+ void my_memcpy2 (void *q, unsigned int n)
+ {
+   unsigned i;
+   char p[1024];
+   for (i = 0; i < n; ++i)
+     ((struct S *)p)[i] = ((struct S *)q)[i];
+   foo(p);
+ }
+ 
+ char p[1024];
+ void my_memmove (unsigned int n)
+ {
+   unsigned i;
+   for (i = 0; i < n; ++i)
+     p[i] = p[i+1];
+   foo(p);
+ }
+ 
+ 
+ /* { dg-final { scan-tree-dump-times "generated memcpy" 2 "ldist" } } */
+ /* { dg-final { scan-tree-dump-times "generated memmove" 1 "ldist" } } */
+ /* { dg-final { cleanup-tree-dump "ldist" } } */
