This adds memcpy/memmove recognition to loop distribution (and cleans it up some more). Issues are similar to memset and not handled (and I just noticed we generate memset/memcpy even with -fno-builtin ...).
Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2012-06-05 Richard Guenther <rguent...@suse.de> PR tree-optimization/53081 * tree-data-ref.h (adjacent_store_dr_p): Rename to ... (adjacent_dr_p): ... this and make it work for reads, too. * tree-loop-distribution.c (enum partition_kind): Add PKIND_MEMCPY. (struct partition_s): Change main_stmt to main_dr, add secondary_dr member. (build_size_arg_loc): Change to date data-reference and not gimplify here. (build_addr_arg_loc): New function split out from ... (generate_memset_builtin): ... here. Use it and simplify. (generate_memcpy_builtin): New function. (generate_code_for_partition): Adjust. (classify_partition): Streamline pattern detection. Detect memcpy. (ldist_gen): Adjust. (tree_loop_distribution): Adjust seed statements for memcpy recognition. * gcc.dg/tree-ssa/ldist-20.c: New testcase. Index: gcc/tree-data-ref.h =================================================================== *** gcc/tree-data-ref.h (revision 188233) --- gcc/tree-data-ref.h (working copy) *************** bool rdg_defs_used_in_other_loops_p (str *** 615,625 **** with a stride equal to its unit type size. */ static inline bool ! adjacent_store_dr_p (struct data_reference *dr) { - if (!DR_IS_WRITE (dr)) - return false; - /* If this is a bitfield store bail out. */ if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1))) --- 615,622 ---- with a stride equal to its unit type size. */ static inline bool ! adjacent_dr_p (struct data_reference *dr) { /* If this is a bitfield store bail out. */ if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1))) Index: gcc/tree-loop-distribution.c =================================================================== *** gcc/tree-loop-distribution.c (revision 188233) --- gcc/tree-loop-distribution.c (working copy) *************** along with GCC; see the file COPYING3. *** 52,66 **** #include "tree-scalar-evolution.h" #include "tree-pass.h" ! enum partition_kind { PKIND_NORMAL, PKIND_MEMSET }; typedef struct partition_s { bitmap stmts; bool has_writes; enum partition_kind kind; ! /* Main statement a kind != PKIND_NORMAL partition is about. */ ! gimple main_stmt; } *partition_t; DEF_VEC_P (partition_t); --- 52,67 ---- #include "tree-scalar-evolution.h" #include "tree-pass.h" ! enum partition_kind { PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY }; typedef struct partition_s { bitmap stmts; bool has_writes; enum partition_kind kind; ! /* data-references a kind != PKIND_NORMAL partition is about. */ ! data_reference_p main_dr; ! data_reference_p secondary_dr; } *partition_t; DEF_VEC_P (partition_t); *************** generate_loops_for_partition (struct loo *** 313,352 **** free (bbs); } ! /* Build the size argument for a memset call. */ ! static inline tree ! build_size_arg_loc (location_t loc, tree nb_iter, tree op, ! gimple_seq *stmt_list) ! { ! gimple_seq stmts; ! tree x = fold_build2_loc (loc, MULT_EXPR, size_type_node, ! fold_convert_loc (loc, size_type_node, nb_iter), ! fold_convert_loc (loc, size_type_node, ! TYPE_SIZE_UNIT (TREE_TYPE (op)))); ! x = force_gimple_operand (x, &stmts, true, NULL); ! gimple_seq_add_seq (stmt_list, stmts); ! return x; } /* Generate a call to memset for PARTITION in LOOP. */ static void ! generate_memset_builtin (struct loop *loop, struct graph *rdg, ! partition_t partition) { gimple_stmt_iterator gsi; gimple stmt, fn_call; ! tree op0, nb_iter, mem, fn, addr_base, nb_bytes; ! gimple_seq stmt_list = NULL, stmts; ! struct data_reference *dr = XCNEW (struct data_reference); location_t loc; tree val; ! stmt = partition->main_stmt; loc = gimple_location (stmt); - op0 = gimple_assign_lhs (stmt); if (gimple_bb (stmt) == loop->latch) nb_iter = number_of_latch_executions (loop); else --- 314,366 ---- free (bbs); } ! /* Build the size argument for a memory operation call. */ ! static tree ! build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter) ! { ! tree size; ! size = fold_build2_loc (loc, MULT_EXPR, sizetype, ! fold_convert_loc (loc, sizetype, nb_iter), ! TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)))); ! return fold_convert_loc (loc, size_type_node, size); ! } ! /* Build an address argument for a memory operation call. */ ! ! static tree ! build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes) ! { ! tree addr_base; ! ! addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); ! addr_base = fold_convert_loc (loc, sizetype, addr_base); ! ! /* Test for a negative stride, iterating over every element. */ ! if (tree_int_cst_sgn (DR_STEP (dr)) == -1) ! { ! addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base, ! fold_convert_loc (loc, sizetype, nb_bytes)); ! addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base, ! TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)))); ! } ! ! return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base); } /* Generate a call to memset for PARTITION in LOOP. */ static void ! generate_memset_builtin (struct loop *loop, partition_t partition) { gimple_stmt_iterator gsi; gimple stmt, fn_call; ! tree nb_iter, mem, fn, nb_bytes; location_t loc; tree val; ! stmt = DR_STMT (partition->main_dr); loc = gimple_location (stmt); if (gimple_bb (stmt) == loop->latch) nb_iter = number_of_latch_executions (loop); else *************** generate_memset_builtin (struct loop *lo *** 355,379 **** /* The new statements will be placed before LOOP. */ gsi = gsi_last_bb (loop_preheader_edge (loop)->src); ! dr = VEC_index (data_reference_p, ! RDG_DATAREFS (rdg, rdg_vertex_for_stmt (rdg, stmt)), 0); ! nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); ! addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); ! addr_base = fold_convert_loc (loc, sizetype, addr_base); ! ! /* Test for a negative stride, iterating over every element. */ ! if (tree_int_cst_sgn (DR_STEP (dr)) == -1) ! { ! addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base, ! fold_convert_loc (loc, sizetype, nb_bytes)); ! addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base, ! TYPE_SIZE_UNIT (TREE_TYPE (op0))); ! } ! ! addr_base = fold_build_pointer_plus_loc (loc, ! DR_BASE_ADDRESS (dr), addr_base); ! mem = force_gimple_operand (addr_base, &stmts, true, NULL); ! gimple_seq_add_seq (&stmt_list, stmts); /* This exactly matches the pattern recognition in classify_partition. */ val = gimple_assign_rhs1 (stmt); --- 369,380 ---- /* The new statements will be placed before LOOP. */ gsi = gsi_last_bb (loop_preheader_edge (loop)->src); ! nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter); ! nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, ! false, GSI_CONTINUE_LINKING); ! mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes); ! mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE, ! false, GSI_CONTINUE_LINKING); /* This exactly matches the pattern recognition in classify_partition. */ val = gimple_assign_rhs1 (stmt); *************** generate_memset_builtin (struct loop *lo *** 393,407 **** tree tem = create_tmp_reg (integer_type_node, NULL); tem = make_ssa_name (tem, NULL); cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE); ! gimple_seq_add_stmt (&stmt_list, cstmt); val = tem; } } fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes); ! gimple_seq_add_stmt (&stmt_list, fn_call); ! gsi_insert_seq_after (&gsi, stmt_list, GSI_CONTINUE_LINKING); if (dump_file && (dump_flags & TDF_DETAILS)) { --- 394,407 ---- tree tem = create_tmp_reg (integer_type_node, NULL); tem = make_ssa_name (tem, NULL); cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE); ! gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING); val = tem; } } fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes); ! gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); if (dump_file && (dump_flags & TDF_DETAILS)) { *************** generate_memset_builtin (struct loop *lo *** 415,420 **** --- 415,468 ---- } } + /* Generate a call to memcpy for PARTITION in LOOP. */ + + static void + generate_memcpy_builtin (struct loop *loop, partition_t partition) + { + gimple_stmt_iterator gsi; + gimple stmt, fn_call; + tree nb_iter, dest, src, fn, nb_bytes; + location_t loc; + enum built_in_function kind; + + stmt = DR_STMT (partition->main_dr); + loc = gimple_location (stmt); + if (gimple_bb (stmt) == loop->latch) + nb_iter = number_of_latch_executions (loop); + else + nb_iter = number_of_exit_cond_executions (loop); + + /* The new statements will be placed before LOOP. */ + gsi = gsi_last_bb (loop_preheader_edge (loop)->src); + + nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter); + nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes); + src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes); + if (ptr_derefs_may_alias_p (dest, src)) + kind = BUILT_IN_MEMMOVE; + else + kind = BUILT_IN_MEMCPY; + + dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + fn = build_fold_addr_expr (builtin_decl_implicit (kind)); + fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes); + gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + if (kind == BUILT_IN_MEMCPY) + fprintf (dump_file, "generated memcpy\n"); + else + fprintf (dump_file, "generated memmove\n"); + } + } + /* Remove and destroy the loop LOOP. */ static void *************** destroy_loop (struct loop *loop) *** 466,478 **** /* Generates code for PARTITION. */ static void ! generate_code_for_partition (struct loop *loop, struct graph *rdg, partition_t partition, bool copy_p) { switch (partition->kind) { case PKIND_MEMSET: ! generate_memset_builtin (loop, rdg, partition); /* If this is the last partition for which we generate code, we have to destroy the loop. */ if (!copy_p) --- 514,534 ---- /* Generates code for PARTITION. */ static void ! generate_code_for_partition (struct loop *loop, partition_t partition, bool copy_p) { switch (partition->kind) { case PKIND_MEMSET: ! generate_memset_builtin (loop, partition); ! /* If this is the last partition for which we generate code, we have ! to destroy the loop. */ ! if (!copy_p) ! destroy_loop (loop); ! break; ! ! case PKIND_MEMCPY: ! generate_memcpy_builtin (loop, partition); /* If this is the last partition for which we generate code, we have to destroy the loop. */ if (!copy_p) *************** classify_partition (loop_p loop, struct *** 849,857 **** bitmap_iterator bi; unsigned i; tree nb_iter; partition->kind = PKIND_NORMAL; ! partition->main_stmt = NULL; if (!flag_tree_loop_distribute_patterns) return; --- 905,915 ---- bitmap_iterator bi; unsigned i; tree nb_iter; + data_reference_p single_load, single_store; partition->kind = PKIND_NORMAL; ! partition->main_dr = NULL; ! partition->secondary_dr = NULL; if (!flag_tree_loop_distribute_patterns) return; *************** classify_partition (loop_p loop, struct *** 880,889 **** } } ! /* Detect memset. */ EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi) { gimple stmt = RDG_STMT (rdg, i); if (gimple_code (stmt) == GIMPLE_PHI) continue; --- 938,951 ---- } } ! /* Detect memset and memcpy. */ ! single_load = NULL; ! single_store = NULL; EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi) { gimple stmt = RDG_STMT (rdg, i); + data_reference_p dr; + unsigned j; if (gimple_code (stmt) == GIMPLE_PHI) continue; *************** classify_partition (loop_p loop, struct *** 892,932 **** if (!gimple_vuse (stmt)) continue; ! /* Exactly one store. */ ! if (gimple_assign_single_p (stmt) ! && !is_gimple_reg (gimple_assign_lhs (stmt))) { ! tree rhs; ! if (partition->main_stmt != NULL) ! return; ! partition->main_stmt = stmt; ! rhs = gimple_assign_rhs1 (stmt); ! if (!(integer_zerop (rhs) ! || integer_all_onesp (rhs) ! || real_zerop (rhs) ! || (TREE_CODE (rhs) == CONSTRUCTOR ! && !TREE_CLOBBER_P (rhs)) ! || (INTEGRAL_TYPE_P (TREE_TYPE (rhs)) ! && (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) ! == TYPE_MODE (unsigned_char_type_node))))) ! return; ! if (TREE_CODE (rhs) == SSA_NAME ! && !SSA_NAME_IS_DEFAULT_DEF (rhs) ! && flow_bb_inside_loop_p ! (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs)))) ! return; ! if (VEC_length (data_reference_p, RDG_DATAREFS (rdg, i)) != 1) ! return; ! if (!adjacent_store_dr_p (VEC_index (data_reference_p, ! RDG_DATAREFS (rdg, i), 0))) ! return; } - else - return; } ! if (partition->main_stmt != NULL) ! partition->kind = PKIND_MEMSET; } /* For a data reference REF, return the declaration of its base --- 954,1021 ---- if (!gimple_vuse (stmt)) continue; ! /* Otherwise just regular loads/stores. */ ! if (!gimple_assign_single_p (stmt)) ! return; ! ! /* But exactly one store and/or load. */ ! for (j = 0; ! VEC_iterate (data_reference_p, RDG_DATAREFS (rdg, i), j, dr); ++j) { ! if (DR_IS_READ (dr)) ! { ! if (single_load != NULL) ! return; ! single_load = dr; ! } ! else ! { ! if (single_store != NULL) ! return; ! single_store = dr; ! } } } ! if (single_store && !single_load) ! { ! gimple stmt = DR_STMT (single_store); ! tree rhs = gimple_assign_rhs1 (stmt); ! if (!(integer_zerop (rhs) ! || integer_all_onesp (rhs) ! || real_zerop (rhs) ! || (TREE_CODE (rhs) == CONSTRUCTOR ! && !TREE_CLOBBER_P (rhs)) ! || (INTEGRAL_TYPE_P (TREE_TYPE (rhs)) ! && (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) ! == TYPE_MODE (unsigned_char_type_node))))) ! return; ! if (TREE_CODE (rhs) == SSA_NAME ! && !SSA_NAME_IS_DEFAULT_DEF (rhs) ! && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs)))) ! return; ! if (!adjacent_dr_p (single_store)) ! return; ! partition->kind = PKIND_MEMSET; ! partition->main_dr = single_store; ! } ! else if (single_store && single_load) ! { ! gimple store = DR_STMT (single_store); ! gimple load = DR_STMT (single_load); ! /* Direct aggregate copy or via an SSA name temporary. */ ! if (load != store ! && gimple_assign_lhs (load) != gimple_assign_rhs1 (store)) ! return; ! if (!adjacent_dr_p (single_store) ! || !adjacent_dr_p (single_load) ! || !operand_equal_p (DR_STEP (single_store), ! DR_STEP (single_load), 0)) ! return; ! partition->kind = PKIND_MEMCPY; ! partition->main_dr = single_store; ! partition->secondary_dr = single_load; ! } } /* For a data reference REF, return the declaration of its base *************** ldist_gen (struct loop *loop, struct gra *** 1259,1265 **** dump_rdg_partitions (dump_file, partitions); FOR_EACH_VEC_ELT (partition_t, partitions, i, partition) ! generate_code_for_partition (loop, rdg, partition, i < nbp - 1); ldist_done: --- 1348,1354 ---- dump_rdg_partitions (dump_file, partitions); FOR_EACH_VEC_ELT (partition_t, partitions, i, partition) ! generate_code_for_partition (loop, partition, i < nbp - 1); ldist_done: *************** tree_loop_distribution (void) *** 1392,1413 **** || is_gimple_reg (gimple_assign_lhs (stmt))) continue; - /* If we are only performing pattern detection restrict - what we try to distribute to stores from constants. */ - if (!flag_tree_loop_distribution) - { - tree rhs = gimple_assign_rhs1 (stmt); - if (!is_gimple_min_invariant (rhs) - && TREE_CODE (rhs) != CONSTRUCTOR - && TREE_CODE (rhs) != SSA_NAME) - continue; - if (TREE_CODE (rhs) == SSA_NAME - && !SSA_NAME_IS_DEFAULT_DEF (rhs) - && flow_bb_inside_loop_p - (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs)))) - continue; - } - VEC_safe_push (gimple, heap, work_list, stmt); } } --- 1481,1486 ---- Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-20.c =================================================================== *** gcc/testsuite/gcc.dg/tree-ssa/ldist-20.c (revision 0) --- gcc/testsuite/gcc.dg/tree-ssa/ldist-20.c (revision 0) *************** *** 0 **** --- 1,37 ---- + /* { dg-do compile } */ + /* { dg-options "-O2 -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */ + + void foo(char *); + void my_memcpy (void *q, unsigned int n) + { + unsigned i; + char p[1024]; + for (i = 0; i < n; ++i) + ((char *)p)[i] = ((char *)q)[i]; + foo(p); + } + + struct S { int i; int j; }; + + void my_memcpy2 (void *q, unsigned int n) + { + unsigned i; + char p[1024]; + for (i = 0; i < n; ++i) + ((struct S *)p)[i] = ((struct S *)q)[i]; + foo(p); + } + + char p[1024]; + void my_memmove (unsigned int n) + { + unsigned i; + for (i = 0; i < n; ++i) + p[i] = p[i+1]; + foo(p); + } + + + /* { dg-final { scan-tree-dump-times "generated memcpy" 2 "ldist" } } */ + /* { dg-final { scan-tree-dump-times "generated memmove" 1 "ldist" } } */ + /* { dg-final { cleanup-tree-dump "ldist" } } */