This is the last TLC kind of patches for loop distribution. It moves more loop distribution specific code from tree-data-ref.c to tree-loop-distribution.c thereby simplifying it. It should now be reasonably easy and efficient to add more pattern recognitions (I was thinking about even BLAS-like loops, though for being more useful loop distribution first has to be extended to work on loop nests instead of only innermost loops).
Bootstrap and regtest running on x86_64-unknown-linux-gnu (as always with loop-distribution and pattern recognition enabled). Richard. 2012-06-04 Richard Guenther <rguent...@suse.de> * tree-data-ref.c (stores_from_loop): Remove. (stmt_with_adjacent_zero_store_dr_p): Likewise. (stores_zero_from_loop): Likewise. * tree-data-ref.h (stores_from_loop, stores_zero_from_loop, stmt_with_adjacent_zero_store_dr_p, stride_of_unit_type_p): Remove. (adjacent_store_dr_p): New function. * tree-loop-distribution.c (generate_memset_builtin): Pass the RDG, use the already available data-reference. (generate_code_for_partition): Pass down RDG. (classify_partition): Inline parts of the former stmt_with_adjacent_zero_store_dr_p here and use adjacent_store_dr_p. (ldist_gen): Remember if there was any detected builtin and do less work if not and flag_tree_loop_distribution is not set. (tree_loop_distribution): Inline and fuse stores_from_loop and stores_zero_from_loop here. Index: trunk/gcc/tree-data-ref.c =================================================================== *** trunk.orig/gcc/tree-data-ref.c 2012-06-04 13:47:31.000000000 +0200 --- trunk/gcc/tree-data-ref.c 2012-06-04 14:29:16.648737016 +0200 *************** free_rdg (struct graph *rdg) *** 5174,5261 **** free_graph (rdg); } - /* Initialize STMTS with all the statements of LOOP that contain a - store to memory. */ - - void - stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts) - { - unsigned int i; - basic_block *bbs = get_loop_body_in_dom_order (loop); - - for (i = 0; i < loop->num_nodes; i++) - { - basic_block bb = bbs[i]; - gimple_stmt_iterator bsi; - - for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) - if (gimple_vdef (gsi_stmt (bsi))) - VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi)); - } - - free (bbs); - } - - /* Returns true when the statement at STMT is of the form "A[i] = 0" - that contains a data reference on its LHS with a stride of the same - size as its unit type. */ - - bool - stmt_with_adjacent_zero_store_dr_p (gimple stmt) - { - tree lhs, rhs; - bool res; - struct data_reference *dr; - - if (!stmt - || !gimple_vdef (stmt) - || !gimple_assign_single_p (stmt)) - return false; - - lhs = gimple_assign_lhs (stmt); - rhs = gimple_assign_rhs1 (stmt); - - /* If this is a bitfield store bail out. */ - if (TREE_CODE (lhs) == COMPONENT_REF - && DECL_BIT_FIELD (TREE_OPERAND (lhs, 1))) - return false; - - if (!(integer_zerop (rhs) || real_zerop (rhs))) - return false; - - dr = XCNEW (struct data_reference); - - DR_STMT (dr) = stmt; - DR_REF (dr) = lhs; - - res = dr_analyze_innermost (dr, loop_containing_stmt (stmt)) - && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (lhs)); - - free_data_ref (dr); - return res; - } - - /* Initialize STMTS with all the statements of LOOP that contain a - store to memory of the form "A[i] = 0". */ - - void - stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts) - { - unsigned int i; - basic_block bb; - gimple_stmt_iterator si; - gimple stmt; - basic_block *bbs = get_loop_body_in_dom_order (loop); - - for (i = 0; i < loop->num_nodes; i++) - for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) - if ((stmt = gsi_stmt (si)) - && stmt_with_adjacent_zero_store_dr_p (stmt)) - VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si)); - - free (bbs); - } - /* Determines whether the statement from vertex V of the RDG has a definition used outside the loop that contains this statement. */ --- 5174,5179 ---- Index: trunk/gcc/tree-data-ref.h =================================================================== *** trunk.orig/gcc/tree-data-ref.h 2012-06-04 13:47:31.000000000 +0200 --- trunk/gcc/tree-data-ref.h 2012-06-04 14:29:35.097736096 +0200 *************** index_in_loop_nest (int var, VEC (loop_p *** 609,629 **** return var_index; } - void stores_from_loop (struct loop *, VEC (gimple, heap) **); - void stores_zero_from_loop (struct loop *, VEC (gimple, heap) **); bool rdg_defs_used_in_other_loops_p (struct graph *, int); - bool stmt_with_adjacent_zero_store_dr_p (gimple); ! /* Returns true when STRIDE is equal in absolute value to the size of ! the unit type of TYPE. */ static inline bool ! stride_of_unit_type_p (tree stride, tree type) { ! return (TREE_CODE (stride) == INTEGER_CST ! && tree_int_cst_equal (fold_unary (ABS_EXPR, TREE_TYPE (stride), ! stride), ! TYPE_SIZE_UNIT (type))); } /* In tree-data-ref.c */ --- 609,637 ---- return var_index; } bool rdg_defs_used_in_other_loops_p (struct graph *, int); ! /* Returns true when the data reference DR the form "A[i] = ..." ! with a stride equal to its unit type size. */ static inline bool ! adjacent_store_dr_p (struct data_reference *dr) { ! if (!DR_IS_WRITE (dr)) ! return false; ! ! /* If this is a bitfield store bail out. */ ! if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF ! && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1))) ! return false; ! ! if (!DR_STEP (dr) ! || TREE_CODE (DR_STEP (dr)) != INTEGER_CST) ! return false; ! ! return tree_int_cst_equal (fold_unary (ABS_EXPR, TREE_TYPE (DR_STEP (dr)), ! DR_STEP (dr)), ! TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)))); } /* In tree-data-ref.c */ Index: trunk/gcc/tree-loop-distribution.c =================================================================== *** trunk.orig/gcc/tree-loop-distribution.c 2012-06-04 13:47:31.000000000 +0200 --- trunk/gcc/tree-loop-distribution.c 2012-06-04 14:32:54.637729199 +0200 *************** build_size_arg_loc (location_t loc, tree *** 323,329 **** /* Generate a call to memset for PARTITION in LOOP. */ static void ! generate_memset_builtin (struct loop *loop, partition_t partition) { gimple_stmt_iterator gsi; gimple stmt, fn_call; --- 323,330 ---- /* Generate a call to memset for PARTITION in LOOP. */ static void ! generate_memset_builtin (struct loop *loop, struct graph *rdg, ! partition_t partition) { gimple_stmt_iterator gsi; gimple stmt, fn_call; *************** generate_memset_builtin (struct loop *lo *** 331,337 **** gimple_seq stmt_list = NULL, stmts; struct data_reference *dr = XCNEW (struct data_reference); location_t loc; - bool res; stmt = partition->main_stmt; loc = gimple_location (stmt); --- 332,337 ---- *************** generate_memset_builtin (struct loop *lo *** 344,354 **** /* The new statements will be placed before LOOP. */ gsi = gsi_last_bb (loop_preheader_edge (loop)->src); ! DR_STMT (dr) = stmt; ! DR_REF (dr) = op0; ! res = dr_analyze_innermost (dr, loop_containing_stmt (stmt)); ! gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0))); ! nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); addr_base = fold_convert_loc (loc, sizetype, addr_base); --- 344,351 ---- /* The new statements will be placed before LOOP. */ gsi = gsi_last_bb (loop_preheader_edge (loop)->src); ! dr = VEC_index (data_reference_p, ! RDG_DATAREFS (rdg, rdg_vertex_for_stmt (rdg, stmt)), 0); nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); addr_base = fold_convert_loc (loc, sizetype, addr_base); *************** generate_memset_builtin (struct loop *lo *** 374,381 **** if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "generated memset zero\n"); - - free_data_ref (dr); } /* Remove and destroy the loop LOOP. */ --- 371,376 ---- *************** destroy_loop (struct loop *loop) *** 429,441 **** /* Generates code for PARTITION. */ static void ! generate_code_for_partition (struct loop *loop, partition_t partition, ! bool copy_p) { switch (partition->kind) { case PKIND_MEMSET: ! generate_memset_builtin (loop, partition); /* If this is the last partition for which we generate code, we have to destroy the loop. */ if (!copy_p) --- 424,436 ---- /* Generates code for PARTITION. */ static void ! generate_code_for_partition (struct loop *loop, struct graph *rdg, ! partition_t partition, bool copy_p) { switch (partition->kind) { case PKIND_MEMSET: ! generate_memset_builtin (loop, rdg, partition); /* If this is the last partition for which we generate code, we have to destroy the loop. */ if (!copy_p) *************** classify_partition (loop_p loop, struct *** 865,880 **** if (gimple_assign_single_p (stmt) && !is_gimple_reg (gimple_assign_lhs (stmt))) { if (partition->main_stmt != NULL) return; partition->main_stmt = stmt; } else return; } ! if (partition->main_stmt != NULL ! && stmt_with_adjacent_zero_store_dr_p (partition->main_stmt)) partition->kind = PKIND_MEMSET; } --- 860,883 ---- if (gimple_assign_single_p (stmt) && !is_gimple_reg (gimple_assign_lhs (stmt))) { + tree rhs; if (partition->main_stmt != NULL) return; partition->main_stmt = stmt; + rhs = gimple_assign_rhs1 (stmt); + if (!(integer_zerop (rhs) || real_zerop (rhs))) + return; + if (VEC_length (data_reference_p, RDG_DATAREFS (rdg, i)) != 1) + return; + if (!adjacent_store_dr_p (VEC_index (data_reference_p, + RDG_DATAREFS (rdg, i), 0))) + return; } else return; } ! if (partition->main_stmt != NULL) partition->kind = PKIND_MEMSET; } *************** ldist_gen (struct loop *loop, struct gra *** 1095,1100 **** --- 1098,1104 ---- VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3); partition_t partition; bitmap processed = BITMAP_ALLOC (NULL); + bool any_builtin; remaining_stmts = BITMAP_ALLOC (NULL); upstream_mem_writes = BITMAP_ALLOC (NULL); *************** ldist_gen (struct loop *loop, struct gra *** 1129,1136 **** processed); BITMAP_FREE (processed); FOR_EACH_VEC_ELT (partition_t, partitions, i, partition) ! classify_partition (loop, rdg, partition); /* If we are only distributing patterns fuse all partitions that were not properly classified as builtins. Else fuse partitions --- 1133,1144 ---- processed); BITMAP_FREE (processed); + any_builtin = false; FOR_EACH_VEC_ELT (partition_t, partitions, i, partition) ! { ! classify_partition (loop, rdg, partition); ! any_builtin |= partition_builtin_p (partition); ! } /* If we are only distributing patterns fuse all partitions that were not properly classified as builtins. Else fuse partitions *************** ldist_gen (struct loop *loop, struct gra *** 1138,1143 **** --- 1146,1157 ---- if (!flag_tree_loop_distribution) { partition_t into; + /* If we did not detect any builtin simply bail out. */ + if (!any_builtin) + { + nbp = 0; + goto ldist_done; + } for (i = 0; VEC_iterate (partition_t, partitions, i, into); ++i) if (!partition_builtin_p (into)) break; *************** ldist_gen (struct loop *loop, struct gra *** 1197,1203 **** dump_rdg_partitions (dump_file, partitions); FOR_EACH_VEC_ELT (partition_t, partitions, i, partition) ! generate_code_for_partition (loop, partition, i < nbp - 1); ldist_done: --- 1211,1217 ---- dump_rdg_partitions (dump_file, partitions); FOR_EACH_VEC_ELT (partition_t, partitions, i, partition) ! generate_code_for_partition (loop, rdg, partition, i < nbp - 1); ldist_done: *************** tree_loop_distribution (void) *** 1301,1308 **** --- 1315,1324 ---- FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST) { VEC (gimple, heap) *work_list = NULL; + basic_block *bbs; int num = loop->num; int nb_generated_loops = 0; + unsigned int i; /* If the loop doesn't have a single exit we will fail anyway, so do that early. */ *************** tree_loop_distribution (void) *** 1313,1325 **** if (loop->num_nodes > 2) continue; ! /* -ftree-loop-distribution strictly distributes more but also ! enables pattern detection. For now simply distribute all stores ! or memset like stores. */ ! if (flag_tree_loop_distribution) ! stores_from_loop (loop, &work_list); ! else if (flag_tree_loop_distribute_patterns) ! stores_zero_from_loop (loop, &work_list); if (VEC_length (gimple, work_list) > 0) nb_generated_loops = distribute_loop (loop, work_list); --- 1329,1359 ---- if (loop->num_nodes > 2) continue; ! /* Initialize the worklist with stmts we seed the partitions with. */ ! bbs = get_loop_body_in_dom_order (loop); ! for (i = 0; i < loop->num_nodes; ++i) ! { ! gimple_stmt_iterator gsi; ! for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi)) ! { ! gimple stmt = gsi_stmt (gsi); ! /* Only distribute stores for now. ! ??? We should also try to distribute scalar reductions, ! thus SSA defs that have scalar uses outside of the loop. */ ! if (!gimple_assign_single_p (stmt) ! || is_gimple_reg (gimple_assign_lhs (stmt))) ! continue; ! ! /* If we are only performing pattern detection restrict ! what we try to distribute to stores from constants. */ ! if (!flag_tree_loop_distribution ! && !is_gimple_min_invariant (gimple_assign_rhs1 (stmt))) ! continue; ! ! VEC_safe_push (gimple, heap, work_list, stmt); ! } ! } ! free (bbs); if (VEC_length (gimple, work_list) > 0) nb_generated_loops = distribute_loop (loop, work_list);