Hi Richard, When you have time to review the patch in detail, you might prefer to look at this one instead. I've been working on the stage2 code in the meanwhile, and ended up factoring a bunch of the stage1 code for reuse. I applied the same factorization to the stage1 code so that eventually it will be easier to review differences between stage1 and stage2.
Thanks, Bill 2011-11-17 Bill Schmidt <wschm...@linux.vnet.ibm.com> gcc: * tree-pass.h (pass_strength_reduction): New declaration. * timevar.def (TV_TREE_SLSR): New time-var. * tree-ssa-strength-reduction.c: New file. * Makefile.in: New dependences. * passes.c (init_optimization_passes): Add new pass. gcc/testsuite: * gcc.dg/tree-ssa/slsr-1.c: New test case. * gcc.dg/tree-ssa/slsr-2.c: New test case. * gcc.dg/tree-ssa/slsr-3.c: New test case. * gcc.dg/tree-ssa/slsr-4.c: New test case. Index: gcc/tree-pass.h =================================================================== --- gcc/tree-pass.h (revision 181442) +++ gcc/tree-pass.h (working copy) @@ -455,6 +455,7 @@ extern struct gimple_opt_pass pass_tm_memopt; extern struct gimple_opt_pass pass_tm_edges; extern struct gimple_opt_pass pass_split_functions; extern struct gimple_opt_pass pass_feedback_split_functions; +extern struct gimple_opt_pass pass_strength_reduction; /* IPA Passes */ extern struct simple_ipa_opt_pass pass_ipa_lower_emutls; Index: gcc/testsuite/gcc.dg/tree-ssa/slsr-1.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/slsr-1.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/slsr-1.c (revision 0) @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-optimized" } */ + +extern void foo (int); + +void +f (int *p, unsigned int n) +{ + foo (*(p + n * 4)); + foo (*(p + 32 + n * 4)); + if (n > 3) + foo (*(p + 16 + n * 4)); + else + foo (*(p + 48 + n * 4)); +} + +/* { dg-final { scan-tree-dump-times "\\+ 128" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\\+ 64" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\\+ 192" 1 "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/slsr-2.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/slsr-2.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/slsr-2.c (revision 0) @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-optimized" } */ + +extern void foo (int); + +void +f (int *p, int n) +{ + foo (*(p + n++ * 4)); + foo (*(p + 32 + n++ * 4)); + foo (*(p + 16 + n * 4)); +} + +/* { dg-final { scan-tree-dump-times "\\+ 144" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\\+ 96" 1 "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/slsr-3.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/slsr-3.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/slsr-3.c (revision 0) @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-optimized" } */ + +int +foo (int a[], int b[], int i) +{ + a[i] = b[i] + 2; + i++; + a[i] = b[i] + 2; + i++; + a[i] = b[i] + 2; + i++; + a[i] = b[i] + 2; + i++; + return i; +} + +/* { dg-final { scan-tree-dump-times "\\* 4" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\\+ 4" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\\+ 8" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\\+ 12" 1 "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/slsr-4.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/slsr-4.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/slsr-4.c (revision 0) @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-slsr -fdump-tree-optimized" } */ + +void foo (int); + +int +f (int i) +{ + int x, y; + + x = i * 4; + y = x * 10; + foo (y); + + i = i + 5; + x = i * 4; + y = x * 10; + foo (y); + + i = i - 4; + x = i * 4; + y = x * 10; + foo (y); +} + +/* { dg-final { scan-tree-dump-times "\\* 4" 1 "slsr" } } */ +/* { dg-final { scan-tree-dump-times "\\* 10" 1 "slsr" } } */ +/* { dg-final { scan-tree-dump-times "\\+ 20;" 1 "slsr" } } */ +/* { dg-final { scan-tree-dump-times "\\+ 200" 1 "slsr" } } */ +/* { dg-final { scan-tree-dump-times "\\- 16;" 1 "slsr" } } */ +/* { dg-final { scan-tree-dump-times "\\- 160" 1 "slsr" } } */ +/* { dg-final { scan-tree-dump-times "\\* 4" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\\* 10" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\\+ 200" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\\+ 40" 1 "optimized" } } */ +/* { dg-final { cleanup-tree-dump "slsr" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ Index: gcc/timevar.def =================================================================== --- gcc/timevar.def (revision 181442) +++ gcc/timevar.def (working copy) @@ -253,6 +253,7 @@ DEFTIMEVAR (TV_TREE_IFCOMBINE , "tree if-co DEFTIMEVAR (TV_TREE_UNINIT , "uninit var analysis") DEFTIMEVAR (TV_PLUGIN_INIT , "plugin initialization") DEFTIMEVAR (TV_PLUGIN_RUN , "plugin execution") +DEFTIMEVAR (TV_TREE_SLSR , "straight-line strength reduction") /* Everything else in rest_of_compilation not included above. */ DEFTIMEVAR (TV_EARLY_LOCAL , "early local passes") Index: gcc/tree-ssa-strength-reduction.c =================================================================== --- gcc/tree-ssa-strength-reduction.c (revision 0) +++ gcc/tree-ssa-strength-reduction.c (revision 0) @@ -0,0 +1,851 @@ +/* Straight-line strength reduction. + Copyright (C) 2011 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* There are many algorithms for performing strength reduction on + loops. This is not one of them. IVOPTS handles strength reduction + of induction variables just fine. This pass is intended to pick + up the crumbs it leaves behind, by considering opportunities for + strength reduction along dominator paths. + + Strength reduction will be implemented in four stages, gradually + adding more complex candidates: + + 1) Explicit multiplies, known constant multipliers, no + conditional increments. (complete) + 2) Explicit multiplies, unknown constant multipliers, + no conditional increments. + 3) Explicit multiplies, conditional increments. + 4) Implicit multiplies in addressing expressions. + + It would also be possible to apply strength reduction to divisions + and modulos, but such opportunities are relatively uncommon. + + Strength reduction is also currently restricted to integer operations. + If desired, it could be extended to floating-point operations under + control of something like -funsafe-math-optimizations. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tree.h" +#include "gimple.h" +#include "basic-block.h" +#include "tree-pass.h" +#include "timevar.h" +#include "cfgloop.h" +#include "tree-pretty-print.h" +#include "gimple-pretty-print.h" +#include "alloc-pool.h" +#include "tree-flow.h" + +/* Index into the candidate vector, offset by 1. VECs are zero-based, + while cand_idx's are one-based, with zero indicating null. */ + +typedef unsigned cand_idx; + +/* Information about a strength reduction candidate. There will be two + forms of candidates that we will handle, named for the last instruction + in the sequence that we hope to replace. In stage 1, only multiply + candidates are handled. + + A "multiply" candidate is a statement S1 of the expanded form + + S1: LHS = (B + c1) * M, + + where B is an SSA name, c1 is a constant that may be zero, and + M is either an SSA name or a nonzero constant. A multiply + candidate may only be strength reduced if a previous "basis" + statement + + S0: S = (B' + c0) * M + + of the same form exists previously in the same block or in a + dominator. Assuming replacement is profitable, there are two + cases: + + (1) If B = B', then S1 can be replaced with: + + S1': LHS = S + (c1 - c0) * M, + + where (c1 - c0) * M is folded to the extent possible. + + (2) If B differs from B', then we require that B = B' + c0, + and S1 can be replaced with: + + S1': LHS = S + c1 * M, + + where c1 * M is folded if possible. + + Candidate records are allocated from an allocation pool. They are + addressed both from a hash table keyed on S1, and from a vector of + candidate pointers arranged in predominator order. */ + +typedef struct slsr_cand_d +{ + /* The candidate statement S1. */ + gimple cand_stmt; + + /* The base SSA name B. */ + tree base_name; + + /* The stride M. */ + tree stride; + + /* The index constant c1. */ + double_int index; + + /* Index of this candidate in the candidate vector. */ + cand_idx cand_num; + + /* Index of the basis statement S0, if any, in the candidate vector. */ + cand_idx basis; + + /* First candidate for which this candidate is a basis, if one exists. */ + cand_idx dependent; + + /* Next candidate having the same basis as this one. */ + cand_idx sibling; + + /* If this is a conditional candidate, the defining PHI statement + for the base SSA name B. */ + gimple def_phi; + + /* Access to the statement for subsequent modification. Cached to + save compile time. */ + gimple_stmt_iterator cand_gsi; + +} slsr_cand, *slsr_cand_t; + +typedef const struct slsr_cand_d *const_slsr_cand_t; + +/* Candidates are maintained in a vector. If candidate X dominates + candidate Y, then X appears before Y in the vector; but the + converse does not necessarily hold. */ +DEF_VEC_P (slsr_cand_t); +DEF_VEC_ALLOC_P (slsr_cand_t, heap); +static VEC (slsr_cand_t, heap) *cand_vec; + +/* Hash table embodying a mapping from statements to candidates. */ +static htab_t stmt_cand_map; + +/* Allocation pool for candidates. */ +static alloc_pool cand_pool; + + +/* Produce a pointer to the IDX'th candidate in the candidate vector. */ + +static slsr_cand_t +lookup_cand (cand_idx idx) +{ + return VEC_index (slsr_cand_t, cand_vec, idx - 1); +} + +/* Callback to produce a hash value for a candidate. */ + +static hashval_t +stmt_cand_hash (const void *p) +{ + return htab_hash_pointer (((const_slsr_cand_t)p)->cand_stmt); +} + +/* Callback when an element is removed from the hash table. + We never remove entries until the entire table is released. */ + +static void +stmt_cand_free (void *p ATTRIBUTE_UNUSED) +{ +} + +/* Callback to return true if two candidates are equal. */ + +static int +stmt_cand_eq (const void *p1, const void *p2) +{ + const_slsr_cand_t const cand1 = (const_slsr_cand_t)p1; + const_slsr_cand_t const cand2 = (const_slsr_cand_t)p2; + return (cand1->cand_stmt == cand2->cand_stmt); +} + +/* Return TRUE if GS is a statement that defines an SSA name from + a NOP_EXPR and is legal for us to combine an add and multiply + across. This is legitimate for casts from a signed type to + a signed or unsigned type of the same or larger size. It is not + legitimate to convert any unsigned type to a signed type, or + to an unsigned type of a different size. + + The reasoning here is that signed integer overflow is undefined, + so any program that was expecting overflow that no longer occurs + is not correct. Unsigned integers, however, have wrap semantics, + and it is reasonable for programs to assume an overflowing add + will wrap. */ + +static bool +legal_cast_p (gimple gs) +{ + tree lhs, rhs; + unsigned src_size, dst_size, src_uns, dst_uns; + + if (!is_gimple_assign (gs) + || TREE_CODE (gimple_assign_lhs (gs)) != SSA_NAME + || gimple_assign_rhs_code (gs) != NOP_EXPR) + return false; + + lhs = gimple_assign_lhs (gs); + rhs = gimple_assign_rhs1 (gs); + + if (TREE_CODE (rhs) != SSA_NAME) + return false; + + src_size = TYPE_PRECISION (TREE_TYPE (SSA_NAME_VAR (rhs))); + src_uns = TYPE_UNSIGNED (TREE_TYPE (SSA_NAME_VAR (rhs))); + dst_size = TYPE_PRECISION (TREE_TYPE (SSA_NAME_VAR (lhs))); + dst_uns = TYPE_UNSIGNED (TREE_TYPE (SSA_NAME_VAR (lhs))); + + if (dst_size < src_size) + return false; + + if (src_uns && !dst_uns) + return false; + + if (src_uns && dst_uns && src_size != dst_size) + return false; + + return true; +} + +/* If GS is a statement that defines an SSA name from a NOP_EXPR or + CONVERT_EXPR and is legal for our purposes (see legal_cast_p), + return the source SSA name. */ + +static tree +source_of_legal_cast (gimple gs) +{ + if (legal_cast_p (gs)) + return gimple_assign_rhs1 (gs); + + return NULL_TREE; +} + +/* If GS is a statement that defines an SSA name from a NOP_EXPR or + CONVERT_EXPR and is legal for our purposes (see legal_cast_p), + return the target SSA name. */ + +static tree +target_of_legal_cast (gimple gs) +{ + if (legal_cast_p (gs)) + return gimple_assign_lhs (gs); + + return NULL_TREE; +} + +/* Return TRUE if GS consists of an add or subtract of BASE_NAME + with a constant, with the result placed in an SSA name. GS is + known to be a gimple assignment. CODE is the RHS code for GS. */ + +static bool +stmt_adds_base_to_const (gimple gs, enum tree_code code, tree base_name) +{ + return (TREE_CODE (gimple_assign_lhs (gs)) == SSA_NAME + && (code == PLUS_EXPR || code == MINUS_EXPR) + && operand_equal_p (gimple_assign_rhs1 (gs), base_name, 0) + && TREE_CODE (gimple_assign_rhs2 (gs)) == INTEGER_CST); +} + +/* Recursive helper for find_basis_for_candidate. To find a + possible basis, first try the immediate uses of the given + BASE_NAME. If a particular use doesn't appear in a multiply, + see if it appears in an add that feeds one or more multiplies. + Recurse using the SSA name defined on the add. Each potential + basis found in this manner must also appear in a block that + dominates the candidate statement, be present in the candidate + table, and have the same stride M. If more than one possible + basis exists, pick the one with highest index in the vector, + which will be the most immediately dominating basis. */ + +static slsr_cand_t +find_basis_for_mul_candidate (slsr_cand_t c, tree base_name, slsr_cand_t basis) +{ + imm_use_iterator use_iter; + use_operand_p use_p; + + FOR_EACH_IMM_USE_FAST (use_p, use_iter, base_name) + { + gimple use_stmt = USE_STMT (use_p); + slsr_cand_t use_basis = NULL; + enum tree_code code; + tree cast_target; + + if (!is_gimple_assign (use_stmt)) + continue; + + /* When revising a candidate's basis, make sure not to pick itself. */ + if (c->cand_stmt == use_stmt) + continue; + + /* Look through casts where legal. */ + cast_target = target_of_legal_cast (use_stmt); + if (cast_target) + return find_basis_for_mul_candidate (c, cast_target, basis); + + /* If the use statement is a multiply, it's a potential basis. + Otherwise, we have to look at the uses of the use statement + to find any multiplies that may be potential bases. */ + code = gimple_assign_rhs_code (use_stmt); + + if (code == MULT_EXPR) + { + slsr_cand mapping_key; + mapping_key.cand_stmt = use_stmt; + use_basis = (slsr_cand_t)htab_find (stmt_cand_map, &mapping_key); + } + + /* Recurse if this is an add of a constant that might feed a + multiply. */ + else if (stmt_adds_base_to_const (use_stmt, code, base_name)) + { + tree lhs = gimple_assign_lhs (use_stmt); + use_basis = find_basis_for_mul_candidate (c, lhs, basis); + } + + /* A potential basis must dominate the candidate. */ + if (!use_basis + || !operand_equal_p (c->stride, use_basis->stride, 0) + || !dominated_by_p (CDI_DOMINATORS, + gimple_bb (c->cand_stmt), + gimple_bb (use_stmt))) + continue; + + /* When revising a candidate's basis, be careful not to pick + a basis that occurs after the candidate. */ + if (use_basis->cand_num > c->cand_num) + continue; + + if (!basis || basis->cand_num < use_basis->cand_num) + basis = use_basis; + } + + return basis; +} + +/* Look for the nearest dominating statement in the candidate + vector that can serve as a basis for the new CANDIDATE. If + found, adjust the dependent links for the basis entry and + return the index of the basis entry. Otherwise return zero. */ + +static unsigned int +find_basis_for_candidate (slsr_cand_t c) +{ + slsr_cand_t basis = find_basis_for_mul_candidate (c, c->base_name, NULL); + + if (basis) + { + c->sibling = basis->dependent; + basis->dependent = c->cand_num; + return basis->cand_num; + } + + return 0; +} + +/* Starting with statement GS, look backwards through any + intervening legal casts to find one or more + adds of an SSA name and a constant. Return the earliest + SSA name in the chain as *BASE, and the sum of all constants + in the chain as INDEX. */ + +static void +combine_feeding_adds (gimple gs, tree *base, double_int *index) +{ + double_int new_index; + tree cast_source; + + while ((cast_source = source_of_legal_cast (gs))) + { + gs = SSA_NAME_DEF_STMT (cast_source); + *base = cast_source; + } + + /* If there aren't any more adds, we're done. */ + if (!is_gimple_assign (gs) + || (gimple_assign_rhs_code (gs) != PLUS_EXPR + && gimple_assign_rhs_code (gs) != MINUS_EXPR) + || TREE_CODE (gimple_assign_rhs1 (gs)) != SSA_NAME + || TREE_CODE (gimple_assign_rhs2 (gs)) != INTEGER_CST) + return; + + *base = gimple_assign_rhs1 (gs); + new_index = tree_to_double_int (gimple_assign_rhs2 (gs)); + + if (gimple_assign_rhs_code (gs) == MINUS_EXPR) + new_index = double_int_neg (new_index); + + *index = double_int_add (*index, new_index); + combine_feeding_adds (SSA_NAME_DEF_STMT (*base), base, index); +} + +/* Allocate storage for a new candidate and initialize its fields. + Attempt to find a basis for the candidate. */ + +static slsr_cand_t +alloc_cand_and_find_basis (gimple gs, tree base, tree stride, + double_int index, gimple_stmt_iterator gsi) +{ + slsr_cand_t c = (slsr_cand_t)pool_alloc (cand_pool); + c->cand_stmt = gs; + c->base_name = base; + c->stride = stride; + c->index = index; + c->cand_num = VEC_length (slsr_cand_t, cand_vec) + 1; + c->dependent = 0; + c->sibling = 0; + c->def_phi = NULL; + c->cand_gsi = gsi; + c->basis = find_basis_for_candidate (c); + return c; +} + +/* Given statement GS containing an integer multiply, determine + whether it's a possible strength-reduction candidate. If so, + add it to the candidate vector and the statement-candidate + mapping. */ + +static void +process_possible_mul_candidate (gimple_stmt_iterator gsi, gimple gs) +{ + tree rhs1 = gimple_assign_rhs1 (gs); + tree rhs2 = gimple_assign_rhs2 (gs); + tree base; + double_int index; + slsr_cand_t c; + void **slot; + + /* TODO: Unknown constant multipliers will be dealt with in + stage 2. */ + if (TREE_CODE (rhs2) != INTEGER_CST) + return; + + /* If RHS1 isn't fed by an addition or subtraction of an SSA_NAME + and a constant, then treat it as an add of zero. Look through + legal casts and combine multiple chained adds to find + the true base name and index. */ + gcc_assert (TREE_CODE (rhs1) == SSA_NAME); + base = rhs1; + index = double_int_zero; + combine_feeding_adds (SSA_NAME_DEF_STMT (rhs1), &base, &index); + + /* Allocate and initialize the candidate, and attempt to find + a basis for it. */ + c = alloc_cand_and_find_basis (gs, base, rhs2, index, gsi); + + VEC_safe_push (slsr_cand_t, heap, cand_vec, c); + + slot = htab_find_slot (stmt_cand_map, c, INSERT); + gcc_assert (!*slot); + *slot = c; +} + +/* Find strength-reduction candidates in block BB. */ + +static void +find_candidates_in_block (basic_block bb) +{ + gimple_stmt_iterator gsi; + basic_block son; + + /* Process this block. */ + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple gs = gsi_stmt (gsi); + if (is_gimple_assign (gs) + && gimple_assign_rhs_code (gs) == MULT_EXPR + && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs))))) + process_possible_mul_candidate (gsi, gs); + } + + /* Process dominated blocks. */ + for (son = first_dom_son (CDI_DOMINATORS, bb); + son; + son = next_dom_son (CDI_DOMINATORS, son)) + find_candidates_in_block (son); +} + +/* Dump a candidate for debug. */ + +static void +dump_candidate (slsr_cand_t c) +{ + fprintf (dump_file, "%3d [%d] ", c->cand_num, + gimple_bb (c->cand_stmt)->index); + print_gimple_stmt (dump_file, c->cand_stmt, 0, 0); + fputs (" base: ", dump_file); + print_generic_expr (dump_file, c->base_name, 0); + fputs ("\n index: ", dump_file); + dump_double_int (dump_file, c->index, false); + fputs ("\n stride: ", dump_file); + print_generic_expr (dump_file, c->stride, 0); + fprintf (dump_file, "\n basis: %3d", c->basis); + fprintf (dump_file, "\n dependent: %3d", c->dependent); + fprintf (dump_file, "\n sibling: %3d", c->sibling); + fputs ("\n phi: ", dump_file); + if (c->def_phi) + print_gimple_stmt (dump_file, c->def_phi, 0, 0); + else + fputs ("n/a", dump_file); + fputs ("\n\n", dump_file); +} + +/* Dump the candidate vector for debug. */ + +static void +dump_cand_vec (void) +{ + unsigned i; + slsr_cand_t c; + + fprintf (dump_file, "\nStrength reduction candidate vector:\n\n"); + + FOR_EACH_VEC_ELT (slsr_cand_t, cand_vec, i, c) + dump_candidate (c); +} + +/* Recursive helper for unconditional_cands_with_known_stride_p. + Returns TRUE iff C, its siblings, and its dependents are all + unconditional candidates. */ + +static bool +unconditional_cands (slsr_cand_t c) +{ + if (c->def_phi) + return false; + + if (c->sibling && !unconditional_cands (lookup_cand (c->sibling))) + return false; + + if (c->dependent && !unconditional_cands (lookup_cand (c->dependent))) + return false; + + return true; +} + +/* Determine whether or not the tree of candidates rooted at + ROOT consists entirely of unconditional increments with + an INTEGER_CST stride. */ + +static bool +unconditional_cands_with_known_stride_p (slsr_cand_t root) +{ + /* The stride is identical for all related candidates, so + check it once. */ + if (TREE_CODE (root->stride) != INTEGER_CST) + return false; + + return unconditional_cands (lookup_cand (root->dependent)); +} + +/* GS is an add or subtract that used to be a multiply. Follow + its immediate uses to update the candidate table accordingly. */ + +static void +update_chained_candidates (gimple gs) +{ + tree base_name = gimple_assign_lhs (gs); + imm_use_iterator use_iter; + use_operand_p use_p; + tree cast_target; + + FOR_EACH_IMM_USE_FAST (use_p, use_iter, base_name) + { + gimple use_stmt = USE_STMT (use_p); + enum tree_code code; + slsr_cand_t c; + + if (!is_gimple_assign (use_stmt)) + continue; + + /* Look forward through converts that don't change semantics. */ + cast_target = target_of_legal_cast (use_stmt); + if (cast_target) + { + update_chained_candidates (use_stmt); + continue; + } + + code = gimple_assign_rhs_code (use_stmt); + + /* Case 1: The name defined by the add appears in a + multiply that exists in the candidate table. */ + if (code == MULT_EXPR) + { + slsr_cand mapping_key; + tree base; + double_int index; + + mapping_key.cand_stmt = use_stmt; + if (!(c = (slsr_cand_t)htab_find (stmt_cand_map, &mapping_key))) + continue; + + base = base_name; + index = double_int_zero; + combine_feeding_adds (gs, &base, &index); + c->base_name = base; + c->index = index; + + if (!c->basis) + c->basis = find_basis_for_candidate (c); + else + /* If all has worked correctly, the modified candidate has + the same basis as before. */ + gcc_assert (lookup_cand (c->basis) + == find_basis_for_mul_candidate (c, base, NULL)); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fputs ("\nRevised candidate:\n", dump_file); + dump_candidate (c); + } + } + + /* Case 2: The name defined by the add appears in another + add with a constant, which in turn feeds one or more multiplies + in the candidate table. Recurse to find the multiplies. */ + else if (stmt_adds_base_to_const (use_stmt, code, base_name)) + update_chained_candidates (use_stmt); + } +} + +/* Calculate the increment required for candidate C relative to + its basis. */ + +static double_int +cand_increment (slsr_cand_t c) +{ + slsr_cand_t basis = lookup_cand (c->basis); + + if (operand_equal_p (c->base_name, basis->base_name, 0)) + return double_int_sub (c->index, basis->index); + else + return c->index; +} + +/* Insert a NOP_EXPR converting NAME to TO_TYPE, prior to + the statement referenced by candidate C. */ + +static tree +convert_name (tree name, slsr_cand_t c, tree to_type) +{ + tree new_name = create_tmp_reg (to_type, "slsr"); + gimple nop_stmt; + add_referenced_var (new_name); + new_name = make_ssa_name (new_name, NULL); + nop_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_name, + name, NULL_TREE); + gimple_set_location (nop_stmt, gimple_location (c->cand_stmt)); + gsi_insert_before (&c->cand_gsi, nop_stmt, GSI_SAME_STMT); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fputs ("Inserting: ", dump_file); + print_gimple_stmt (dump_file, nop_stmt, 0, 0); + } + + return new_name; +} + +/* Replace candidate C, each sibling of candidate C, and each + dependent of candidate C with an add or subtract. */ + +static void +replace_dependents (slsr_cand_t c) +{ + slsr_cand_t basis = lookup_cand (c->basis); + tree basis_name = gimple_assign_lhs (basis->cand_stmt); + tree incr_type = TREE_TYPE (gimple_assign_rhs1 (c->cand_stmt)); + double_int stride = tree_to_double_int (c->stride); + double_int increment = double_int_mul (cand_increment (c), stride); + + /* It is highly unlikely, but possible, that the resulting + increment doesn't fit in a HWI. Abandon the replacement + in this case. This does not affect siblings or dependents + of C. */ + /* Restriction to signed HWI is conservative for unsigned types + but allows for safe negation without twisted logic. */ + if (double_int_fits_in_shwi_p (increment)) + { + enum tree_code code = PLUS_EXPR; + tree orig_type + = TREE_TYPE (SSA_NAME_VAR (gimple_assign_rhs1 (c->cand_stmt))); + tree repl_type = TREE_TYPE (SSA_NAME_VAR (basis_name)); + tree incr_tree; + + if (double_int_negative_p (increment)) + { + code = MINUS_EXPR; + increment = double_int_neg (increment); + } + + incr_tree = double_int_to_tree (incr_type, increment); + + /* A widening cast may be necessary. */ + if (orig_type != repl_type) + basis_name = convert_name (basis_name, c, orig_type); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fputs ("Replacing: ", dump_file); + print_gimple_stmt (dump_file, c->cand_stmt, 0, 0); + } + + gimple_assign_set_rhs_with_ops (&c->cand_gsi, code, + basis_name, incr_tree); + update_stmt (c->cand_stmt); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fputs ("With: ", dump_file); + print_gimple_stmt (dump_file, c->cand_stmt, 0, 0); + fputs ("\n", dump_file); + } + + /* The multiply we just converted to an add might well feed + into one or more other multiplies in the candidate table. + If so, we need to update those candidate entries. */ + update_chained_candidates (c->cand_stmt); + } + + if (c->sibling) + replace_dependents (lookup_cand (c->sibling)); + + if (c->dependent) + replace_dependents (lookup_cand (c->dependent)); +} + +/* Analyze costs of related candidates in the candidate vector, + and make beneficial replacements. */ + +static void +analyze_candidates_and_replace (void) +{ + unsigned i; + slsr_cand_t c; + + /* Each candidate that has a null basis and a non-null + dependent is the root of a tree of related statements. + Analyze each tree to determine a subset of those + statements that can be replaced with maximum benefit. */ + FOR_EACH_VEC_ELT (slsr_cand_t, cand_vec, i, c) + { + slsr_cand_t first_dep; + + if (c->basis != 0 || c->dependent == 0) + continue; + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n", + c->cand_num); + + first_dep = lookup_cand (c->dependent); + + /* If the common stride of all related candidates is a + known constant, and none of these has a phi-dependence, + then all replacements are considered profitable. + Each replaces a multiply by a single add, with the + possibility that a feeding add also goes dead as a + result. */ + if (unconditional_cands_with_known_stride_p (c)) + replace_dependents (first_dep); + + /* TODO: When the stride is an SSA name, it may still + be profitable to replace some or all of the dependent + candidates, depending on whether the introduced increments + can be reused. */ + + /* TODO: When conditional increments occur so that a + candidate is dependent upon a phi-basis, the cost of + introducing a temporary must be accounted for. */ + + /* TODO: Strength reduction candidates hidden in + addressing expressions are not yet handled, and will + have more complex cost functions. */ + } +} + +/* Main entry point for straight-line strength reduction. */ + +static unsigned +execute_strength_reduction (void) +{ + /* Create the allocation pool where candidates will reside. */ + cand_pool = create_alloc_pool ("Strength reduction pool", + sizeof (slsr_cand), 128); + + /* Allocate the candidate vector. */ + cand_vec = VEC_alloc (slsr_cand_t, heap, 128); + + /* Allocate the mapping from statements to candidate indices. */ + stmt_cand_map = htab_create (500, stmt_cand_hash, + stmt_cand_eq, stmt_cand_free); + + /* Initialize the loop optimizer. We need to detect flow across + back edges, and this gives us dominator information as well. */ + loop_optimizer_init (AVOID_CFG_MODIFICATIONS); + + /* Walk the CFG in predominator order looking for strength reduction + candidates. */ + find_candidates_in_block (ENTRY_BLOCK_PTR); + + if (dump_file && (dump_flags & TDF_DETAILS)) + dump_cand_vec (); + + /* Analyze costs and make appropriate replacements. */ + analyze_candidates_and_replace (); + + /* Free resources. */ + loop_optimizer_finalize (); + htab_delete (stmt_cand_map); + VEC_free (slsr_cand_t, heap, cand_vec); + free_alloc_pool (cand_pool); + + return 0; +} + +static bool +gate_strength_reduction (void) +{ + return optimize > 0; +} + +struct gimple_opt_pass pass_strength_reduction = +{ + { + GIMPLE_PASS, + "slsr", /* name */ + gate_strength_reduction, /* gate */ + execute_strength_reduction, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_TREE_SLSR, /* tv_id */ + PROP_cfg | PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func | TODO_verify_ssa /* todo_flags_finish */ + } +}; Index: gcc/Makefile.in =================================================================== --- gcc/Makefile.in (revision 181442) +++ gcc/Makefile.in (working copy) @@ -1416,6 +1416,7 @@ OBJS = \ tree-ssa-reassoc.o \ tree-ssa-sccvn.o \ tree-ssa-sink.o \ + tree-ssa-strength-reduction.o \ tree-ssa-strlen.o \ tree-ssa-structalias.o \ tree-ssa-tail-merge.o \ @@ -2421,6 +2422,10 @@ tree-ssa-sccvn.o : tree-ssa-sccvn.c $(TREE_FLOW_H) alloc-pool.h $(BASIC_BLOCK_H) $(BITMAP_H) langhooks.h $(HASHTAB_H) $(GIMPLE_H) \ $(TREE_INLINE_H) tree-iterator.h tree-ssa-propagate.h tree-ssa-sccvn.h \ $(PARAMS_H) tree-pretty-print.h gimple-pretty-print.h gimple-fold.h +tree-ssa-strength-reduction.o : tree-ssa-strength-reduction.c $(CONFIG_H) \ + $(SYSTEM_H) coretypes.h $(TREE_H) $(GIMPLE_H) $(BASIC_BLOCK_H) \ + $(TREE_PASS_H) $(TIMEVAR_H) $(CFGLOOP_H) tree-pretty-print.h alloc-pool.h \ + $(TREE_FLOW_H) tree-vrp.o : tree-vrp.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) $(DIAGNOSTIC_H) $(GGC_H) \ $(BASIC_BLOCK_H) tree-ssa-propagate.h $(FLAGS_H) $(TREE_DUMP_H) \ Index: gcc/passes.c =================================================================== --- gcc/passes.c (revision 181442) +++ gcc/passes.c (working copy) @@ -1371,6 +1371,7 @@ init_optimization_passes (void) } NEXT_PASS (pass_lower_vector_ssa); NEXT_PASS (pass_cse_reciprocals); + NEXT_PASS (pass_strength_reduction); NEXT_PASS (pass_reassoc); NEXT_PASS (pass_vrp); NEXT_PASS (pass_dominator);