On Fri, 3 Jul 2015, Marek Polacek wrote: > This patch implements a new pass, called laddress, which deals with > lowering ADDR_EXPR assignments. Such lowering ought to help the > vectorizer, but it also could expose more CSE opportunities, maybe > help reassoc, etc. It's only active when optimize != 0. > > So e.g. > _1 = (sizetype) i_9; > _7 = _1 * 4; > _4 = &b + _7; > instead of > _4 = &b[i_9]; > > This triggered 14105 times during the regtest and 6392 times during > the bootstrap. > > The fallout (at least on x86_64) is surprisingly small, i.e. none, just > gcc.dg/vect/pr59984.c test (using -fopenmp-simd) ICEs, but that is due > to a bug in the vectorizer. Jakub has a patch and knows the details. > As the test shows, we're now able to vectorize ADDR_EXPR of non-invariants > (that was the motivation of this pass). > > This doesn't introduce any kind of verification nor PROP_laddress. > Don't know if we want that, but hopefully it can be done as a follow-up > if we do.
Yes. At the moment nothing requires lowered address form so this is merely an optimization (and not a bug for some later pass to re-introduce un-lowered non-invariant addresses). I can imagine that for example IVOPTs could be simplified if we didn't have this kind of addresses in the IL. > Do we want to move some optimizations into this new pass, e.g. > from fwprop? I think we might want to re-try forwprop_into_addr_expr before lowering the address. Well, but that's maybe just over-cautionous. > Thoughts? Please move the pass before crited, crited and pre are supposed to go together. Otherwise looks ok to me. Thanks, Richard. > Bootstrapped/regtested on x86_64-linux. > > 2015-07-03 Marek Polacek <pola...@redhat.com> > > PR tree-optimization/66718 > * Makefile.in (OBJS): Add tree-ssa-laddress.o. > * passes.def: Schedule pass_laddress. > * timevar.def (DEFTIMEVAR): Add TV_TREE_LADDRESS. > * tree-pass.h (make_pass_laddress): Declare. > * tree-ssa-laddress.c: New file. > > * gcc.dg/vect/vect-126.c: New test. > > diff --git gcc/Makefile.in gcc/Makefile.in > index 89eda96..2574b98 100644 > --- gcc/Makefile.in > +++ gcc/Makefile.in > @@ -1447,6 +1447,7 @@ OBJS = \ > tree-ssa-dse.o \ > tree-ssa-forwprop.o \ > tree-ssa-ifcombine.o \ > + tree-ssa-laddress.o \ > tree-ssa-live.o \ > tree-ssa-loop-ch.o \ > tree-ssa-loop-im.o \ > diff --git gcc/passes.def gcc/passes.def > index 0d8356b..ac16e8a 100644 > --- gcc/passes.def > +++ gcc/passes.def > @@ -214,6 +214,7 @@ along with GCC; see the file COPYING3. If not see > NEXT_PASS (pass_cse_sincos); > NEXT_PASS (pass_optimize_bswap); > NEXT_PASS (pass_split_crit_edges); > + NEXT_PASS (pass_laddress); > NEXT_PASS (pass_pre); > NEXT_PASS (pass_sink_code); > NEXT_PASS (pass_asan); > diff --git gcc/testsuite/gcc.dg/vect/vect-126.c > gcc/testsuite/gcc.dg/vect/vect-126.c > index e69de29..66a5821 100644 > --- gcc/testsuite/gcc.dg/vect/vect-126.c > +++ gcc/testsuite/gcc.dg/vect/vect-126.c > @@ -0,0 +1,64 @@ > +/* PR tree-optimization/66718 */ > +/* { dg-do compile } */ > +/* { dg-additional-options "-mavx2" { target avx_runtime } } */ > + > +int *a[1024], b[1024]; > +struct S { int u, v, w, x; }; > +struct S c[1024]; > +int d[1024][10]; > + > +void > +f0 (void) > +{ > + for (int i = 0; i < 1024; i++) > + a[i] = &b[0]; > +} > + > +void > +f1 (void) > +{ > + for (int i = 0; i < 1024; i++) > + { > + int *p = &b[0]; > + a[i] = p + i; > + } > +} > + > +void > +f2 (int *p) > +{ > + for (int i = 0; i < 1024; i++) > + a[i] = &p[i]; > +} > + > +void > +f3 (void) > +{ > + for (int i = 0; i < 1024; i++) > + a[i] = &b[i]; > +} > + > +void > +f4 (void) > +{ > + int *p = &c[0].v; > + for (int i = 0; i < 1024; i++) > + a[i] = &p[4 * i]; > +} > + > +void > +f5 (void) > +{ > + for (int i = 0; i < 1024; i++) > + a[i] = &c[i].v; > +} > + > +void > +f6 (void) > +{ > + for (int i = 0; i < 1024; i++) > + for (unsigned int j = 0; j < 10; j++) > + a[i] = &d[i][j]; > +} > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 7 > "vect" { target vect_condition } } } */ > diff --git gcc/timevar.def gcc/timevar.def > index efac4b7..fcc2fe0 100644 > --- gcc/timevar.def > +++ gcc/timevar.def > @@ -275,6 +275,7 @@ DEFTIMEVAR (TV_GIMPLE_SLSR , "straight-line > strength reduction") > DEFTIMEVAR (TV_VTABLE_VERIFICATION , "vtable verification") > DEFTIMEVAR (TV_TREE_UBSAN , "tree ubsan") > DEFTIMEVAR (TV_INITIALIZE_RTL , "initialize rtl") > +DEFTIMEVAR (TV_TREE_LADDRESS , "address lowering") > > /* Everything else in rest_of_compilation not included above. */ > DEFTIMEVAR (TV_EARLY_LOCAL , "early local passes") > diff --git gcc/tree-pass.h gcc/tree-pass.h > index 2808dad..c47b22e 100644 > --- gcc/tree-pass.h > +++ gcc/tree-pass.h > @@ -393,6 +393,7 @@ extern gimple_opt_pass *make_pass_cd_dce (gcc::context > *ctxt); > extern gimple_opt_pass *make_pass_call_cdce (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_merge_phi (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_split_crit_edges (gcc::context *ctxt); > +extern gimple_opt_pass *make_pass_laddress (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_pre (gcc::context *ctxt); > extern unsigned int tail_merge_optimize (unsigned int); > extern gimple_opt_pass *make_pass_profile (gcc::context *ctxt); > diff --git gcc/tree-ssa-laddress.c gcc/tree-ssa-laddress.c > index e69de29..3f69d7d 100644 > --- gcc/tree-ssa-laddress.c > +++ gcc/tree-ssa-laddress.c > @@ -0,0 +1,137 @@ > +/* Lower and optimize address expressions. > + Copyright (C) 2015 Free Software Foundation, Inc. > + Contributed by Marek Polacek <pola...@redhat.com> > + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify it under > +the terms of the GNU General Public License as published by the Free > +Software Foundation; either version 3, or (at your option) any later > +version. > + > +GCC is distributed in the hope that it will be useful, but WITHOUT ANY > +WARRANTY; without even the implied warranty of MERCHANTABILITY or > +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License > +for more details. > + > +You should have received a copy of the GNU General Public License > +along with GCC; see the file COPYING3. If not see > +<http://www.gnu.org/licenses/>. */ > + > +#include "config.h" > +#include "system.h" > +#include "coretypes.h" > +#include "alias.h" > +#include "predict.h" > +#include "tm.h" > +#include "function.h" > +#include "dominance.h" > +#include "cfg.h" > +#include "basic-block.h" > +#include "tree-ssa-alias.h" > +#include "symtab.h" > +#include "tree.h" > +#include "stringpool.h" > +#include "tree-ssanames.h" > +#include "fold-const.h" > +#include "gimple-expr.h" > +#include "gimple.h" > +#include "gimplify.h" > +#include "gimple-iterator.h" > +#include "gimplify-me.h" > +#include "tree-pass.h" > + > + > +namespace { > + > +const pass_data pass_data_laddress = > +{ > + GIMPLE_PASS, /* type */ > + "laddress", /* name */ > + OPTGROUP_NONE, /* optinfo_flags */ > + TV_TREE_LADDRESS, /* tv_id */ > + ( PROP_cfg | PROP_ssa ), /* properties_required */ > + 0, /* properties_provided */ > + 0, /* properties_destroyed */ > + 0, /* todo_flags_start */ > + 0, /* todo_flags_finish */ > +}; > + > +class pass_laddress : public gimple_opt_pass > +{ > +public: > + pass_laddress (gcc::context *ctxt) > + : gimple_opt_pass (pass_data_laddress, ctxt) > + {} > + > + /* opt_pass methods: */ > + opt_pass * clone () { return new pass_laddress (m_ctxt); } > + virtual bool gate (function *) { return optimize != 0; } > + virtual unsigned int execute (function *); > + > +}; // class pass_laddress > + > +unsigned int > +pass_laddress::execute (function *fun) > +{ > + basic_block bb; > + > + FOR_EACH_BB_FN (bb, fun) > + { > + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);) > + { > + gimple stmt = gsi_stmt (gsi); > + if (!is_gimple_assign (stmt) > + || gimple_assign_rhs_code (stmt) != ADDR_EXPR > + || is_gimple_invariant_address (gimple_assign_rhs1 (stmt))) > + { > + gsi_next (&gsi); > + continue; > + } > + > + /* Lower ADDR_EXPR assignments: > + _4 = &b[i_9]; > + into > + _1 = (sizetype) i_9; > + _7 = _1 * 4; > + _4 = &b + _7; > + This ought to aid the vectorizer and expose CSE opportunities. > + */ > + > + tree expr = gimple_assign_rhs1 (stmt); > + HOST_WIDE_INT bitsize, bitpos; > + tree base, offset; > + machine_mode mode; > + int volatilep = 0, unsignedp = 0; > + base = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, > + &bitpos, &offset, &mode, &unsignedp, > + &volatilep, false); > + gcc_assert (base != NULL_TREE && (bitpos % BITS_PER_UNIT) == 0); > + if (offset != NULL_TREE) > + { > + if (bitpos != 0) > + offset = size_binop (PLUS_EXPR, offset, > + size_int (bitpos / BITS_PER_UNIT)); > + offset = force_gimple_operand_gsi (&gsi, offset, true, NULL, > + true, GSI_SAME_STMT); > + base = build_fold_addr_expr (base); > + base = force_gimple_operand_gsi (&gsi, base, true, NULL, > + true, GSI_SAME_STMT); > + gimple g = gimple_build_assign (gimple_assign_lhs (stmt), > + POINTER_PLUS_EXPR, base, offset); > + gsi_replace (&gsi, g, false); > + } > + gsi_next (&gsi); > + } > + } > + > + return 0; > +} > + > +} // anon namespace > + > +gimple_opt_pass * > +make_pass_laddress (gcc::context *ctxt) > +{ > + return new pass_laddress (ctxt); > +} > > Marek > > -- Richard Biener <rguent...@suse.de> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Dilip Upmanyu, Graham Norton, HRB 21284 (AG Nuernberg)