On Mon, Nov 27, 2023 at 3:51 PM Jakub Jelinek <ja...@redhat.com> wrote: > > On Mon, Nov 27, 2023 at 09:52:14PM +0100, Jakub Jelinek wrote: > > On Mon, Oct 16, 2023 at 01:11:01PM -0600, Jeff Law wrote: > > > > gcc/ChangeLog: > > > > > > > > * Makefile.in: Add fold-mem-offsets.o. > > > > * passes.def: Schedule a new pass. > > > > * tree-pass.h (make_pass_fold_mem_offsets): Declare. > > > > * common.opt: New options. > > > > * doc/invoke.texi: Document new option. > > > > * fold-mem-offsets.cc: New file. > > > > > > > > gcc/testsuite/ChangeLog: > > > > > > > > * gcc.target/riscv/fold-mem-offsets-1.c: New test. > > > > * gcc.target/riscv/fold-mem-offsets-2.c: New test. > > > > * gcc.target/riscv/fold-mem-offsets-3.c: New test. > > > Thanks, I've pushed this to the trunk. > > > > This breaks profiledbootstrap on powerpc64le-linux. > > >From what I can see, the pass works one basic block at a time and > > will punt on any non-DEBUG_INSN uses outside of the current block > > (I believe because of the > > /* This use affects instructions outside of CAN_FOLD_INSNS. */ > > if (!bitmap_bit_p (&can_fold_insns, INSN_UID (use))) > > return 0; > > test and can_fold_insns only set in do_analysis (when processing insns in > > current bb, cleared at the end) or results of get_single_def_in_bb > > (which are checked to be in the same bb). > > But, while get_single_def_in_bb checks for > > if (DF_INSN_LUID (def) > DF_INSN_LUID (insn)) > > return NULL; > > (OT, why not DF_INSN_INFO_LUID (DF_REF_INSN_INFO (ref_chain->ref)) > > instead of DF_INSN_LUID (def), then it doesn't need to look up > > DF_INSN_INFO_GET (def)?), nothing when walking all uses of def does such > > luid check. > > The basic block in the PR in question has: > > ... > > (insn 212 210 215 25 (set (mem/f:DI (reg/v/f:DI 10 10 [orig:152 last_viable > > ] [152]) [2 *last_viable_336+0 S8 A64]) > > (reg/f:DI 9 9 [orig:155 _342 ] [155])) "pr111601.ii":50:17 683 > > {*movdi_internal64} > > (expr_list:REG_DEAD (reg/v/f:DI 10 10 [orig:152 last_viable ] [152]) > > (nil))) > > (insn 215 212 484 25 (set (reg:DI 5 5 [226]) > > (const_int 0 [0])) "pr111601.ii":52:12 683 {*movdi_internal64} > > (expr_list:REG_EQUIV (const_int 0 [0]) > > (nil))) > > (insn 484 215 218 25 (set (reg/v/f:DI 10 10 [orig:152 last_viable ] [152]) > > (reg/f:DI 9 9 [orig:155 _342 ] [155])) "pr111601.ii":52:12 683 > > {*movdi_internal64} > > (nil)) > > ... > > (insn 564 214 216 25 (set (reg/v/f:DI 10 10 [orig:152 last_viable ] [152]) > > (plus:DI (reg/v/f:DI 10 10 [orig:152 last_viable ] [152]) > > (const_int 96 [0x60]))) "pr111601.ii":52:12 66 {*adddi3} > > (nil)) > > (insn 216 564 219 25 (set (mem/f:DI (reg/v/f:DI 10 10 [orig:152 last_viable > > ] [152]) [2 _343->next+0 S8 A64]) > > (reg:DI 5 5 [226])) "pr111601.ii":52:12 683 {*movdi_internal64} > > (expr_list:REG_DEAD (reg:DI 5 5 [226]) > > (nil))) > > ... > > and when asking for all uses of %r10 from def 564, it will see uses > > in 216 and 212; the former is after the += 96 addition and gets changed > > to load from %r10+96 with the addition being dropped, but there is > > the other store which is a use across the backedge and when reached > > from other edges certainly doesn't have the + 96 addition anywhere, > > so the pass doesn't actually change that location. > > > > Haven't bootstrapped/regtested this yet, will start momentarily, > > posting here just in case I'm missing something important. > > That version failed bootstrap because DF_INSN_LUID is signed int, not > unsigned. > > Here is what so far passed on powerpc64le-linux > ../configure --enable-languages=c,c++ --enable-checking=yes,rtl,extra > --disable-libsanitizer --with-long-double-128 > make -j160 profiledbootstrap > which has been failing for more than a month now. > > I've noticed a couple of small formatting issues and fixed them too, > doing normal {powerpc64le,x86_64,i686}-linux bootstraps/regtests: > > 2023-11-27 Jakub Jelinek <ja...@redhat.com> > > PR bootstrap/111601 > * fold-mem-offsets.cc (fold_offsets): Punt if use appears before > def in the basic block. > (get_single_def_in_bb, get_uses): Formatting fixes. > (fold_offsets_1, pass_fold_mem_offsets::execute): Comment formatting > fixes. > > * g++.dg/opt/pr111601.C: New test. > > --- gcc/fold-mem-offsets.cc.jj 2023-11-02 07:49:17.060865772 +0100 > +++ gcc/fold-mem-offsets.cc 2023-11-27 22:47:21.128591332 +0100 > @@ -154,7 +154,7 @@ static int stats_fold_count; > The definition is desired for REG used in INSN. > Return the definition insn or NULL if there's no definition with > the desired criteria. */ > -static rtx_insn* > +static rtx_insn * > get_single_def_in_bb (rtx_insn *insn, rtx reg) > { > df_ref use; > @@ -205,7 +205,7 @@ get_single_def_in_bb (rtx_insn *insn, rt > /* Get all uses of REG which is set in INSN. Return the use list or NULL if > a > use is missing / irregular. If SUCCESS is not NULL then set it to false > if > there are missing / irregular uses and true otherwise. */ > -static struct df_link* > +static struct df_link *
Since you are making style changes here, it might make sense to remove the struct keyword too since this is C++ code after all. Thanks, Andrew Pinski > get_uses (rtx_insn *insn, rtx reg, bool *success) > { > df_ref def; > @@ -255,8 +255,7 @@ fold_offsets (rtx_insn *insn, rtx reg, b > > If DO_RECURSION is true and ANALYZE is false then offset that would > result > from folding is computed and is returned through the pointer OFFSET_OUT. > - The instructions that can be folded are recorded in FOLDABLE_INSNS. > -*/ > + The instructions that can be folded are recorded in FOLDABLE_INSNS. */ > static bool > fold_offsets_1 (rtx_insn *insn, bool analyze, bool do_recursion, > HOST_WIDE_INT *offset_out, bitmap foldable_insns) > @@ -511,6 +510,7 @@ fold_offsets (rtx_insn *insn, rtx reg, b > if (!success) > return 0; > > + int luid = DF_INSN_LUID (def); > for (ref_link = uses; ref_link; ref_link = ref_link->next) > { > rtx_insn *use = DF_REF_INSN (ref_link->ref); > @@ -534,6 +534,11 @@ fold_offsets (rtx_insn *insn, rtx reg, b > if (use_set && MEM_P (SET_DEST (use_set)) > && reg_mentioned_p (dest, SET_SRC (use_set))) > return 0; > + > + /* Punt if use appears before def in the basic block. See > + PR111601. */ > + if (DF_INSN_LUID (use) < luid) > + return 0; > } > > bitmap_set_bit (&can_fold_insns, INSN_UID (def)); > @@ -846,8 +851,8 @@ pass_fold_mem_offsets::execute (function > FOR_ALL_BB_FN (bb, fn) > { > /* There is a conflict between this pass and RISCV's shorten-memrefs > - pass. For now disable folding if optimizing for size because > - otherwise this cancels the effects of shorten-memrefs. */ > + pass. For now disable folding if optimizing for size because > + otherwise this cancels the effects of shorten-memrefs. */ > if (optimize_bb_for_size_p (bb)) > continue; > > --- gcc/testsuite/g++.dg/opt/pr111601.C.jj 2023-11-27 21:33:12.605006881 > +0100 > +++ gcc/testsuite/g++.dg/opt/pr111601.C 2023-11-27 21:34:47.267678510 +0100 > @@ -0,0 +1,86 @@ > +// PR bootstrap/111601 > +// { dg-do run { target c++11 } } > +// { dg-options "-O2 -fno-exceptions -fno-rtti -fprofile-generate" } > +// { dg-require-profiling "-fprofile-generate" } > +// { dg-final { cleanup-coverage-files } } > + > +struct tree_base > +{ > + int code:16; > +}; > +struct saved_scope > +{ > + void *pad[14]; > + int x_processing_template_decl; > +}; > +struct saved_scope *scope_chain; > +struct z_candidate > +{ > + tree_base *fn; > + void *pad[11]; > + z_candidate *next; > + int viable; > + int flags; > +}; > + > +__attribute__((noipa)) struct z_candidate * > +splice_viable (struct z_candidate *cands, bool strict_p, bool *any_viable_p) > +{ > + struct z_candidate *viable; > + struct z_candidate **last_viable; > + struct z_candidate **cand; > + bool found_strictly_viable = false; > + if (scope_chain->x_processing_template_decl) > + strict_p = true; > + viable = (z_candidate *) 0; > + last_viable = &viable; > + *any_viable_p = false; > + cand = &cands; > + while (*cand) > + { > + struct z_candidate *c = *cand; > + if (!strict_p && (c->viable == 1 || ((int) (c->fn)->code) == 273)) > + { > + strict_p = true; > + if (viable && !found_strictly_viable) > + { > + *any_viable_p = false; > + *last_viable = cands; > + cands = viable; > + viable = (z_candidate *) 0; > + last_viable = &viable; > + } > + } > + if (strict_p ? c->viable == 1 : c->viable) > + { > + *last_viable = c; > + *cand = c->next; > + c->next = (z_candidate *) 0; > + last_viable = &c->next; > + *any_viable_p = true; > + if (c->viable == 1) > + found_strictly_viable = true; > + } > + else > + cand = &c->next; > + } > + return viable ? viable : cands; > +} > + > +int > +main () > +{ > + saved_scope s{}; > + scope_chain = &s; > + z_candidate z[4] = {}; > + z[0].next = &z[1]; > + z[1].viable = 1; > + z[1].next = &z[2]; > + z[2].viable = 1; > + z[2].next = &z[3]; > + bool b; > + z_candidate *c = splice_viable (&z[0], true, &b); > + if (c != &z[1] || z[1].next != &z[2] || z[2].next) > + __builtin_abort (); > + return 0; > +} > > > Jakub >