On Tue, Oct 31, 2017 at 10:35:59AM +0100, Jakub Jelinek wrote: > > 2017-10-18 Michael Collison <michael.colli...@arm.com> > > > > PR rtl-optimization/82597 > > * compare-elim.c: (try_validate_parallel): Constrain operands > > of recognized insn. > > That just duplicates more of insn_invalid_p. > I wonder if we don't want to do something like the following instead > (untested so far). The insn_uid decrement and ggc_free can be left out if > deemed unnecessary; I have no idea how many try_validate_parallel > calls fail in the real world. > > More importantly, I still don't really like the > df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN); > part of the earlier changes; that is very expensive and I doubt it is really > necessary. You only use the UD/DU chains to find, from the comparison insn, > the previous setter of in_a in the same basic block (if any), but before that you > walk the basic block from HEAD to END in > find_comparison_dom_walker::before_dom_children. So, wouldn't it be cheaper > to track during that walk the last setter insn of each hard register, e.g. in an > array indexed by REGNO (or perhaps track only selected simple single_set > arith instructions), and remember for comparisons of a hard reg with > const0_rtx in struct comparison, next to the prev_clobber field, also the > reg_setter? After all, that is the way prev_clobber is computed, except > in that case it is just a single register (CC) we track it for. > For many targets that is all we need (if all the arith instructions clobber > flags); on, say, x86_64/i686 there are only very few exceptions (e.g. lea, but > that is typically used in a way that makes it impossible to merge). > On others, such as aarch64 (or arm, or both?), that is not the case, so we need to track more.
Here is an untested patch (only tried the cmpelim_mult_uses_1.c testcase in aarch64 cross) that does that. I don't have aarch64 boxes around for easy trunk testing, can bootstrap/regtest it on x86_64-linux/i686-linux and maybe powerpc64le-linux though. If the memset (last_setter, 0, sizeof (last_setter)); for each bb is a problem (maybe for ia64/mmix/mips which have huge numbers of hard registers), one possibility would be to put the array into find_comparison_dom_walker class, clear it only in the constructor and have next to it an auto_vec into which we'd push the REGNOs we've set last_setter to non-NULL where previously they were NULL, then instead of clearing the whole vector we'd just pop all the REGNOs from the vector and clear just those. One extra advantage would be that we could also cheaply clear the last_setter entries when seeing some following flags setter, user, call/jump/asm_input (stuff that can_merge_compare_into_arith verifies). 2017-10-31 Jakub Jelinek <ja...@redhat.com> PR rtl-optimization/82778 PR rtl-optimization/82597 * compare-elim.c (struct comparison): Add in_a_setter field. (find_comparison_dom_walker::before_dom_children): Remove killed bitmap and df_simulate_find_defs call, instead walk the defs. Compute last_setter and initialize in_a_setter. Merge definitions with first initialization for a few variables. (try_validate_parallel): Use insn_invalid_p instead of recog_memoized. Return insn rather than just the pattern. (try_merge_compare): Fix up comment. Don't uselessly test if in_a is a REG_P. Use cmp->in_a_setter instead of walking UD chains. * g++.dg/opt/pr82778.C: New test. 2017-10-31 Michael Collison <michael.colli...@arm.com> PR rtl-optimization/82597 * gcc.dg/pr82597.c: New test. --- gcc/compare-elim.c.jj 2017-10-19 09:08:17.000000000 +0200 +++ gcc/compare-elim.c 2017-10-31 11:37:29.430797609 +0100 @@ -97,6 +97,9 @@ struct comparison /* The insn prior to the comparison insn that clobbers the flags. 
*/ rtx_insn *prev_clobber; + /* The insn prior to the comparison insn that sets in_a REG. */ + rtx_insn *in_a_setter; + /* The two values being compared. These will be either REGs or constants. */ rtx in_a, in_b; @@ -309,26 +312,22 @@ can_eliminate_compare (rtx compare, rtx edge find_comparison_dom_walker::before_dom_children (basic_block bb) { - struct comparison *last_cmp; - rtx_insn *insn, *next, *last_clobber; - bool last_cmp_valid; + rtx_insn *insn, *next; bool need_purge = false; - bitmap killed; - - killed = BITMAP_ALLOC (NULL); + rtx_insn *last_setter[FIRST_PSEUDO_REGISTER]; /* The last comparison that was made. Will be reset to NULL once the flags are clobbered. */ - last_cmp = NULL; + struct comparison *last_cmp = NULL; /* True iff the last comparison has not been clobbered, nor have its inputs. Used to eliminate duplicate compares. */ - last_cmp_valid = false; + bool last_cmp_valid = false; /* The last insn that clobbered the flags, if that insn is of a form that may be valid for eliminating a following compare. To be reset to NULL once the flags are set otherwise. */ - last_clobber = NULL; + rtx_insn *last_clobber = NULL; /* Propagate the last live comparison throughout the extended basic block. */ if (single_pred_p (bb)) @@ -338,6 +337,7 @@ find_comparison_dom_walker::before_dom_c last_cmp_valid = last_cmp->inputs_valid; } + memset (last_setter, 0, sizeof (last_setter)); for (insn = BB_HEAD (bb); insn; insn = next) { rtx src; @@ -346,10 +346,6 @@ find_comparison_dom_walker::before_dom_c if (!NONDEBUG_INSN_P (insn)) continue; - /* Compute the set of registers modified by this instruction. 
*/ - bitmap_clear (killed); - df_simulate_find_defs (insn, killed); - src = conforming_compare (insn); if (src) { @@ -373,6 +369,13 @@ find_comparison_dom_walker::before_dom_c last_cmp->in_b = XEXP (src, 1); last_cmp->eh_note = eh_note; last_cmp->orig_mode = GET_MODE (src); + if (last_cmp->in_b == const0_rtx + && last_setter[REGNO (last_cmp->in_a)]) + { + rtx set = single_set (last_setter[REGNO (last_cmp->in_a)]); + if (set && rtx_equal_p (SET_DEST (set), last_cmp->in_a)) + last_cmp->in_a_setter = last_setter[REGNO (last_cmp->in_a)]; + } all_compares.safe_push (last_cmp); /* It's unusual, but be prepared for comparison patterns that @@ -388,28 +391,36 @@ find_comparison_dom_walker::before_dom_c find_flags_uses_in_insn (last_cmp, insn); /* Notice if this instruction kills the flags register. */ - if (bitmap_bit_p (killed, targetm.flags_regnum)) - { - /* See if this insn could be the "clobber" that eliminates - a future comparison. */ - last_clobber = (arithmetic_flags_clobber_p (insn) ? insn : NULL); - - /* In either case, the previous compare is no longer valid. */ - last_cmp = NULL; - last_cmp_valid = false; - } + df_ref def; + FOR_EACH_INSN_DEF (def, insn) + if (DF_REF_REGNO (def) == targetm.flags_regnum) + { + /* See if this insn could be the "clobber" that eliminates + a future comparison. */ + last_clobber = (arithmetic_flags_clobber_p (insn) + ? insn : NULL); + + /* In either case, the previous compare is no longer valid. */ + last_cmp = NULL; + last_cmp_valid = false; + break; + } } - /* Notice if any of the inputs to the comparison have changed. */ - if (last_cmp_valid - && (bitmap_bit_p (killed, REGNO (last_cmp->in_a)) - || (REG_P (last_cmp->in_b) - && bitmap_bit_p (killed, REGNO (last_cmp->in_b))))) - last_cmp_valid = false; + /* Notice if any of the inputs to the comparison have changed + and remember last insn that sets each register. 
*/ + df_ref def; + FOR_EACH_INSN_DEF (def, insn) + { + if (last_cmp_valid + && (DF_REF_REGNO (def) == REGNO (last_cmp->in_a) + || (REG_P (last_cmp->in_b) + && DF_REF_REGNO (def) == REGNO (last_cmp->in_b)))) + last_cmp_valid = false; + last_setter[DF_REF_REGNO (def)] = insn; + } } - BITMAP_FREE (killed); - /* Remember the live comparison for subsequent members of the extended basic block. */ if (last_cmp) @@ -625,13 +636,19 @@ can_merge_compare_into_arith (rtx_insn * static rtx try_validate_parallel (rtx set_a, rtx set_b) { - rtx par - = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set_a, set_b)); + rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set_a, set_b)); + rtx_insn *insn = make_insn_raw (par); - rtx_insn *insn; - insn = gen_rtx_INSN (VOIDmode, 0, 0, 0, par, 0, -1, 0); + if (insn_invalid_p (insn, false)) + { + crtl->emit.x_cur_insn_uid--; + return NULL_RTX; + } - return recog_memoized (insn) > 0 ? par : NULL_RTX; + SET_PREV_INSN (insn) = NULL_RTX; + SET_NEXT_INSN (insn) = NULL_RTX; + INSN_LOCATION (insn) = 0; + return insn; } /* For a comparison instruction described by CMP check if it compares a @@ -643,7 +660,7 @@ try_validate_parallel (rtx set_a, rtx se <instructions that don't read the condition register> I2: CC := CMP R1 0 I2 can be merged with I1 into: - I1: { R1 := R2 + R3 ; CC := CMP (R2 + R3) 0 } + I1: { CC := CMP (R2 + R3) 0 ; R1 := R2 + R3 } This catches cases where R1 is used between I1 and I2 and therefore combine and other RTL optimisations will not try to propagate it into I2. Return true if we succeeded in merging CMP. 
*/ @@ -653,7 +670,7 @@ try_merge_compare (struct comparison *cm { rtx_insn *cmp_insn = cmp->insn; - if (!REG_P (cmp->in_a) || cmp->in_b != const0_rtx) + if (cmp->in_b != const0_rtx || cmp->in_a_setter == NULL) return false; rtx in_a = cmp->in_a; df_ref use; @@ -664,24 +681,8 @@ try_merge_compare (struct comparison *cm if (!use) return false; - /* Validate the data flow information before attempting to - find the instruction that defines in_a. */ - - struct df_link *ref_chain; - ref_chain = DF_REF_CHAIN (use); - if (!ref_chain || !ref_chain->ref - || !DF_REF_INSN_INFO (ref_chain->ref) || ref_chain->next != NULL) - return false; - - rtx_insn *def_insn = DF_REF_INSN (ref_chain->ref); - /* We found the insn that defines in_a. Only consider the cases where - it is in the same block as the comparison. */ - if (BLOCK_FOR_INSN (cmp_insn) != BLOCK_FOR_INSN (def_insn)) - return false; - + rtx_insn *def_insn = cmp->in_a_setter; rtx set = single_set (def_insn); - if (!set) - return false; if (!can_merge_compare_into_arith (cmp_insn, def_insn)) return false; --- gcc/testsuite/g++.dg/opt/pr82778.C.jj 2017-10-31 09:01:25.025934660 +0100 +++ gcc/testsuite/g++.dg/opt/pr82778.C 2017-10-31 09:00:08.000000000 +0100 @@ -0,0 +1,37 @@ +// PR rtl-optimization/82778 +// { dg-do compile } +// { dg-options "-O2" } + +template <typename a, int b> struct c { + typedef a d[b]; + static a e(d f, int g) { return f[g]; } +}; +template <typename a, int b> struct B { + typedef c<a, b> h; + typename h::d i; + long j; + a at() { return h::e(i, j); } +}; +int k, m, r, s, t; +char l, n, q; +short o, p, w; +struct C { + int u; +}; +B<C, 4> v; +void x() { + if (((p > (q ? v.at().u : k)) >> l - 226) + !(n ^ r * m)) + s = ((-(((p > (q ? v.at().u : k)) >> l - 226) + !(n ^ r * m)) < 0) / + (-(((p > (q ? v.at().u : k)) >> l - 226) + !(n ^ r * m)) ^ + -25 & o) && + p) >> + (0 <= 0 + ? 0 || + (-(((p > (q ? v.at().u : k)) >> l - 226) + !(n ^ r * m)) < + 0) / + (-(((p > (q ? 
v.at().u : k)) >> l - 226) + + !(n ^ r * m)) ^ -25 & o) + : 0); + w = (p > (q ? v.at().u : k)) >> l - 226; + t = !(n ^ r * m); +} --- gcc/testsuite/gcc.dg/pr82597.c.jj 2017-10-31 11:34:39.787915615 +0100 +++ gcc/testsuite/gcc.dg/pr82597.c 2017-10-31 11:34:39.787915615 +0100 @@ -0,0 +1,40 @@ +/* PR rtl-optimization/82597 */ +/* { dg-do compile }*/ +/* { dg-options "-O2 -funroll-loops" } */ + +int pb; + +void +ch (unsigned char np, char fc) +{ + unsigned char *y6 = &np; + + if (fc != 0) + { + unsigned char *z1 = &np; + + for (;;) + if (*y6 != 0) + for (fc = 0; fc < 12; ++fc) + { + int hh; + int tp; + + if (fc != 0) + hh = (*z1 != 0) ? fc : 0; + else + hh = pb; + + tp = fc > 0; + if (hh == tp) + *y6 = 1; + } + } + + if (np != 0) + y6 = (unsigned char *)&fc; + if (pb != 0 && *y6 != 0) + for (;;) + { + } +} Jakub