On Tue, Oct 22, 2019 at 3:32 PM Jakub Jelinek <ja...@redhat.com> wrote: > > On Mon, Oct 21, 2019 at 01:24:30PM +0200, Jakub Jelinek wrote: > > So I wonder if for correctness I don't need to add: > > > > if (!use->iv->no_overflow > > && !cand->iv->no_overflow > > && !integer_pow2p (cstep)) > > return NULL_TREE; > > > > with some of the above as comment explaining why. > > > > On the other side, if cand->iv->no_overflow, couldn't we bypass the extra > > precision test? > > Here are these two in patch form. > > 2019-10-22 Jakub Jelinek <ja...@redhat.com> > > PR debug/90231 > * tree-ssa-loop-ivopts.c (get_debug_computation_at): New function. > (remove_unused_ivs): Use it instead of get_computation_at. When > choosing best candidate, only consider candidates where > get_debug_computation_at actually returns non-NULL. > > --- gcc/tree-ssa-loop-ivopts.c.jj 2019-10-21 14:17:57.598198162 +0200 > +++ gcc/tree-ssa-loop-ivopts.c 2019-10-22 09:30:09.782238157 +0200 > @@ -4089,6 +4089,94 @@ get_computation_at (class loop *loop, gi > return fold_convert (type, aff_combination_to_tree (&aff)); > } > > +/* Like get_computation_at, but try harder, even if the computation > + is more expensive. Intended for debug stmts. */ > + > +static tree > +get_debug_computation_at (class loop *loop, gimple *at, > + struct iv_use *use, struct iv_cand *cand) > +{ > + if (tree ret = get_computation_at (loop, at, use, cand)) > + return ret; > + > + tree ubase = use->iv->base, ustep = use->iv->step; > + tree cbase = cand->iv->base, cstep = cand->iv->step; > + tree var; > + tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase); > + widest_int rat; > + > + /* We must have a precision to express the values of use. */ > + if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype)) > + return NULL_TREE; > + > + /* Try to handle the case that get_computation_at doesn't, > + try to express > + use = ubase + (var - cbase) / ratio. 
*/ > + if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep), > + &rat)) > + return NULL_TREE; > + > + bool neg_p = false; > + if (wi::neg_p (rat)) > + { > + if (TYPE_UNSIGNED (ctype)) > + return NULL_TREE; > + neg_p = true; > + rat = wi::neg (rat); > + } > + > + /* If both IVs can wrap around and CAND doesn't have a power of two step, > + it is unsafe. Consider uint16_t CAND with step 9, when wrapping around, > + the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say > + uint8_t with step 3, those values divided by 3 cast to uint8_t will be > + ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. > */ Interesting, so we can still get correct debug info for iter in such special cases.
> + if (!use->iv->no_overflow > + && !cand->iv->no_overflow > + && !integer_pow2p (cstep)) > + return NULL_TREE; > + > + int bits = wi::exact_log2 (rat); > + if (bits == -1) > + bits = wi::floor_log2 (rat) + 1; > + if (!cand->iv->no_overflow > + && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype)) > + return NULL_TREE; The patch is fine for me. Just for the record, I guess in the future we could try to find (by recording information in an early phase) the correct/bijective candidate for computing the IV in debug info; then those checks would be unnecessary. Thanks, bin > + > + var = var_at_stmt (loop, cand, at); > + > + if (POINTER_TYPE_P (ctype)) > + { > + ctype = unsigned_type_for (ctype); > + cbase = fold_convert (ctype, cbase); > + cstep = fold_convert (ctype, cstep); > + var = fold_convert (ctype, var); > + } > + > + ubase = unshare_expr (ubase); > + cbase = unshare_expr (cbase); > + if (stmt_after_increment (loop, cand, at)) > + var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, > + unshare_expr (cstep)); > + > + var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase); > + var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var, > + wide_int_to_tree (TREE_TYPE (var), rat)); > + if (POINTER_TYPE_P (utype)) > + { > + var = fold_convert (sizetype, var); > + if (neg_p) > + var = fold_build1 (NEGATE_EXPR, sizetype, var); > + var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var); > + } > + else > + { > + var = fold_convert (utype, var); > + var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype, > + ubase, var); > + } > + return var; > +} > + > /* Adjust the cost COST for being in loop setup rather than loop body. > If we're optimizing for space, the loop setup overhead is constant; > if we're optimizing for speed, amortize it over the per-iteration cost. 
> @@ -7523,6 +7611,7 @@ remove_unused_ivs (struct ivopts_data *d > struct iv_use dummy_use; > struct iv_cand *best_cand = NULL, *cand; > unsigned i, best_pref = 0, cand_pref; > + tree comp = NULL_TREE; > > memset (&dummy_use, 0, sizeof (dummy_use)); > dummy_use.iv = info->iv; > @@ -7543,20 +7632,22 @@ remove_unused_ivs (struct ivopts_data *d > ? 1 : 0; > if (best_cand == NULL || best_pref < cand_pref) > { > - best_cand = cand; > - best_pref = cand_pref; > + tree this_comp > + = get_debug_computation_at (data->current_loop, > + SSA_NAME_DEF_STMT (def), > + &dummy_use, cand); > + if (this_comp) > + { > + best_cand = cand; > + best_pref = cand_pref; > + comp = this_comp; > + } > } > } > > if (!best_cand) > continue; > > - tree comp = get_computation_at (data->current_loop, > - SSA_NAME_DEF_STMT (def), > - &dummy_use, best_cand); > - if (!comp) > - continue; > - > if (count > 1) > { > tree vexpr = make_node (DEBUG_EXPR_DECL); > > > Jakub >