This fixes FRE's handling of TARGET_MEM_REF (it didn't consider &TARGET_MEM_REF) and adds a late FRE pass which has iteration disabled and runs only at -O[2s]+ to limit the compile-time impact.
This helps cases where unrolling and vectorization exposes "piecewise" redundancies DOM cannot handle. Thus (vector *)&a = { 1, 2, 3, 4 }; .. = a[2]; there's still the opposite case not handled (PR83518) but I will see whether I can make it work without too much cost: a[0] = 1; a[1] = 2; a[2] = 3; a[3] = 4; ... = (vector *)&a; Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. I'll commit the TARGET_MEM_REF fixing independently. Any comments? I'm not sure I like globbing the iteration parameter and the optimize > 1 check; maybe I should simply rename it to 'late' ... The compile-time impact might be non-trivial for those testcases that run into a large overhead from the alias-stmt walking but I didn't do any measurements yet. Thanks, Richard. 2019-06-27 Richard Biener <rguent...@suse.de> * tree-ssa-sccvn.c (class pass_fre): Add may_iterate pass parameter. (pass_fre::execute): Honor it. * passes.def: Adjust pass_fre invocations to allow iterating, add non-iterating pass_fre before late threading/dom. 
Index: gcc/tree-ssa-sccvn.c =================================================================== --- gcc/tree-ssa-sccvn.c (revision 272742) +++ gcc/tree-ssa-sccvn.c (working copy) @@ -791,39 +791,6 @@ vn_reference_eq (const_vn_reference_t co static void copy_reference_ops_from_ref (tree ref, vec<vn_reference_op_s> *result) { - if (TREE_CODE (ref) == TARGET_MEM_REF) - { - vn_reference_op_s temp; - - result->reserve (3); - - memset (&temp, 0, sizeof (temp)); - temp.type = TREE_TYPE (ref); - temp.opcode = TREE_CODE (ref); - temp.op0 = TMR_INDEX (ref); - temp.op1 = TMR_STEP (ref); - temp.op2 = TMR_OFFSET (ref); - temp.off = -1; - temp.clique = MR_DEPENDENCE_CLIQUE (ref); - temp.base = MR_DEPENDENCE_BASE (ref); - result->quick_push (temp); - - memset (&temp, 0, sizeof (temp)); - temp.type = NULL_TREE; - temp.opcode = ERROR_MARK; - temp.op0 = TMR_INDEX2 (ref); - temp.off = -1; - result->quick_push (temp); - - memset (&temp, 0, sizeof (temp)); - temp.type = NULL_TREE; - temp.opcode = TREE_CODE (TMR_BASE (ref)); - temp.op0 = TMR_BASE (ref); - temp.off = -1; - result->quick_push (temp); - return; - } - /* For non-calls, store the information that makes up the address. */ tree orig = ref; while (ref) @@ -853,6 +820,20 @@ copy_reference_ops_from_ref (tree ref, v temp.base = MR_DEPENDENCE_BASE (ref); temp.reverse = REF_REVERSE_STORAGE_ORDER (ref); break; + case TARGET_MEM_REF: + /* The base address gets its own vn_reference_op_s structure. */ + temp.op0 = TMR_INDEX (ref); + temp.op1 = TMR_STEP (ref); + temp.op2 = TMR_OFFSET (ref); + temp.clique = MR_DEPENDENCE_CLIQUE (ref); + temp.base = MR_DEPENDENCE_BASE (ref); + result->safe_push (temp); + memset (&temp, 0, sizeof (temp)); + temp.type = NULL_TREE; + temp.opcode = ERROR_MARK; + temp.op0 = TMR_INDEX2 (ref); + temp.off = -1; + break; case BIT_FIELD_REF: /* Record bits, position and storage order. 
*/ temp.op0 = TREE_OPERAND (ref, 1); @@ -6872,14 +6853,24 @@ class pass_fre : public gimple_opt_pass { public: pass_fre (gcc::context *ctxt) - : gimple_opt_pass (pass_data_fre, ctxt) + : gimple_opt_pass (pass_data_fre, ctxt), may_iterate (true) {} /* opt_pass methods: */ opt_pass * clone () { return new pass_fre (m_ctxt); } - virtual bool gate (function *) { return flag_tree_fre != 0; } + void set_pass_param (unsigned int n, bool param) + { + gcc_assert (n == 0); + may_iterate = param; + } + virtual bool gate (function *) + { + return flag_tree_fre != 0 && (may_iterate || optimize > 1); + } virtual unsigned int execute (function *); +private: + bool may_iterate; }; // class pass_fre unsigned int @@ -6888,15 +6879,16 @@ pass_fre::execute (function *fun) unsigned todo = 0; /* At -O[1g] use the cheap non-iterating mode. */ + bool iterate_p = may_iterate && (optimize > 1); calculate_dominance_info (CDI_DOMINATORS); - if (optimize > 1) + if (iterate_p) loop_optimizer_init (AVOID_CFG_MODIFICATIONS); default_vn_walk_kind = VN_WALKREWRITE; - todo = do_rpo_vn (fun, NULL, NULL, optimize > 1, true); + todo = do_rpo_vn (fun, NULL, NULL, iterate_p, true); free_rpo_vn (); - if (optimize > 1) + if (iterate_p) loop_optimizer_finalize (); return todo; Index: gcc/passes.def =================================================================== --- gcc/passes.def (revision 272742) +++ gcc/passes.def (working copy) @@ -83,7 +83,7 @@ along with GCC; see the file COPYING3. /* pass_build_ealias is a dummy pass that ensures that we execute TODO_rebuild_alias at this point. */ NEXT_PASS (pass_build_ealias); - NEXT_PASS (pass_fre); + NEXT_PASS (pass_fre, true /* may_iterate */); NEXT_PASS (pass_early_vrp); NEXT_PASS (pass_merge_phi); NEXT_PASS (pass_dse); @@ -117,7 +117,7 @@ along with GCC; see the file COPYING3. 
NEXT_PASS (pass_oacc_kernels); PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels) NEXT_PASS (pass_ch); - NEXT_PASS (pass_fre); + NEXT_PASS (pass_fre, true /* may_iterate */); /* We use pass_lim to rewrite in-memory iteration and reduction variable accesses in loops into local variables accesses. */ NEXT_PASS (pass_lim); @@ -199,7 +199,7 @@ along with GCC; see the file COPYING3. execute TODO_rebuild_alias at this point. */ NEXT_PASS (pass_build_alias); NEXT_PASS (pass_return_slot); - NEXT_PASS (pass_fre); + NEXT_PASS (pass_fre, true /* may_iterate */); NEXT_PASS (pass_merge_phi); NEXT_PASS (pass_thread_jumps); NEXT_PASS (pass_vrp, true /* warn_array_bounds_p */); @@ -312,6 +312,7 @@ along with GCC; see the file COPYING3. NEXT_PASS (pass_strength_reduction); NEXT_PASS (pass_split_paths); NEXT_PASS (pass_tracer); + NEXT_PASS (pass_fre, false /* may_iterate */); NEXT_PASS (pass_thread_jumps); NEXT_PASS (pass_dominator, false /* may_peel_loop_headers_p */); NEXT_PASS (pass_strlen);