Hi!

The following patch attempts to improve expansion when we have code like:

  <bb 16> [local count: 102513059]:
  if_conversion_var_52 = MEM[base: st1_22, offset: 0B];
  if (if_conversion_var_52 < 0)
    goto <bb 17>; [41.00%]
  else
    goto <bb 18>; [59.00%]

...

  <bb 18> [local count: 60482706]:
  _81 = _11 == 2;
  _82 = if_conversion_var_52 > 0;
  _79 = _81 & _82;
  if (_79 != 0)
    goto <bb 19>; [29.26%]
  else
    goto <bb 20>; [70.74%]
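For illustration, source of roughly the following shape could produce GIMPLE
like the above; this is a made-up sketch (the struct, the names and the
classify function are mine, not the PR testcase):

  /* Hypothetical source shape; after if-conversion the second condition
     becomes _11 == 2 & v > 0, matching <bb 18> above.  */
  struct elt { long val; int kind; };

  int
  classify (const struct elt *p)
  {
    long v = p->val;            /* if_conversion_var_52 = MEM[...];  */
    if (v < 0)                  /* <bb 16>: compares v against 0  */
      return -1;
    if (p->kind == 2 && v > 0)  /* <bb 18>: v > 0 may reuse the flags  */
      return 1;
    return 0;
  }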
Here, the single pred of the bb performed a comparison similar to what one
of the & (or |) operands does, and as & (or |) is not ordered, we can
choose which operand we'll expand first.  If we expand
if_conversion_var_52 > 0 first, there is a chance that we can reuse the
flags from the previous comparison.  The patch does this only if there are
no (non-virtual) PHIs in the current bb and all stmts before the current
condition are TERable, so there is nothing in between that would clobber
the flags.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk if it
helps for SPEC?

Venkat, do you think you could benchmark it in the setup where you've
measured the slowdown, to see if it helps?

I see the patch changes the loop:

 	.p2align 4,,10
 	.p2align 3
 .L11:
+	jle	.L10
 	cmpl	$2, %eax
-	jne	.L10
-	testq	%r8, %r8
-	jg	.L12
+	je	.L12
 	.p2align 4,,10
 	.p2align 3
 .L10:
 	addq	%r9, %rsi
 	cmpq	%rsi, %rdx
 	jbe	.L35
 .L13:
 	movl	24(%rsi), %eax
 	testl	%eax, %eax
 	jle	.L10
 	movq	(%rsi), %r8
 	testq	%r8, %r8
 	jns	.L11
 	cmpl	$1, %eax
 	jne	.L10
 .L12:
 	addq	$1, %r10
 	movl	$1, %r11d
 	movq	st5(,%r10,8), %rax
 	movq	%rsi, (%rax)
 	addq	%r9, %rsi
 	movq	%r8, 8(%rax)
 	movq	st5(,%rdi,8), %rax
 	movq	%r8, 16(%rax)
 	cmpq	%rsi, %rdx
 	ja	.L13

which I assume is an improvement, since it saves one comparison.
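To spell out where the flags get reused in the new sequence: the
annotations below are mine (relying on the usual x86 semantics of test,
which sets SF/ZF and clears OF), not part of the compiler output:

 	movq	(%rsi), %r8
 	testq	%r8, %r8	# sets SF/ZF from %r8, clears OF
 	jns	.L11		# taken when %r8 >= 0
 	...
 .L11:
 	jle	.L10		# reuses those flags: %r8 <= 0 (so %r8 == 0 here)
 	cmpl	$2, %eax	# only now test the other && operand
 	je	.L12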
2018-03-27  Jakub Jelinek  <ja...@redhat.com>
	    Richard Biener  <rgue...@suse.de>

	PR rtl-optimization/78200
	* cfgexpand.c (gimple_bb_info_for_bb): New variable.
	(expand_bb_seq, expand_phi_nodes): New functions.
	(expand_gimple_cond): Use heuristics if it is desirable to swap
	TRUTH_{AND,OR}IF_EXPR operands.
	(expand_gimple_basic_block): Remember GIMPLE il for bbs being
	expanded or already expanded.
	(pass_expand::execute): Initialize and then free the
	gimple_bb_info_for_bb vector.

--- gcc/cfgexpand.c.jj	2018-02-09 06:44:36.413805556 +0100
+++ gcc/cfgexpand.c	2018-03-26 13:35:57.536509844 +0200
@@ -2357,6 +2357,34 @@ label_rtx_for_bb (basic_block bb ATTRIBU
   return l;
 }
 
+/* Maps blocks to their GIMPLE IL.  */
+static vec<gimple_bb_info, va_heap, vl_embed> *gimple_bb_info_for_bb;
+
+/* Like bb_seq, except that during expansion returns the GIMPLE seq
+   even for the blocks that have been already expanded or are being
+   currently expanded.  */
+
+static gimple_seq
+expand_bb_seq (basic_block bb)
+{
+  if ((bb->flags & BB_RTL)
+      && (unsigned) bb->index < vec_safe_length (gimple_bb_info_for_bb))
+    return (*gimple_bb_info_for_bb)[bb->index].seq;
+  return bb_seq (bb);
+}
+
+/* Like phi_nodes, except that during expansion returns the GIMPLE PHIs
+   even for the blocks that have been already expanded or are being
+   currently expanded.  */
+
+static gimple_seq
+expand_phi_nodes (basic_block bb)
+{
+  if ((bb->flags & BB_RTL)
+      && (unsigned) bb->index < vec_safe_length (gimple_bb_info_for_bb))
+    return (*gimple_bb_info_for_bb)[bb->index].phi_nodes;
+  return phi_nodes (bb);
+}
 
 /* A subroutine of expand_gimple_cond.  Given E, a fallthrough edge
    of a basic block where we just expanded the conditional at the end,
@@ -2475,6 +2503,65 @@ expand_gimple_cond (basic_block bb, gcon
 		  op0 = gimple_assign_rhs1 (second);
 		  op1 = gimple_assign_rhs2 (second);
 		}
+
+	      /* We'll expand RTL for op0 first, see if we'd better expand RTL
+		 for op1 first.  Do that if the previous bb ends with
+		 if (x op cst), op1's def_stmt rhs is x op2 cst and there are
+		 no non-virtual PHIs nor non-TERed stmts in BB before STMT.  */
+	      while (TREE_CODE (op1) == SSA_NAME
+		     && (code == TRUTH_ANDIF_EXPR || code == TRUTH_ORIF_EXPR)
+		     && single_pred_p (bb))
+		{
+		  gimple *def1 = SSA_NAME_DEF_STMT (op1);
+		  if (!is_gimple_assign (def1)
+		      || (TREE_CODE_CLASS (gimple_assign_rhs_code (def1))
+			  != tcc_comparison))
+		    break;
+
+		  basic_block pred = single_pred (bb);
+		  gimple_seq pred_seq = expand_bb_seq (pred);
+		  gimple_stmt_iterator i = gsi_last (pred_seq);
+		  if (!gsi_end_p (i) && is_gimple_debug (gsi_stmt (i)))
+		    gsi_prev_nondebug (&i);
+
+		  gimple *last = gsi_stmt (i);
+		  if (!last
+		      || gimple_code (last) != GIMPLE_COND
+		      || gimple_assign_rhs1 (def1) != gimple_cond_lhs (last)
+		      || !operand_equal_p (gimple_assign_rhs2 (def1),
+					   gimple_cond_rhs (last), 0))
+		    break;
+
+		  gimple_seq phi_seq = expand_phi_nodes (bb);
+		  i = gsi_start (phi_seq);
+		  if (!gsi_end_p (i)
+		      && !virtual_operand_p (gimple_phi_result (gsi_stmt (i))))
+		    break;
+
+		  /* Check if all stmts before STMT are TERed.  */
+		  gimple_seq cur_seq = expand_bb_seq (bb);
+		  i = gsi_start_nondebug (cur_seq);
+		  int cnt = 100;
+		  while (!gsi_end_p (i) && gsi_stmt (i) != stmt)
+		    {
+		      gimple *g = gsi_stmt (i);
+		      if (!is_gimple_assign (g))
+			break;
+		      if (--cnt == 0)
+			break;
+		      tree lhs = gimple_assign_lhs (g);
+		      if (TREE_CODE (lhs) != SSA_NAME
+			  || !get_gimple_for_ssa_name (lhs))
+			break;
+		      gsi_next_nondebug (&i);
+		    }
+
+		  if (gsi_stmt (i) != stmt)
+		    break;
+
+		  std::swap (op0, op1);
+		  break;
+		}
 	    }
 	}
     }
@@ -5501,6 +5588,10 @@ expand_gimple_basic_block (basic_block b
   if (optimize)
     reorder_operands (bb);
   stmts = bb_seq (bb);
+  if ((unsigned) bb->index >= vec_safe_length (gimple_bb_info_for_bb))
+    vec_safe_grow_cleared (gimple_bb_info_for_bb, bb->index + 1);
+  (*gimple_bb_info_for_bb)[bb->index].seq = stmts;
+  (*gimple_bb_info_for_bb)[bb->index].phi_nodes = phi_nodes (bb);
   bb->il.gimple.seq = NULL;
   bb->il.gimple.phi_nodes = NULL;
   rtl_profile_for_bb (bb);
@@ -6419,6 +6510,8 @@ pass_expand::execute (function *fun)
 	  >= PARAM_VALUE (PARAM_MAX_DEBUG_MARKER_COUNT))
     cfun->debug_nonbind_markers = false;
 
+  vec_safe_grow_cleared (gimple_bb_info_for_bb, n_basic_blocks_for_fn (cfun));
+
   lab_rtx_for_bb = new hash_map<basic_block, rtx_code_label *>;
   FOR_BB_BETWEEN (bb, init_block->next_bb, EXIT_BLOCK_PTR_FOR_FN (fun),
 		  next_bb)
@@ -6433,6 +6526,8 @@ pass_expand::execute (function *fun)
       deep_ter_debug_map = NULL;
     }
 
+  vec_free (gimple_bb_info_for_bb);
+
   /* Free stuff we no longer need after GIMPLE optimizations.  */
   free_dominance_info (CDI_DOMINATORS);
   free_dominance_info (CDI_POST_DOMINATORS);

	Jakub