The APX CFCMOV feature implements conditional faulting: all memory faults
are suppressed when the condition code evaluates to false while a memory
operand is loaded or stored.  This means a conditional move may now load
or store a memory operand that could trap or fault.  Currently the middle
end does not form a conditional move when a load from A or B could trap
or fault.  To enable CFCMOV, we add a new optab.  The fault-suppressing
conditional store does not move any arithmetic computation.  The
fault-suppressing conditional load currently only supports the case of
one trapping MEM plus one operand that neither traps nor is a MEM.

gcc/ChangeLog:

	* ifcvt.cc (noce_try_cmove_load_mem_notrap): Allow convert to cfcmov
	for conditional load.
	(noce_try_cmove_store_mem_notrap): Convert to conditional store.
	(noce_process_if_block): Ditto.
	* optabs.def (OPTAB_D): New optab.
---
 gcc/ifcvt.cc   | 246 ++++++++++++++++++++++++++++++++++++++++++++++++-
 gcc/optabs.def |   1 +
 2 files changed, 246 insertions(+), 1 deletion(-)

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index 58ed42673e5..65c069b8cc6 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -783,6 +783,8 @@ static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,
 			    rtx, rtx, rtx, rtx = NULL, rtx = NULL);
 static bool noce_try_cmove (struct noce_if_info *);
 static bool noce_try_cmove_arith (struct noce_if_info *);
+static bool noce_try_cmove_load_mem_notrap (struct noce_if_info *);
+static bool noce_try_cmove_store_mem_notrap (struct noce_if_info *, rtx *, rtx);
 static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
 static bool noce_try_minmax (struct noce_if_info *);
 static bool noce_try_abs (struct noce_if_info *);
@@ -2401,6 +2403,233 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
   return false;
 }
 
+/* When target support suppress memory fault, try more complex cases involving
+   conditional_move's source or dest may trap or fault.
+   */
+
+static bool
+noce_try_cmove_load_mem_notrap (struct noce_if_info *if_info)
+{
+  rtx a = if_info->a;
+  rtx b = if_info->b;
+  rtx x = if_info->x;
+
+  if (MEM_P (x))
+    return false;
+  /* Just handle a conditional move from one trap MEM + other non_trap,
+     non mem cases.  */
+  if (!(MEM_P (a) ^ MEM_P (b)))
+    return false;
+  bool a_trap = may_trap_or_fault_p (a);
+  bool b_trap = may_trap_or_fault_p (b);
+
+  if (!(a_trap ^ b_trap))
+    return false;
+  if (a_trap && !MEM_P (a))
+    return false;
+  if (b_trap && !MEM_P (b))
+    return false;
+
+  rtx orig_b;
+  rtx_insn *insn_a, *insn_b;
+  bool a_simple = if_info->then_simple;
+  bool b_simple = if_info->else_simple;
+  basic_block then_bb = if_info->then_bb;
+  basic_block else_bb = if_info->else_bb;
+  rtx target;
+  enum rtx_code code;
+  rtx cond = if_info->cond;
+  rtx_insn *ifcvt_seq;
+
+  /* if (test) x = *a; else x = c - d;
+     => x = c - d;
+        if (test)
+          x = *a;
+  */
+
+  code = GET_CODE (cond);
+  insn_a = if_info->insn_a;
+  insn_b = if_info->insn_b;
+  machine_mode x_mode = GET_MODE (x);
+
+  /* Because we only handle one trap MEM + other non_trap, non mem cases,
+     just move one trap MEM always in then_bb.  */
+  if (noce_reversed_cond_code (if_info) != UNKNOWN)
+    {
+      bool reversep = false;
+      if (b_trap)
+        reversep = true;
+
+      if (reversep)
+        {
+          if (if_info->rev_cond)
+            {
+              cond = if_info->rev_cond;
+              code = GET_CODE (cond);
+            }
+          else
+            code = reversed_comparison_code (cond, if_info->jump);
+          std::swap (a, b);
+          std::swap (insn_a, insn_b);
+          std::swap (a_simple, b_simple);
+          std::swap (then_bb, else_bb);
+        }
+    }
+
+  if (then_bb && else_bb
+      && (!bbs_ok_for_cmove_arith (then_bb, else_bb, if_info->orig_x)
+          || !bbs_ok_for_cmove_arith (else_bb, then_bb, if_info->orig_x)))
+    return false;
+
+  start_sequence ();
+
+  /* If one of the blocks is empty then the corresponding B or A value
+     came from the test block.  The non-empty complex block that we will
+     emit might clobber the register used by B or A, so move it to a pseudo
+     first.
+   */
+
+  rtx tmp_b = NULL_RTX;
+
+  /* Don't move trap mem to a pseudo. */
+  if (!may_trap_or_fault_p (b) && (b_simple || !else_bb))
+    tmp_b = gen_reg_rtx (x_mode);
+
+  orig_b = b;
+
+  rtx emit_a = NULL_RTX;
+  rtx emit_b = NULL_RTX;
+  rtx_insn *tmp_insn = NULL;
+  bool modified_in_a = false;
+  bool modified_in_b = false;
+  /* If either operand is complex, load it into a register first.
+     The best way to do this is to copy the original insn.  In this
+     way we preserve any clobbers etc that the insn may have had.
+     This is of course not possible in the IS_MEM case.  */
+
+  if (! general_operand (b, GET_MODE (b)) || tmp_b)
+    {
+      if (insn_b)
+        {
+          b = tmp_b ? tmp_b : gen_reg_rtx (GET_MODE (b));
+          rtx_insn *copy_of_b = as_a <rtx_insn *> (copy_rtx (insn_b));
+          rtx set = single_set (copy_of_b);
+
+          SET_DEST (set) = b;
+          emit_b = PATTERN (copy_of_b);
+        }
+      else
+        {
+          rtx tmp_reg = tmp_b ? tmp_b : gen_reg_rtx (GET_MODE (b));
+          emit_b = gen_rtx_SET (tmp_reg, b);
+          b = tmp_reg;
+        }
+    }
+
+  if (tmp_b && then_bb)
+    {
+      FOR_BB_INSNS (then_bb, tmp_insn)
+        /* Don't check inside insn_a.  We will have changed it to emit_a
+           with a destination that doesn't conflict.  */
+        if (!(insn_a && tmp_insn == insn_a)
+            && modified_in_p (orig_b, tmp_insn))
+          {
+            modified_in_a = true;
+            break;
+          }
+
+    }
+
+  modified_in_b = emit_b != NULL_RTX && modified_in_p (a, emit_b);
+  /* If insn to set up A clobbers any registers B depends on, try to
+     swap insn that sets up A with the one that sets up B.  If even
+     that doesn't help, punt.
+   */
+  if (modified_in_a && !modified_in_b)
+    {
+      if (!noce_emit_bb (emit_b, else_bb, b_simple))
+        goto end_seq_and_fail;
+
+      if (!noce_emit_bb (emit_a, then_bb, a_simple))
+        goto end_seq_and_fail;
+    }
+  else if (!modified_in_a)
+    {
+      if (!noce_emit_bb (emit_b, else_bb, b_simple))
+        goto end_seq_and_fail;
+
+      if (!noce_emit_bb (emit_a, then_bb, a_simple))
+        goto end_seq_and_fail;
+    }
+  else
+    goto end_seq_and_fail;
+
+  target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0), XEXP (cond, 1),
+                            a, b);
+
+  if (! target)
+    goto end_seq_and_fail;
+
+  if (target != x)
+    noce_emit_move_insn (x, target);
+
+  ifcvt_seq = end_ifcvt_sequence (if_info);
+  if (!ifcvt_seq || !targetm.noce_conversion_profitable_p (ifcvt_seq, if_info))
+    return false;
+
+  emit_insn_before_setloc (ifcvt_seq, if_info->jump,
+                           INSN_LOCATION (if_info->insn_a));
+  if_info->transform_name = "noce_try_cmove_load_mem_notrap";
+  return true;
+
+ end_seq_and_fail:
+  end_sequence ();
+  return false;
+}
+
+static bool
+noce_try_cmove_store_mem_notrap (struct noce_if_info *if_info, rtx *x_ptr, rtx orig_x)
+{
+  rtx a = if_info->a;
+  rtx b = if_info->b;
+  rtx x = orig_x;
+  machine_mode x_mode = GET_MODE (x);
+
+  if (!MEM_P (x) || !rtx_equal_p (x, b))
+    return false;
+  if (!may_trap_or_fault_p (x))
+    return false;
+  if (!if_info->then_simple || !register_operand (a, x_mode))
+    return false;
+
+  rtx cond = if_info->cond;
+  enum rtx_code code = GET_CODE (cond);
+  rtx_insn *ifcvt_seq;
+
+  start_sequence ();
+
+  rtx target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0), XEXP (cond, 1),
+                                a, b);
+
+  if (! target)
+    goto end_seq_and_fail;
+
+  if (target != x)
+    noce_emit_move_insn (x, target);
+
+  ifcvt_seq = end_ifcvt_sequence (if_info);
+  if (!ifcvt_seq || !targetm.noce_conversion_profitable_p (ifcvt_seq, if_info))
+    return false;
+
+  emit_insn_before_setloc (ifcvt_seq, if_info->jump,
+                           INSN_LOCATION (if_info->insn_a));
+  if_info->transform_name = "noce_try_cmove_store_mem_notrap";
+  if_info->x = orig_x;
+  *x_ptr = orig_x;
+  return true;
+
+ end_seq_and_fail:
+  end_sequence ();
+  return false;
+}
+
 /* For most cases, the simplified condition we found is the best choice, but
    this is not the case for the min/max/abs transforms.  For these we wish
    to know that it is A or B in the condition.  */
@@ -4055,6 +4284,8 @@
      the lifetime of hard registers on small register class machines.  */
   orig_x = x;
   if_info->orig_x = orig_x;
+  bool have_cfmovcc = (optab_handler (cfmovcc_optab, GET_MODE (orig_x))
+                       != CODE_FOR_nothing);
   if (!REG_P (x)
       || (HARD_REGISTER_P (x)
          && targetm.small_register_classes_for_mode_p (GET_MODE (x))))
@@ -4121,12 +4352,21 @@
     }
 
   if (!set_b && MEM_P (orig_x))
+    {
+      /* Conditional_move_suppress_fault for condition mem store would not
+         move any arithmetic calculations.  */
+      if (have_cfmovcc
+          && HAVE_conditional_move
+          && noce_try_cmove_store_mem_notrap (if_info, &x, orig_x))
+        goto success;
+      else
     /* We want to avoid store speculation to avoid cases like
          if (pthread_mutex_trylock(mutex))
           ++global_variable;
        Rather than go to much effort here, we rely on the SSA optimizers,
        which do a good enough job these days.
     */
-    return false;
+        return false;
+    }
 
   if (noce_try_move (if_info))
     goto success;
@@ -4160,6 +4400,10 @@
       if (HAVE_conditional_move
          && noce_try_cmove_arith (if_info))
        goto success;
+      if (HAVE_conditional_move
+          && have_cfmovcc
+          && noce_try_cmove_load_mem_notrap (if_info))
+        goto success;
       if (noce_try_sign_mask (if_info))
        goto success;
     }
diff --git a/gcc/optabs.def b/gcc/optabs.def
index bc2611abdc2..49335ec3212 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -540,3 +540,4 @@ OPTAB_D (vec_shl_insert_optab, "vec_shl_insert_$a")
 OPTAB_D (len_load_optab, "len_load_$a")
 OPTAB_D (len_store_optab, "len_store_$a")
 OPTAB_D (select_vl_optab, "select_vl$a")
+OPTAB_D (cfmovcc_optab, "cfmov$acc")
-- 
2.31.1