From: Lingling Kong <lingling.k...@intel.com>

Hi,
Thanks for Richard's review; the v5 patch contains the following changes:

1. Separate the maskload/maskstore emit out of noce_emit_cmove and add a
   new function emit_mask_load_store in optabs.cc.
2. Follow the operand order of the maskload and maskstore optabs and take
   cond as the predicate operand with VOIDmode.
3. Cache may_trap_or_fault_p and correct the logic so that only one of the
   cmove source operands can be a may_trap_or_fault memory.

Bootstrapped & regtested on x86_64-pc-linux-gnu.  OK for trunk?

The APX CFCMOV feature implements conditional faulting: when the condition
code evaluates to false, all memory faults are suppressed while loading or
storing the memory operand.  This lets a conditional move load or store a
memory operand that may trap or fault.

Currently the middle end does not support a conditional move if a load
from A or B could trap or fault.  To enable CFCMOV, we use maskload and
maskstore as a proxy for the backend expander.  In this initial
implementation the predicate of maskload/maskstore is recognized as a
comparison rtx.  The fault-suppressing conditional move for a conditional
memory store does not move any arithmetic calculations.  For a conditional
memory load, only the case where one operand is a memory that may trap or
fault and the other neither traps nor is a memory is supported.
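As a source-level illustration (an example added here for clarity, not part
of the patch), these are the store and load forms that become if-convertible
once conditional faulting is available:

  /* Store form: "*p" may fault, so the store cannot normally be
     speculated; a scalar maskstore (APX CFCMOV) performs it only when
     the condition is true and suppresses the fault otherwise.  */
  void
  cond_store (int *p, int a, int c)
  {
    if (c)
      *p = a;
  }

  /* Load form: one arm loads from a memory that may fault, the other
     arm is a register value; this maps to the maskload case.  */
  int
  cond_load (int *p, int b, int c)
  {
    return c ? *p : b;
  }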
gcc/ChangeLog:

	* ifcvt.cc (can_use_mask_load_store): New function to check
	whether a conditional faulting load/store can be used.
	(noce_try_cmove_arith): Relax the condition for the operand
	may_trap_or_fault check; expand with the maskload/maskstore
	optab when one of the cmove operands may trap or fault.
	(noce_process_if_block): Allow a trap_or_fault dest for the
	"if (...) *x = a; else skip" scenario when the maskstore
	optab is available.
	* optabs.h (emit_mask_load_store): New declaration.
	* optabs.cc (emit_mask_load_store): New function to emit a
	conditional move with the maskload/maskstore optab.
---
 gcc/ifcvt.cc  | 110 ++++++++++++++++++++++++++++++++++++++++++--------
 gcc/optabs.cc | 103 ++++++++++++++++++++++++++++++++++++++++++++++
 gcc/optabs.h  |   3 ++
 3 files changed, 200 insertions(+), 16 deletions(-)

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index cb5597bc171..51ac398aee1 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -778,6 +778,7 @@ static bool noce_try_store_flag_mask (struct noce_if_info *);
 static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,
                             rtx, rtx, rtx, rtx = NULL, rtx = NULL);
 static bool noce_try_cmove (struct noce_if_info *);
+static bool can_use_mask_load_store (struct noce_if_info *);
 static bool noce_try_cmove_arith (struct noce_if_info *);
 static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
 static bool noce_try_minmax (struct noce_if_info *);
@@ -2132,6 +2133,39 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple)
   return true;
 }
 
+/* Return TRUE if the backend supports the scalar maskload_optab
+   or maskstore_optab, which suppress memory faults when trying to
+   load or store a memory operand and the condition code evaluates
+   to false.
+   Currently the following forms
+     "if (test) *x = a; else skip;" --> mask_store
+     "if (test) x = *a; else x = b;" --> mask_load
+     "if (test) x = a; else x = *b;" --> mask_load
+   are supported.  */
+
+static bool
+can_use_mask_load_store (struct noce_if_info *if_info)
+{
+  rtx b = if_info->b;
+  rtx x = if_info->x;
+  rtx cond = if_info->cond;
+
+  if (MEM_P (x))
+    {
+      if (convert_optab_handler (maskstore_optab, GET_MODE (x),
+                                 GET_MODE (cond)) == CODE_FOR_nothing)
+        return false;
+
+      if (!rtx_equal_p (x, b) || !may_trap_or_fault_p (x))
+        return false;
+
+      return true;
+    }
+  else
+    return convert_optab_handler (maskload_optab, GET_MODE (x),
+                                  GET_MODE (cond)) != CODE_FOR_nothing;
+}
+
 /* Try more complex cases involving conditional_move.  */
 
 static bool
@@ -2151,6 +2185,9 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
   enum rtx_code code;
   rtx cond = if_info->cond;
   rtx_insn *ifcvt_seq;
+  bool a_may_trap_or_fault = may_trap_or_fault_p (a);
+  bool b_may_trap_or_fault = may_trap_or_fault_p (b);
+  bool use_mask_load_store = false;
 
   /* A conditional move from two memory sources is equivalent to a
      conditional on their addresses followed by a load.  Don't do this
@@ -2167,11 +2204,22 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
       x = gen_reg_rtx (address_mode);
       is_mem = true;
     }
-
-  /* ??? We could handle this if we knew that a load from A or B could
-     not trap or fault.  This is also true if we've already loaded
-     from the address along the path from ENTRY.  */
-  else if (may_trap_or_fault_p (a) || may_trap_or_fault_p (b))
+  /* We cannot handle the case where both a and b may trap or
+     fault.  */
+  else if (a_may_trap_or_fault && b_may_trap_or_fault)
+    return false;
+  /* A scalar maskload_optab/maskstore_optab implies conditional
+     faulting: if the condition mask evaluates to false, all memory
+     faults are suppressed when loading or storing the memory operand.
+     So if a scalar maskload or maskstore is available, we can do the
+     conversion when one of a/b may trap or fault.  */
+  else if (((MEM_P (a) && a_may_trap_or_fault
+             && !b_may_trap_or_fault)
+            || (MEM_P (b) && b_may_trap_or_fault
+                && !a_may_trap_or_fault))
+           && can_use_mask_load_store (if_info))
+    use_mask_load_store = true;
+  else if (a_may_trap_or_fault || b_may_trap_or_fault)
     return false;
 
   /* if (test) x = a + b; else x = c - d;
@@ -2212,6 +2260,7 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
           std::swap (insn_a, insn_b);
           std::swap (a_simple, b_simple);
           std::swap (then_bb, else_bb);
+          std::swap (a_may_trap_or_fault, b_may_trap_or_fault);
         }
     }
 
@@ -2247,9 +2296,14 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
 
   /* If either operand is complex, load it into a register first.
      The best way to do this is to copy the original insn.  In this
     way we preserve any clobbers etc that the insn may have had.
-     This is of course not possible in the IS_MEM case.  */
+     This is of course not possible in the IS_MEM case.
+     For a load or store of an operand that may trap or fault, do not
+     hoist the load or store; otherwise it cannot suppress the memory
+     fault and becomes a normal arithmetic insn instead of a
+     conditional faulting movcc.  */
 
-  if (! general_operand (a, GET_MODE (a)) || tmp_a)
+  if (!a_may_trap_or_fault
+      && (! general_operand (a, GET_MODE (a)) || tmp_a))
     {
       if (is_mem)
@@ -2278,7 +2332,8 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
         }
     }
 
-  if (! general_operand (b, GET_MODE (b)) || tmp_b)
+  if (!b_may_trap_or_fault
+      && (! general_operand (b, GET_MODE (b)) || tmp_b))
     {
       if (is_mem)
         {
@@ -2356,8 +2411,12 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
   else
     goto end_seq_and_fail;
 
-  target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0), XEXP (cond, 1),
-                            a, b);
+  if (use_mask_load_store)
+    target = emit_mask_load_store (x, code, XEXP (cond, 0),
+                                   XEXP (cond, 1), a, b);
+  else
+    target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0),
+                              XEXP (cond, 1), a, b);
 
   if (! target)
     goto end_seq_and_fail;
@@ -4210,12 +4269,31 @@ noce_process_if_block (struct noce_if_info *if_info)
     }
 
   if (!set_b && MEM_P (orig_x))
-    /* We want to avoid store speculation to avoid cases like
-         if (pthread_mutex_trylock(mutex))
-           ++global_variable;
-       Rather than go to much effort here, we rely on the SSA optimizers,
-       which do a good enough job these days.  */
-    return false;
+    {
+      /* When the target supports a scalar mask_store, i.e. x86 cfcmov,
+         we can do a conditional memory store for "if (...) *x = a; else skip"
+         where x may trap or fault.  */
+      if ((convert_optab_handler (maskstore_optab, GET_MODE (orig_x),
+                                  GET_MODE (cond)) != CODE_FOR_nothing)
+          && HAVE_conditional_move
+          && may_trap_or_fault_p (orig_x)
+          && register_operand (a, GET_MODE (orig_x)))
+        {
+          x = orig_x;
+          if_info->x = x;
+          if (noce_try_cmove_arith (if_info))
+            goto success;
+          else
+            return false;
+        }
+      /* We want to avoid store speculation to avoid cases like
+           if (pthread_mutex_trylock(mutex))
+             ++global_variable;
+         Rather than go to much effort here, we rely on the SSA optimizers,
+         which do a good enough job these days.  */
+      else
+        return false;
+    }
 
   if (noce_try_move (if_info))
     goto success;
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 36f2e6af8b5..f862f74fb5e 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -5215,6 +5215,109 @@ emit_conditional_move_1 (rtx target, rtx comparison,
 
   return NULL_RTX;
 }
+/* Emit a conditional move instruction using the maskload or maskstore
+   optab, which allows one of the move source or dest to be a memory
+   that may trap or fault.  The input operands have the same meaning as
+   for emit_conditional_move, except that PRED may be a mask predicate.  */
+
+rtx
+emit_mask_load_store (rtx target, enum rtx_code code, rtx cmp_a,
+                      rtx cmp_b, rtx vfalse, rtx vtrue, rtx pred)
+{
+  enum insn_code icode;
+
+  bool unsignedp = (code == LTU || code == GEU
+                    || code == LEU || code == GTU);
+
+  bool maskstore_p = MEM_P (target);
+  bool restore_stack = false;
+  saved_pending_stack_adjust save;
+  rtx_insn *last = get_last_insn ();
+
+  /* If PRED is not given, prepare a compare insn using CMP_A and
+     CMP_B as the predicate.  */
+  if (pred == NULL_RTX)
+    {
+      if (! general_operand (cmp_a, GET_MODE (cmp_a))
+          || ! general_operand (cmp_b, GET_MODE (cmp_b)))
+        return NULL_RTX;
+
+      if (swap_commutative_operands_p (cmp_a, cmp_b))
+        {
+          std::swap (cmp_a, cmp_b);
+          code = swap_condition (code);
+        }
+
+      /* get_condition will prefer to generate LT and GT even if the old
+         comparison was against zero, so undo that canonicalization here
+         since comparisons against zero are cheaper.  */
+
+      if (code == LT && cmp_b == const1_rtx)
+        code = LE, cmp_b = const0_rtx;
+      else if (code == GT && cmp_b == constm1_rtx)
+        code = GE, cmp_b = const0_rtx;
+
+      code = unsignedp ? unsigned_condition (code) : code;
+      rtx comparison = simplify_gen_relational (code, VOIDmode,
+                                                VOIDmode, cmp_a, cmp_b);
+
+      if (!COMPARISON_P (comparison))
+        return NULL_RTX;
+
+      save_pending_stack_adjust (&save);
+      do_pending_stack_adjust ();
+
+      machine_mode cmode = VOIDmode;
+      prepare_cmp_insn (XEXP (comparison, 0), XEXP (comparison, 1),
+                        GET_CODE (comparison), NULL_RTX, unsignedp,
+                        OPTAB_WIDEN, &comparison, &cmode);
+
+      if (!comparison)
+        restore_stack = true;
+
+      pred = comparison;
+    }
+
+  if (pred)
+    {
+      machine_mode op_mode = GET_MODE (target),
+                   cond_mode = GET_MODE (pred);
+
+      if (maskstore_p)
+        icode = convert_optab_handler (maskstore_optab, op_mode,
+                                       cond_mode);
+      else
+        icode = convert_optab_handler (maskload_optab, op_mode,
+                                       cond_mode);
+
+      if (icode != CODE_FOR_nothing)
+        {
+          class expand_operand ops[4];
+          int opnum = 3;
+
+          create_output_operand (&ops[0], target, op_mode);
+          create_input_operand (&ops[1], vtrue, op_mode);
+          create_fixed_operand (&ops[2], pred);
+
+          if (!maskstore_p)
+            {
+              create_input_operand (&ops[3], vfalse, op_mode);
+              opnum = 4;
+            }
+
+          if (maybe_expand_insn (icode, opnum, ops))
+            return target;
+        }
+    }
+
+  if (restore_stack)
+    {
+      delete_insns_since (last);
+      restore_pending_stack_adjust (&save);
+    }
+
+  return NULL_RTX;
+}
 
 /* Emit a conditional negate or bitwise complement using the negcc or
    notcc optabs if available.  Return NULL_RTX if such operations
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 23fa77be24e..6e211d9571a 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -293,6 +293,9 @@ extern void emit_indirect_jump (rtx);
 rtx emit_conditional_move (rtx, rtx_comparison, rtx, rtx, machine_mode, int);
 rtx emit_conditional_move (rtx, rtx, rtx, rtx, rtx, machine_mode);
 
+/* Emit a mask_load or mask_store operation.  */
+rtx emit_mask_load_store (rtx, rtx_code, rtx, rtx, rtx, rtx, rtx = NULL);
+
 /* Emit a conditional negate or bitwise complement operation.  */
 rtx emit_conditional_neg_or_complement (rtx, rtx_code, machine_mode,
                                         rtx, rtx, rtx);
-- 
2.31.1
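As a quick reference (an editorial sketch based on the diff above, not part
of the patch; op0/op1 abbreviate XEXP (cond, 0) and XEXP (cond, 1)), this is
the call site in noce_try_cmove_arith and the operand layout it relies on:

  /* Conceptually the helper emits "x = pred ? vtrue : vfalse", where
     pred is built from (op0 CODE op1).  For a maskstore (x is the
     trapping MEM, and vfalse == x) the expander uses three operands
     {x, vtrue, pred}; for a maskload it adds vfalse as the fourth
     operand {x, vtrue, pred, vfalse}.  */
  if (use_mask_load_store)
    target = emit_mask_load_store (x, code, op0, op1,
                                   a /* vfalse */, b /* vtrue */);
  else
    target = noce_emit_cmove (if_info, x, code, op0, op1, a, b);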