From: konglin1 <lingling.k...@intel.com>
The APX CFCMOV feature implements conditional faulting: when the condition
code evaluates to false, all memory faults raised by the conditional load or
store of a memory operand are suppressed. This lets us generate a conditional
move even when its memory operand may trap or fault. To enable CFCMOV, we add
a target hook TARGET_HAVE_CONDITIONAL_MOVE_MEM_NOTRAP, used in the
if-conversion pass to allow the conversion to cmov.

Bootstrapped & regtested on x86-64-pc-linux-gnu with the binutils 2.42 branch.

OK for trunk?

gcc/ChangeLog:

	* config/i386/i386-expand.cc (ix86_can_cfcmov_p): New function
	to test whether cfcmov can be generated.
	(ix86_expand_int_movcc): Expand to a cfcmov pattern if
	ix86_can_cfcmov_p returns true.
	* config/i386/i386-opts.h (enum apx_features): Add apx_cfcmov.
	* config/i386/i386.cc (ix86_have_conditional_move_mem_notrap):
	New function to implement the
	TARGET_HAVE_CONDITIONAL_MOVE_MEM_NOTRAP hook.
	(TARGET_HAVE_CONDITIONAL_MOVE_MEM_NOTRAP): Define.
	(ix86_rtx_costs): Add UNSPEC_APX_CFCMOV cost.
	* config/i386/i386.h (TARGET_APX_CFCMOV): Define.
	* config/i386/i386.md (*cfcmov<mode>_1): New define_insn to
	support cfcmov.
	(*cfcmov<mode>_2): Ditto.
	(UNSPEC_APX_CFCMOV): New unspec for cfcmov.
	* config/i386/i386.opt: Add enum value for cfcmov.
	* ifcvt.cc (noce_try_cmove_load_mem_notrap): New function; use
	the target hook to allow conversion of conditional loads to
	cfcmov.
	(noce_try_cmove_store_mem_notrap): New function to convert to a
	conditional store.
	(noce_process_if_block): Call the new functions.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/apx-cfcmov-1.c: New test.
	* gcc.target/i386/apx-cfcmov-2.c: Ditto.
---
 gcc/config/i386/i386-expand.cc               |  63 +++++
 gcc/config/i386/i386-opts.h                  |   4 +-
 gcc/config/i386/i386.cc                      |  33 ++-
 gcc/config/i386/i386.h                       |   1 +
 gcc/config/i386/i386.md                      |  53 +++-
 gcc/config/i386/i386.opt                     |   3 +
 gcc/config/i386/predicates.md                |   7 +
 gcc/ifcvt.cc                                 | 247 ++++++++++++++++++-
 gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c |  73 ++++++
 gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c |  40 +++
 10 files changed, 511 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 312329e550b..c02a4bcbec3 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -3336,6 +3336,30 @@ ix86_expand_int_addcc (rtx operands[])
   return true;
 }
 
+/* Return TRUE if we can convert "if (test) x = a; else x = b;" to cfcmov,
+   especially when a load from a or b, or a store to x, may cause memory
+   faults.  */
+bool
+ix86_can_cfcmov_p (rtx x, rtx a, rtx b)
+{
+  machine_mode mode = GET_MODE (x);
+  if (TARGET_APX_CFCMOV
+      && (mode == DImode || mode == SImode || mode == HImode))
+    {
+      /* C load (r m r), (r m C), (r r m).  For r m m we can use
+         two cfcmovs.  */
+      if (register_operand (x, mode)
+          && ((MEM_P (a) && register_operand (b, mode))
+              || (MEM_P (a) && b == const0_rtx)
+              || (register_operand (a, mode) && MEM_P (b))
+              || (MEM_P (a) && MEM_P (b))))
+        return true;
+      /* C store (m r 0).  */
+      else if (MEM_P (x) && x == b && register_operand (a, mode))
+        return true;
+    }
+  return false;
+}
+
 bool
 ix86_expand_int_movcc (rtx operands[])
 {
@@ -3366,6 +3390,45 @@ ix86_expand_int_movcc (rtx operands[])
 
   compare_code = GET_CODE (compare_op);
 
+  if (MEM_P (operands[0])
+      && !ix86_can_cfcmov_p (operands[0], op2, op3))
+    return false;
+
+  if (may_trap_or_fault_p (op2) || may_trap_or_fault_p (op3))
+    {
+      if (ix86_can_cfcmov_p (operands[0], op2, op3))
+        {
+          if (may_trap_or_fault_p (op2))
+            op2 = gen_rtx_UNSPEC (mode, gen_rtvec (1, operands[2]),
+                                  UNSPEC_APX_CFCMOV);
+          if (may_trap_or_fault_p (op3))
+            op3 = gen_rtx_UNSPEC (mode, gen_rtvec (1, operands[3]),
+                                  UNSPEC_APX_CFCMOV);
+          emit_insn (compare_seq);
+
+          if (may_trap_or_fault_p (op2) && may_trap_or_fault_p (op3))
+            {
+              emit_insn (gen_rtx_SET (operands[0],
+                                      gen_rtx_IF_THEN_ELSE (mode,
+                                                            compare_op,
+                                                            op2,
+                                                            operands[0])));
+              emit_insn (gen_rtx_SET (operands[0],
+                                      gen_rtx_IF_THEN_ELSE (mode,
+                                                            compare_op,
+                                                            operands[0],
+                                                            op3)));
+            }
+          else
+            emit_insn (gen_rtx_SET (operands[0],
+                                    gen_rtx_IF_THEN_ELSE (mode,
+                                                          compare_op,
+                                                          op2, op3)));
+          return true;
+        }
+      return false;
+    }
+
   if ((op1 == const0_rtx && (code == GE || code == LT))
       || (op1 == constm1_rtx && (code == GT || code == LE)))
     sign_bit_compare_p = true;
diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index c7ec0d9fd39..711519ffb53 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -143,8 +143,10 @@ enum apx_features {
   apx_nf = 1 << 4,
   apx_ccmp = 1 << 5,
   apx_zu = 1 << 6,
+  apx_cfcmov = 1 << 7,
   apx_all = apx_egpr | apx_push2pop2 | apx_ndd
-            | apx_ppx | apx_nf | apx_ccmp | apx_zu,
+            | apx_ppx | apx_nf | apx_ccmp | apx_zu
+            | apx_cfcmov,
 };
 
 #endif
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 173db213d14..b14c0a3d9f2 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22349,10 +22349,18 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
         *total = COSTS_N_INSNS (1);
         if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0)))
           *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
-        if (!REG_P (XEXP (x, 1)))
-          *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
-        if (!REG_P (XEXP (x, 2)))
-          *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
+        rtx op1, op2;
+        op1 = XEXP (x, 1);
+        op2 = XEXP (x, 2);
+        /* Handle UNSPEC_APX_CFCMOV for cfcmov.  */
+        if (GET_CODE (op1) == UNSPEC && XINT (op1, 1) == UNSPEC_APX_CFCMOV)
+          op1 = XVECEXP (op1, 0, 0);
+        if (GET_CODE (op2) == UNSPEC && XINT (op2, 1) == UNSPEC_APX_CFCMOV)
+          op2 = XVECEXP (op2, 0, 0);
+        if (!REG_P (op1))
+          *total += rtx_cost (op1, mode, code, 1, speed);
+        if (!REG_P (op2))
+          *total += rtx_cost (op2, mode, code, 2, speed);
         return true;
       }
     return false;
@@ -24998,6 +25006,19 @@ ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
   return default_noce_conversion_profitable_p (seq, if_info);
 }
 
+
+/* Implement targetm.have_conditional_move_mem_notrap hook.  */
+static bool
+ix86_have_conditional_move_mem_notrap (rtx x)
+{
+  machine_mode mode = GET_MODE (x);
+  if (TARGET_APX_CFCMOV
+      && (mode == DImode || mode == SImode || mode == HImode)
+      && MEM_P (x))
+    return true;
+  return false;
+}
+
 /* x86-specific vector costs.  */
 class ix86_vector_costs : public vector_costs
 {
@@ -26975,6 +26996,10 @@ ix86_libgcc_floating_mode_supported_p
 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
 
+#undef TARGET_HAVE_CONDITIONAL_MOVE_MEM_NOTRAP
+#define TARGET_HAVE_CONDITIONAL_MOVE_MEM_NOTRAP \
+  ix86_have_conditional_move_mem_notrap
+
 #undef TARGET_HARD_REGNO_NREGS
 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
 #undef TARGET_HARD_REGNO_MODE_OK
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index dc1a1f44320..6a20fa678c8 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -58,6 +58,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_APX_NF (ix86_apx_features & apx_nf)
 #define TARGET_APX_CCMP (ix86_apx_features & apx_ccmp)
 #define TARGET_APX_ZU (ix86_apx_features & apx_zu)
+#define TARGET_APX_CFCMOV (ix86_apx_features & apx_cfcmov)
 
 #include "config/vxworks-dummy.h"
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index fd48e764469..57448c07828 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -221,6 +221,9 @@
   ;; For APX CCMP support
   ;; DFV = default flag value
   UNSPEC_APX_DFV
+
+  ;; For APX CFCMOV support
+  UNSPEC_APX_CFCMOV
 ])
 
 (define_c_enum "unspecv" [
@@ -579,7 +582,7 @@
    noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
    avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
    avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl,
-   vaes_avx512vl,noapx_nf"
+   vaes_avx512vl,noapx_nf,apx_cfcmov"
   (const_string "base"))
 
 ;; The (bounding maximum) length of an instruction immediate.
@@ -986,6 +989,7 @@
          (eq_attr "mmx_isa" "avx")
            (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
          (eq_attr "isa" "noapx_nf") (symbol_ref "!TARGET_APX_NF")
+         (eq_attr "isa" "apx_cfcmov") (symbol_ref "TARGET_APX_CFCMOV")
         ]
         (const_int 1)))
 
@@ -24995,7 +24999,7 @@
 ;; Conditional move instructions.
 
 (define_expand "mov<mode>cc"
-  [(set (match_operand:SWIM 0 "register_operand")
+  [(set (match_operand:SWIM 0 "register_or_cfc_mem_operand")
        (if_then_else:SWIM (match_operand 1 "comparison_operator")
                           (match_operand:SWIM 2 "<general_operand>")
                           (match_operand:SWIM 3 "<general_operand>")))]
@@ -25103,19 +25107,54 @@
    (set (match_dup 0)
        (neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0))))])
 
+(define_insn "*cfcmov<mode>_1"
+  [(set (match_operand:SWI248 0 "register_operand" "=r,r")
+       (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+                             [(reg FLAGS_REG) (const_int 0)])
+         (unspec:SWI248
+           [(match_operand:SWI248 2 "memory_operand" "m,m")]
+           UNSPEC_APX_CFCMOV)
+         (match_operand:SWI248 3 "reg_or_0_operand" "C,r")))]
+  "TARGET_CMOVE && TARGET_APX_CFCMOV"
+  "@
+   cfcmov%O2%C1\t{%2, %0|%0, %2}
+   cfcmov%O2%C1\t{%2, %3, %0|%0, %3, %2}"
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "icmov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*cfcmov<mode>_2"
+  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=r,m")
+       (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+                             [(reg FLAGS_REG) (const_int 0)])
+         (match_operand:SWI248 2 "register_operand" "r,r")
+         (unspec:SWI248
+           [(match_operand:SWI248 3 "memory_operand" "m,0")]
+           UNSPEC_APX_CFCMOV)))]
+  "TARGET_CMOVE && TARGET_APX_CFCMOV"
+  "@
+   cfcmov%O2%c1\t{%3, %2, %0|%0, %2, %3}
+   cfcmov%O2%C1\t{%2, %0|%0, %2}"
+  [(set_attr "isa" "apx_ndd,*")
+   (set_attr "type" "icmov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*mov<mode>cc_noc"
-  [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r")
+  [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r,r")
        (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
                              [(reg FLAGS_REG) (const_int 0)])
-         (match_operand:SWI248 2 "nonimmediate_operand" "rm,0,rm,r")
-         (match_operand:SWI248 3 "nonimmediate_operand" "0,rm,r,rm")))]
+         (match_operand:SWI248 2 "nonimmediate_operand" "rm,0,rm,r,r")
+         (match_operand:SWI248 3 "nonimm_or_0_operand" "0,rm,r,rm,C")))]
   "TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "@
    cmov%O2%C1\t{%2, %0|%0, %2}
    cmov%O2%c1\t{%3, %0|%0, %3}
    cmov%O2%C1\t{%2, %3, %0|%0, %3, %2}
-   cmov%O2%c1\t{%3, %2, %0|%0, %2, %3}"
-  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   cmov%O2%c1\t{%3, %2, %0|%0, %2, %3}
+   cfcmov%O2%C1\t{%2, %0|%0, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_cfcmov")
    (set_attr "type" "icmov")
    (set_attr "mode" "<MODE>")])
 
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 353fffb2343..7d63d9abd95 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1345,6 +1345,9 @@ Enum(apx_features) String(ccmp) Value(apx_ccmp) Set(7)
 
 EnumValue
 Enum(apx_features) String(zu) Value(apx_zu) Set(8)
 
+EnumValue
+Enum(apx_features) String(cfcmov) Value(apx_cfcmov) Set(9)
+
 EnumValue
 Enum(apx_features) String(all) Value(apx_all) Set(1)
 
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 7afe3100cb7..d562e10ab41 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -2322,3 +2322,10 @@
   return true;
 })
+
+;; Return true if OP is a register operand, or a memory operand when
+;; TARGET_APX_CFCMOV is enabled.
+(define_predicate "register_or_cfc_mem_operand"
+  (ior (match_operand 0 "register_operand")
+       (and (match_code "mem")
+            (match_test "TARGET_APX_CFCMOV"))))
diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index 58ed42673e5..6e3e48af810 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -783,6 +783,8 @@ static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,
                            rtx, rtx, rtx, rtx = NULL, rtx = NULL);
 static bool noce_try_cmove (struct noce_if_info *);
 static bool noce_try_cmove_arith (struct noce_if_info *);
+static bool noce_try_cmove_load_mem_notrap (struct noce_if_info *);
+static bool noce_try_cmove_store_mem_notrap (struct noce_if_info *, rtx *, rtx);
 static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
 static bool noce_try_minmax (struct noce_if_info *);
 static bool noce_try_abs (struct noce_if_info *);
@@ -2401,6 +2403,237 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
   return false;
 }
 
+/* When the target can suppress memory faults, try the more complex cases
+   where the conditional move's source or destination may trap or fault.  */
+
+static bool
+noce_try_cmove_load_mem_notrap (struct noce_if_info *if_info)
+{
+  rtx a = if_info->a;
+  rtx b = if_info->b;
+  rtx x = if_info->x;
+
+  if (MEM_P (x))
+    return false;
+  /* Only handle a conditional move with one trapping MEM and one
+     non-trapping, non-MEM operand.  */
+  if (!(MEM_P (a) ^ MEM_P (b)))
+    return false;
+  bool a_trap = may_trap_or_fault_p (a);
+  bool b_trap = may_trap_or_fault_p (b);
+
+  if (!(a_trap ^ b_trap))
+    return false;
+  if (a_trap && (!MEM_P (a) || !targetm.have_conditional_move_mem_notrap (a)))
+    return false;
+  if (b_trap && (!MEM_P (b) || !targetm.have_conditional_move_mem_notrap (b)))
+    return false;
+
+  rtx orig_b;
+  rtx_insn *insn_a, *insn_b;
+  bool a_simple = if_info->then_simple;
+  bool b_simple = if_info->else_simple;
+  basic_block then_bb = if_info->then_bb;
+  basic_block else_bb = if_info->else_bb;
+  rtx target;
+  enum rtx_code code;
+  rtx cond = if_info->cond;
+  rtx_insn *ifcvt_seq;
+
+  /* if (test) x = *a; else x = c - d;
+     =>   x = c - d;
+          if (test)
+            x = *a;
+  */
+
+  code = GET_CODE (cond);
+  insn_a = if_info->insn_a;
+  insn_b = if_info->insn_b;
+
+  machine_mode x_mode = GET_MODE (x);
+
+  if (!can_conditionally_move_p (x_mode))
+    return false;
+
+  /* Since we only handle the case of one trapping MEM plus one non-trapping,
+     non-MEM operand, always move the trapping MEM into then_bb.  */
+  if (noce_reversed_cond_code (if_info) != UNKNOWN)
+    {
+      bool reversep = false;
+      if (b_trap)
+        reversep = true;
+
+      if (reversep)
+        {
+          if (if_info->rev_cond)
+            {
+              cond = if_info->rev_cond;
+              code = GET_CODE (cond);
+            }
+          else
+            code = reversed_comparison_code (cond, if_info->jump);
+          std::swap (a, b);
+          std::swap (insn_a, insn_b);
+          std::swap (a_simple, b_simple);
+          std::swap (then_bb, else_bb);
+        }
+    }
+
+  if (then_bb && else_bb
+      && (!bbs_ok_for_cmove_arith (then_bb, else_bb, if_info->orig_x)
+          || !bbs_ok_for_cmove_arith (else_bb, then_bb, if_info->orig_x)))
+    return false;
+
+  start_sequence ();
+
+  /* If one of the blocks is empty then the corresponding B or A value
+     came from the test block.  The non-empty complex block that we will
+     emit might clobber the register used by B or A, so move it to a
+     pseudo first.  */
+
+  rtx tmp_b = NULL_RTX;
+
+  /* Don't move a trapping MEM to a pseudo.  */
+  if (!may_trap_or_fault_p (b) && (b_simple || !else_bb))
+    tmp_b = gen_reg_rtx (x_mode);
+
+  orig_b = b;
+
+  rtx emit_a = NULL_RTX;
+  rtx emit_b = NULL_RTX;
+  rtx_insn *tmp_insn = NULL;
+  bool modified_in_a = false;
+  bool modified_in_b = false;
+  /* If either operand is complex, load it into a register first.
+     The best way to do this is to copy the original insn.  In this
+     way we preserve any clobbers etc that the insn may have had.
+     This is of course not possible in the IS_MEM case.  */
+
+  if (! general_operand (b, GET_MODE (b)) || tmp_b)
+    {
+      if (insn_b)
+        {
+          b = tmp_b ? tmp_b : gen_reg_rtx (GET_MODE (b));
+          rtx_insn *copy_of_b = as_a <rtx_insn *> (copy_rtx (insn_b));
+          rtx set = single_set (copy_of_b);
+
+          SET_DEST (set) = b;
+          emit_b = PATTERN (copy_of_b);
+        }
+      else
+        {
+          rtx tmp_reg = tmp_b ? tmp_b : gen_reg_rtx (GET_MODE (b));
+          emit_b = gen_rtx_SET (tmp_reg, b);
+          b = tmp_reg;
+        }
+    }
+
+  if (tmp_b && then_bb)
+    {
+      FOR_BB_INSNS (then_bb, tmp_insn)
+        /* Don't check inside insn_a.  We will have changed it to emit_a
+           with a destination that doesn't conflict.  */
+        if (!(insn_a && tmp_insn == insn_a)
+            && modified_in_p (orig_b, tmp_insn))
+          {
+            modified_in_a = true;
+            break;
+          }
+
+    }
+
+  modified_in_b = emit_b != NULL_RTX && modified_in_p (a, emit_b);
+  /* If insn to set up A clobbers any registers B depends on, try to
+     swap insn that sets up A with the one that sets up B.  If even
+     that doesn't help, punt.  */
+  if (modified_in_a && !modified_in_b)
+    {
+      if (!noce_emit_bb (emit_b, else_bb, b_simple))
+        goto end_seq_and_fail;
+
+      if (!noce_emit_bb (emit_a, then_bb, a_simple))
+        goto end_seq_and_fail;
+    }
+  else if (!modified_in_a)
+    {
+      if (!noce_emit_bb (emit_b, else_bb, b_simple))
+        goto end_seq_and_fail;
+
+      if (!noce_emit_bb (emit_a, then_bb, a_simple))
+        goto end_seq_and_fail;
+    }
+  else
+    goto end_seq_and_fail;
+
+  target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0), XEXP (cond, 1),
+                            a, b);
+
+  if (! target)
+    goto end_seq_and_fail;
+
+  if (target != x)
+    noce_emit_move_insn (x, target);
+
+  ifcvt_seq = end_ifcvt_sequence (if_info);
+  if (!ifcvt_seq || !targetm.noce_conversion_profitable_p (ifcvt_seq, if_info))
+    return false;
+
+  emit_insn_before_setloc (ifcvt_seq, if_info->jump,
+                           INSN_LOCATION (if_info->insn_a));
+  if_info->transform_name = "noce_try_cmove_load_mem_notrap";
+  return true;
+
+ end_seq_and_fail:
+  end_sequence ();
+  return false;
+}
+
+/* Try to convert "if (test) *x = a;" into a conditional store when the
+   target can suppress a fault of the store.  */
+
+static bool
+noce_try_cmove_store_mem_notrap (struct noce_if_info *if_info, rtx *x_ptr,
+                                 rtx orig_x)
+{
+  rtx a = if_info->a;
+  rtx b = if_info->b;
+  rtx x = orig_x;
+  machine_mode x_mode = GET_MODE (x);
+
+  if (!MEM_P (x) || !rtx_equal_p (x, b))
+    return false;
+  if (!may_trap_or_fault_p (x) || !targetm.have_conditional_move_mem_notrap (x))
+    return false;
+  if (!if_info->then_simple || !register_operand (a, x_mode))
+    return false;
+
+  rtx cond = if_info->cond;
+  enum rtx_code code = GET_CODE (cond);
+  rtx_insn *ifcvt_seq;
+
+  start_sequence ();
+
+  rtx target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0), XEXP (cond, 1),
+                                a, b);
+
+  if (! target)
+    goto end_seq_and_fail;
+
+  if (target != x)
+    noce_emit_move_insn (x, target);
+
+  ifcvt_seq = end_ifcvt_sequence (if_info);
+  if (!ifcvt_seq || !targetm.noce_conversion_profitable_p (ifcvt_seq, if_info))
+    return false;
+
+  emit_insn_before_setloc (ifcvt_seq, if_info->jump,
+                           INSN_LOCATION (if_info->insn_a));
+  if_info->transform_name = "noce_try_cmove_store_mem_notrap";
+  if_info->x = orig_x;
+  *x_ptr = orig_x;
+  return true;
+
+ end_seq_and_fail:
+  end_sequence ();
+  return false;
+}
+
 /* For most cases, the simplified condition we found is the best choice, but
    this is not the case for the min/max/abs transforms.  For these we wish to
    know that it is A or B in the condition.  */
@@ -4121,12 +4354,21 @@ noce_process_if_block (struct noce_if_info *if_info)
     }
 
   if (!set_b && MEM_P (orig_x))
+    {
+      /* A fault-suppressing conditional store for a MEM destination does
+         not move any arithmetic computation, so it is safe to try here.  */
+      if (targetm.have_conditional_move_mem_notrap (orig_x)
+          && HAVE_conditional_move
+          && noce_try_cmove_store_mem_notrap (if_info, &x, orig_x))
+        goto success;
+      else
     /* We want to avoid store speculation to avoid cases like
          if (pthread_mutex_trylock(mutex))
            ++global_variable;
        Rather than go to much effort here, we rely on the SSA optimizers,
        which do a good enough job these days.  */
-    return false;
+        return false;
+    }
 
   if (noce_try_move (if_info))
     goto success;
@@ -4160,6 +4402,9 @@ noce_process_if_block (struct noce_if_info *if_info)
       if (HAVE_conditional_move
          && noce_try_cmove_arith (if_info))
        goto success;
+      if (HAVE_conditional_move
+         && noce_try_cmove_load_mem_notrap (if_info))
+       goto success;
 
       if (noce_try_sign_mask (if_info))
        goto success;
     }
diff --git a/gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c b/gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c
new file mode 100644
index 00000000000..4a1fb91b24c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c
@@ -0,0 +1,73 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O3 -mapxf" } */
+
+/* { dg-final { scan-assembler-times "cfcmovne" 1 } } */
+/* { dg-final { scan-assembler-times "cfcmovg" 2 } } */
+/* { dg-final { scan-assembler-times "cfcmove" 1 } } */
+/* { dg-final { scan-assembler-times "cfcmovl" 2 } } */
+/* { dg-final { scan-assembler-times "cfcmovle" 1 } } */
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_store (int a, int b, int c, int d, int *arr)
+{
+  if (a != b)
+    *arr = c;
+  return d;
+
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_ndd (int a, int b, int c, int *p)
+{
+  if (a > b)
+    return *p;
+  return c;
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_2_trap (int a, int b, int *c, int *p)
+{
+  if (a > b)
+    return *p;
+  return *c;
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_zero (int a, int b, int c)
+{
+  int sum = 0;
+  if (a == b)
+    return c;
+  return sum;
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_mem (int a, int b, int *p)
+{
+  int sum = 0;
+  if (a < b)
+    sum = *p;
+  return sum;
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_arith_1 (int a, int b, int c, int *p)
+{
+  int sum = 0;
+  if (a > b)
+    sum = *p;
+  else
+    sum = a + c;
+  return sum + 1;
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_arith_2 (int a, int b, int c, int *p)
+{
+  int sum = 0;
+  if (a > b)
+    sum = a + c;
+  else
+    sum = *p;
+  return sum + 1;
+}
diff --git a/gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c b/gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c
new file mode 100644
index 00000000000..2b1660f64fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c
@@ -0,0 +1,40 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target apxf } */
+/* { dg-options "-mapxf -march=x86-64 -O3" } */
+
+#include "apx-cfcmov-1.c"
+
+extern void abort (void);
+
+int main ()
+{
+  if (!__builtin_cpu_supports ("apxf"))
+    return 0;
+
+  int arr = 6;
+  int arr1 = 5;
+  int res = cfc_store (1, 2, 3, 4, &arr);
+  if (arr != 3 || res != 4)
+    abort ();
+  res = cfc_load_ndd (2, 1, 2, &arr);
+  if (res != 3)
+    abort ();
+  res = cfc_load_2_trap (1, 2, &arr1, &arr);
+  if (res != 5)
+    abort ();
+  res = cfc_load_zero (1, 2, 3);
+  res = cfc_load_zero (1, 2, 3);
+  if (res != 0)
+    abort ();
+  res = cfc_load_mem (2, 1, &arr);
+  if (res != 0)
+    abort ();
+  res = cfc_load_arith_1 (1, 2, 3, &arr);
+  if (res != 5)
+    abort ();
+  res = cfc_load_arith_2 (2, 1, 3, &arr);
+  if (res != 6)
+    abort ();
+  return 0;
+}
-- 
2.31.1