Ping^2
On Thu, Nov 21, 2024 at 11:04 Hongyu Wang <wwwhhhyyy...@gmail.com> wrote:
>
> Gentle ping, it would be appreciated if anyone could help review this.
> We hope this patch will not miss GCC 15, so that APX support is complete.
>
> On Thu, Nov 14, 2024 at 09:50 Kong, Lingling <lingling.k...@intel.com> wrote:
> >
> > Hi,
> >
> > Many thanks to Richard for the suggestion that a conditional load is like
> > a scalar instance of maskload_optab.  So this version uses the maskload
> > and maskstore optabs to expand and generate cfcmov in the ifcvt pass.
> >
> > All the changes passed bootstrap & regtest on x86-64-pc-linux-gnu.
> > We also tested SPEC with SDE and passed the runtime test.
> >
> > Ok for trunk?
> >
> > The APX CFCMOV[1] feature implements conditional faulting, which means
> > that all memory faults are suppressed when the condition code evaluates
> > to false for a load or store of a memory operand.  With it, a conditional
> > move can load or store a memory operand that may trap or fault.
> >
> > In the middle-end we currently do not support a conditional move if a
> > load from A or B could trap or fault.  To enable CFCMOV, the expansion
> > now goes through maskload and maskstore.
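> >
> > For illustration only (this example is not from the patch or its
> > testsuite, and the function name is made up), the load side targets a
> > shape like
> >
> >   int
> >   foo (int *p, int y, int c)
> >   {
> >     return c ? *p : y;   /* load from *p may fault */
> >   }
> >
> > where the load from *p may fault, so ifcvt currently has to keep the
> > branch.  With cfcmov the load is only performed when the condition
> > holds, so the fault is suppressed on the not-taken path.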
> >
> > A conditional mem store with fault suppression does not move any
> > arithmetic calculations.  For a conditional mem load we currently only
> > support the case where one arm is a mem that may trap and the other arm
> > neither traps nor is a mem.
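> >
> > The store side (again, a made-up example rather than one from the
> > testsuite) targets the plain "store or skip" shape
> >
> >   void
> >   bar (int *p, int v, int c)
> >   {
> >     if (c)
> >       *p = v;   /* store to *p may fault */
> >   }
> >
> > where the store to *p cannot be speculated today.  Expanding it through
> > maskstore_optab lets cfcmov perform the store, and thus possibly fault,
> > only when the condition is true; v has to be a simple register value for
> > this path to trigger.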
> >
> > [1] https://www.intel.com/content/www/us/en/developer/articles/technical/advanced-performance-extensions-apx.html
> >
> > gcc/ChangeLog:
> >
> >         * ifcvt.cc (can_use_scalar_mask_store): New function for
> >         conditional faulting movcc for store.
> >         (can_use_scalar_mask_load_store): New function for conditional
> >         faulting.
> >         (noce_try_cmove_arith): Try to convert to conditional faulting
> >         movcc.
> >         (noce_process_if_block): Ditto.
> >         * optabs.cc (emit_conditional_move): Handle cfcmov.
> >         (emit_conditional_move_1): Ditto.
> > ---
> >  gcc/ifcvt.cc  | 105 +++++++++++++++++++++++++++++++++++++++++++++-----
> >  gcc/optabs.cc |  20 ++++++++++
> >  2 files changed, 115 insertions(+), 10 deletions(-)
> >
> > diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
> > index 74f13a637b2..b3adee35ff5 100644
> > --- a/gcc/ifcvt.cc
> > +++ b/gcc/ifcvt.cc
> > @@ -778,6 +778,8 @@ static bool noce_try_store_flag_mask (struct noce_if_info *);
> >  static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,
> >                              rtx, rtx, rtx, rtx = NULL, rtx = NULL);
> >  static bool noce_try_cmove (struct noce_if_info *);
> > +static bool can_use_scalar_mask_store (rtx, rtx, rtx, bool);
> > +static bool can_use_scalar_mask_load_store (struct noce_if_info *);
> >  static bool noce_try_cmove_arith (struct noce_if_info *);
> >  static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
> >  static bool noce_try_minmax (struct noce_if_info *);
> > @@ -2132,6 +2134,54 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple)
> >    return true;
> >  }
> >
> > +/* Return TRUE if we could convert "if (test) *x = a; else skip" to
> > +   a scalar mask store and do a conditional faulting movcc, i.e.
> > +   x86 cfcmov, especially when the store to x may cause a memory
> > +   fault and in else_bb x == b.  */
> > +
> > +static bool
> > +can_use_scalar_mask_store (rtx x, rtx a, rtx b, bool a_simple)
> > +{
> > +  gcc_assert (MEM_P (x));
> > +
> > +  machine_mode x_mode = GET_MODE (x);
> > +  if (convert_optab_handler (maskstore_optab, x_mode,
> > +                             x_mode) == CODE_FOR_nothing)
> > +    return false;
> > +
> > +  if (!rtx_equal_p (x, b) || !may_trap_or_fault_p (x))
> > +    return false;
> > +  if (!a_simple || !register_operand (a, x_mode))
> > +    return false;
> > +
> > +  return true;
> > +}
> > +
> > +/* Return TRUE if the backend supports scalar maskload_optab/maskstore_optab,
> > +   which suppress memory faults when loading or storing a memory operand
> > +   while the condition code evaluates to false.  */
> > +
> > +static bool
> > +can_use_scalar_mask_load_store (struct noce_if_info *if_info)
> > +{
> > +  rtx a = if_info->a;
> > +  rtx b = if_info->b;
> > +  rtx x = if_info->x;
> > +
> > +  if (!MEM_P (a) && !MEM_P (b))
> > +    return false;
> > +
> > +  if (MEM_P (x))
> > +    return can_use_scalar_mask_store (x, a, b, if_info->then_simple);
> > +  else
> > +    /* If the backend supports scalar maskload_optab, we could convert
> > +       "if (test) x = *a; else x = b;" or "if (test) x = a; else x = *b;"
> > +       to a conditional faulting movcc, i.e. x86 cfcmov, especially when
> > +       the load from a or b may cause a memory fault.  */
> > +    return convert_optab_handler (maskload_optab, GET_MODE (a),
> > +                                  GET_MODE (a)) != CODE_FOR_nothing;
> > +}
> > +
> >  /* Try more complex cases involving conditional_move.  */
> >
> >  static bool
> > @@ -2171,7 +2221,17 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
> >    /* ??? We could handle this if we knew that a load from A or B could
> >       not trap or fault.  This is also true if we've already loaded
> >       from the address along the path from ENTRY.  */
> > -  else if (may_trap_or_fault_p (a) || may_trap_or_fault_p (b))
> > +  /* Just wait for cse_not_expected, then convert to a conditional mov
> > +     on their addresses followed by a load.  */
> > +  else if (may_trap_or_fault_p (a) && may_trap_or_fault_p (b))
> > +    return false;
> > +  /* Scalar maskload_optab/maskstore_optab implements conditional faulting,
> > +     which means that if the condition code evaluates to false, all memory
> > +     faults are suppressed when loading or storing a memory operand.  Now
> > +     we can allow a conditional move where the load or store of a memory
> > +     operand may trap or fault.  */
> > +  else if ((may_trap_or_fault_p (a) ^ may_trap_or_fault_p (b))
> > +           && !can_use_scalar_mask_load_store (if_info))
> >      return false;
> >
> >    /* if (test) x = a + b; else x = c - d;
> > @@ -2247,9 +2307,14 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
> >    /* If either operand is complex, load it into a register first.
> >       The best way to do this is to copy the original insn.  In this
> >       way we preserve any clobbers etc that the insn may have had.
> > -     This is of course not possible in the IS_MEM case.  */
> > +     This is of course not possible in the IS_MEM case.
> > +     When the load or store of an operand may trap or fault, we should
> > +     not hoist it, otherwise the memory fault cannot be suppressed and
> > +     we end up with a normal arithmetic insn instead of a conditional
> > +     faulting movcc.  */
> >
> > -  if (! general_operand (a, GET_MODE (a)) || tmp_a)
> > +  if (! may_trap_or_fault_p (a)
> > +      && (! general_operand (a, GET_MODE (a)) || tmp_a))
> >      {
> >
> >        if (is_mem)
> > @@ -2278,7 +2343,8 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
> >          }
> >      }
> >
> > -  if (! general_operand (b, GET_MODE (b)) || tmp_b)
> > +  if (! may_trap_or_fault_p (b)
> > +      && (! general_operand (b, GET_MODE (b)) || tmp_b))
> >      {
> >        if (is_mem)
> >          {
> > @@ -4210,12 +4276,31 @@ noce_process_if_block (struct noce_if_info *if_info)
> >      }
> >
> >    if (!set_b && MEM_P (orig_x))
> > -    /* We want to avoid store speculation to avoid cases like
> > -         if (pthread_mutex_trylock(mutex))
> > -           ++global_variable;
> > -       Rather than go to much effort here, we rely on the SSA optimizers,
> > -       which do a good enough job these days.  */
> > -    return false;
> > +    {
> > +      /* When the target supports conditional faulting movcc, i.e. x86
> > +         cfcmov, we can do a conditional mem store for "if (...) *x = a;
> > +         else skip" via maskstore_optab, even when x may trap or fault.  */
> > +      if ((convert_optab_handler (maskstore_optab, GET_MODE (orig_x),
> > +                                  GET_MODE (orig_x)) != CODE_FOR_nothing)
> > +          && HAVE_conditional_move
> > +          && may_trap_or_fault_p (orig_x)
> > +          && register_operand (a, GET_MODE (orig_x)))
> > +        {
> > +          x = orig_x;
> > +          if_info->x = x;
> > +          if (noce_try_cmove_arith (if_info))
> > +            goto success;
> > +          else
> > +            return false;
> > +        }
> > +      /* We want to avoid store speculation to avoid cases like
> > +           if (pthread_mutex_trylock(mutex))
> > +             ++global_variable;
> > +         Rather than go to much effort here, we rely on the SSA optimizers,
> > +         which do a good enough job these days.  */
> > +      else
> > +        return false;
> > +    }
> >
> >    if (noce_try_move (if_info))
> >      goto success;
> > diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> > index 03ef0c5d81d..524c766d336 100644
> > --- a/gcc/optabs.cc
> > +++ b/gcc/optabs.cc
> > @@ -5085,6 +5085,16 @@ emit_conditional_move (rtx target, struct rtx_comparison comp,
> >
> >    icode = direct_optab_handler (movcc_optab, mode);
> >
> > +  if (may_trap_or_fault_p (target) && MEM_P (target)
> > +      && convert_optab_handler (maskstore_optab, mode,
> > +                                mode) != CODE_FOR_nothing)
> > +    icode = convert_optab_handler (maskstore_optab, mode, mode);
> > +  else if ((may_trap_or_fault_p (op2) || may_trap_or_fault_p (op3))
> > +           && (MEM_P (op2) || MEM_P (op3))
> > +           && convert_optab_handler (maskload_optab,
> > +                                     mode, mode) != CODE_FOR_nothing)
> > +    icode = convert_optab_handler (maskload_optab, mode, mode);
> > +
> >    if (icode == CODE_FOR_nothing)
> >      return NULL_RTX;
> >
> > @@ -5217,6 +5227,16 @@ emit_conditional_move_1 (rtx target, rtx comparison,
> >
> >    icode = direct_optab_handler (movcc_optab, mode);
> >
> > +  if (may_trap_or_fault_p (target) && MEM_P (target)
> > +      && convert_optab_handler (maskstore_optab, mode,
> > +                                mode) != CODE_FOR_nothing)
> > +    icode = convert_optab_handler (maskstore_optab, mode, mode);
> > +  else if ((may_trap_or_fault_p (op2) || may_trap_or_fault_p (op3))
> > +           && (MEM_P (op2) || MEM_P (op3))
> > +           && convert_optab_handler (maskload_optab,
> > +                                     mode, mode) != CODE_FOR_nothing)
> > +    icode = convert_optab_handler (maskload_optab, mode, mode);
> > +
> >    if (icode == CODE_FOR_nothing)
> >      return NULL_RTX;
> >
> > --
> > 2.31.1
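
For anyone skimming the thread, a short note on scope: the conversion only
triggers when exactly one arm can fault.  A made-up case like

  int
  baz (int *p, int *q, int c)
  {
    return c ? *p : *q;   /* both loads may fault */
  }

is still rejected, since both loads may fault (see the early return on
may_trap_or_fault_p (a) && may_trap_or_fault_p (b) above), and the store
path only handles "if (test) *x = a; else skip" where a is already a
register.  These examples are purely illustrative and are not part of the
patch or its testsuite.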