Ping^2

Hongyu Wang <wwwhhhyyy...@gmail.com> wrote on Thu, Nov 21, 2024 at 11:04:
>
> Gentle ping: it would be appreciated if anyone could help review this.
> We hope this patch will not miss GCC 15, so that APX support is complete.
>
> Kong, Lingling <lingling.k...@intel.com> wrote on Thu, Nov 14, 2024 at 09:50:
>
> >
> > Hi,
> >
> > Many thanks to Richard for the suggestion that a conditional load is like a
> > scalar instance of maskload_optab.  So this version uses the maskload and
> > maskstore optabs to expand and generate cfcmov in the ifcvt pass.
> >
> > All the changes passed bootstrap & regtest on x86-64-pc-linux-gnu.
> > We also tested SPEC with SDE and passed the runtime test.
> >
> > Ok for trunk?
> >
> > The APX CFCMOV[1] feature implements conditional faulting, which means that
> > all memory faults are suppressed when the condition code evaluates to false
> > while loading or storing a memory operand.  So a conditional move can now
> > load or store a memory operand that may trap or fault.
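> >
> > A minimal source-level sketch of the load case (a hypothetical test case,
> > not taken from the patch): the load from *p may fault, so ifcvt currently
> > refuses to emit a conditional move for it; with cfcmov the fault is
> > suppressed whenever c is false.
> >
> >   /* Hypothetical example; assumes an APX target providing cfcmov.  */
> >   int
> >   foo (int *p, int b, int c)
> >   {
> >     int x;
> >     if (c)
> >       x = *p;   /* Load may trap or fault.  */
> >     else
> >       x = b;
> >     return x;
> >   }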
> >
> > In the middle end, we currently don't support a conditional move if we know
> > that a load from A or B could trap or fault.  To enable CFCMOV, the maskload
> > and maskstore optabs are used for the expansion.
> >
> > The fault-suppressing conditional move for a conditional memory store does
> > not move any arithmetic calculations.  For a conditional memory load, only
> > the case of one possibly-trapping memory operand and one non-trapping,
> > non-memory operand is supported for now.
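> >
> > And the store shape this maps to via maskstore (again a hypothetical
> > example, assuming the target provides conditional faulting cfcmov):
> >
> >   /* Hypothetical example; *p must be left untouched when c is false.  */
> >   void
> >   bar (int *p, int a, int c)
> >   {
> >     if (c)
> >       *p = a;   /* Store may trap or fault; suppressed when c is false.  */
> >   }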
> >
> > [1].https://www.intel.com/content/www/us/en/developer/articles/technical/advanced-performance-extensions-apx.html
> >
> > gcc/ChangeLog:
> >
> >         * ifcvt.cc (can_use_scalar_mask_store): New function for
> >         conditional faulting movcc for store.
> >         (can_use_scalar_mask_load_store): New function for conditional
> >         faulting.
> >         (noce_try_cmove_arith): Try to convert to conditional faulting
> >         movcc.
> >         (noce_process_if_block): Ditto.
> >         * optabs.cc (emit_conditional_move): Handle cfmovcc.
> >         (emit_conditional_move_1): Ditto.
> > ---
> >  gcc/ifcvt.cc  | 105 +++++++++++++++++++++++++++++++++++++++++++++-----
> >  gcc/optabs.cc |  20 ++++++++++
> >  2 files changed, 115 insertions(+), 10 deletions(-)
> >
> > diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
> > index 74f13a637b2..b3adee35ff5 100644
> > --- a/gcc/ifcvt.cc
> > +++ b/gcc/ifcvt.cc
> > @@ -778,6 +778,8 @@ static bool noce_try_store_flag_mask (struct noce_if_info *);
> >  static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,
> >                             rtx, rtx, rtx, rtx = NULL, rtx = NULL);
> >  static bool noce_try_cmove (struct noce_if_info *);
> > +static bool can_use_scalar_mask_store (rtx, rtx, rtx, bool);
> > +static bool can_use_scalar_mask_load_store (struct noce_if_info *);
> >  static bool noce_try_cmove_arith (struct noce_if_info *);
> >  static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
> >  static bool noce_try_minmax (struct noce_if_info *);
> > @@ -2132,6 +2134,54 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple)
> >    return true;
> >  }
> >
> > +/* Return TRUE if we could convert "if (test) *x = a; else skip" to a
> > +   scalar mask store and do a conditional faulting movcc, i.e.
> > +   x86 cfcmov, especially when the store to x may cause memory faults
> > +   and in else_bb x == b.  */
> > +
> > +static bool
> > +can_use_scalar_mask_store (rtx x, rtx a, rtx b, bool a_simple)
> > +{
> > +  gcc_assert (MEM_P (x));
> > +
> > +  machine_mode x_mode = GET_MODE (x);
> > +  if (convert_optab_handler (maskstore_optab, x_mode,
> > +                            x_mode) == CODE_FOR_nothing)
> > +    return false;
> > +
> > +  if (!rtx_equal_p (x, b) || !may_trap_or_fault_p (x))
> > +    return false;
> > +  if (!a_simple || !register_operand (a, x_mode))
> > +    return false;
> > +
> > +  return true;
> > +}
> > +
> > +/* Return TRUE if the backend supports scalar maskload_optab/maskstore_optab,
> > +   which suppress memory faults when loading or storing a memory operand
> > +   while the condition code evaluates to false.  */
> > +
> > +static bool
> > +can_use_scalar_mask_load_store (struct noce_if_info *if_info)
> > +{
> > +  rtx a = if_info->a;
> > +  rtx b = if_info->b;
> > +  rtx x = if_info->x;
> > +
> > +  if (!MEM_P (a) && !MEM_P (b))
> > +    return false;
> > +
> > +  if (MEM_P (x))
> > +    return can_use_scalar_mask_store (x, a, b, if_info->then_simple);
> > +  else
> > +    /* Return TRUE if the backend supports scalar maskload_optab: we could
> > +       convert "if (test) x = *a; else x = b;" or "if (test) x = a; else
> > +       x = *b;" to a conditional faulting movcc, i.e. x86 cfcmov, especially
> > +       when the load from a or b may cause memory faults.  */
> > +    return convert_optab_handler (maskload_optab, GET_MODE (a),
> > +                                 GET_MODE (a)) != CODE_FOR_nothing;
> > +}
> > +
> >  /* Try more complex cases involving conditional_move.  */
> >
> >  static bool
> > @@ -2171,7 +2221,17 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
> >    /* ??? We could handle this if we knew that a load from A or B could
> >       not trap or fault.  This is also true if we've already loaded
> >       from the address along the path from ENTRY.  */
> > -  else if (may_trap_or_fault_p (a) || may_trap_or_fault_p (b))
> > +  /* Just wait until cse_not_expected, then convert to a conditional move
> > +     on their addresses followed by a load.  */
> > +  else if (may_trap_or_fault_p (a) && may_trap_or_fault_p (b))
> > +    return false;
> > +  /* Scalar maskload_optab/maskstore_optab implement conditional faulting,
> > +     which means that if the condition code evaluates to false, all memory
> > +     faults are suppressed when loading or storing a memory operand.  So a
> > +     conditional move can now load or store a memory operand that may trap
> > +     or fault.  */
> > +  else if ((may_trap_or_fault_p (a) ^ may_trap_or_fault_p (b))
> > +          && !can_use_scalar_mask_load_store (if_info))
> >      return false;
> >
> >    /* if (test) x = a + b; else x = c - d;
> > @@ -2247,9 +2307,14 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
> >    /* If either operand is complex, load it into a register first.
> >       The best way to do this is to copy the original insn.  In this
> >       way we preserve any clobbers etc that the insn may have had.
> > -     This is of course not possible in the IS_MEM case.  */
> > +     This is of course not possible in the IS_MEM case.
> > +     If a load or store operand may trap or fault, we should not hoist
> > +     the load or store; otherwise the memory fault cannot be suppressed
> > +     and it becomes a normal arithmetic insn instead of a conditional
> > +     faulting movcc.  */
> >
> > -  if (! general_operand (a, GET_MODE (a)) || tmp_a)
> > +  if (! may_trap_or_fault_p (a)
> > +      && (! general_operand (a, GET_MODE (a)) || tmp_a))
> >      {
> >
> >        if (is_mem)
> > @@ -2278,7 +2343,8 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
> >         }
> >      }
> >
> > -  if (! general_operand (b, GET_MODE (b)) || tmp_b)
> > +  if (! may_trap_or_fault_p (b)
> > +      && (! general_operand (b, GET_MODE (b)) || tmp_b))
> >      {
> >        if (is_mem)
> >         {
> > @@ -4210,12 +4276,31 @@ noce_process_if_block (struct noce_if_info *if_info)
> >      }
> >
> >    if (!set_b && MEM_P (orig_x))
> > -    /* We want to avoid store speculation to avoid cases like
> > -        if (pthread_mutex_trylock(mutex))
> > -          ++global_variable;
> > -       Rather than go to much effort here, we rely on the SSA optimizers,
> > -       which do a good enough job these days.  */
> > -    return false;
> > +    {
> > +      /* When the target supports conditional faulting movcc, i.e. x86 cfcmov,
> > +        we could do a conditional mem store for "if (...) *x = a; else skip"
> > +        via maskstore_optab, where x may trap or fault.  */
> > +      if ((convert_optab_handler (maskstore_optab, GET_MODE (orig_x),
> > +                                 GET_MODE (orig_x)) != CODE_FOR_nothing)
> > +         && HAVE_conditional_move
> > +         && may_trap_or_fault_p (orig_x)
> > +         && register_operand (a, GET_MODE (orig_x)))
> > +       {
> > +         x = orig_x;
> > +         if_info->x = x;
> > +         if (noce_try_cmove_arith (if_info))
> > +           goto success;
> > +         else
> > +           return false;
> > +       }
> > +      /* We want to avoid store speculation to avoid cases like
> > +          if (pthread_mutex_trylock(mutex))
> > +            ++global_variable;
> > +        Rather than go to much effort here, we rely on the SSA optimizers,
> > +        which do a good enough job these days.  */
> > +      else
> > +       return false;
> > +    }
> >
> >    if (noce_try_move (if_info))
> >      goto success;
> > diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> > index 03ef0c5d81d..524c766d336 100644
> > --- a/gcc/optabs.cc
> > +++ b/gcc/optabs.cc
> > @@ -5085,6 +5085,16 @@ emit_conditional_move (rtx target, struct rtx_comparison comp,
> >
> >    icode = direct_optab_handler (movcc_optab, mode);
> >
> > +  if (may_trap_or_fault_p (target) && MEM_P (target)
> > +      && convert_optab_handler (maskstore_optab, mode,
> > +                               mode) != CODE_FOR_nothing)
> > +    icode = convert_optab_handler (maskstore_optab, mode, mode);
> > +  else if ((may_trap_or_fault_p (op2) || may_trap_or_fault_p (op3))
> > +          && (MEM_P (op2) || MEM_P (op3))
> > +          && convert_optab_handler (maskload_optab,
> > +                                    mode, mode) != CODE_FOR_nothing)
> > +    icode = convert_optab_handler (maskload_optab, mode, mode);
> > +
> >    if (icode == CODE_FOR_nothing)
> >      return NULL_RTX;
> >
> > @@ -5217,6 +5227,16 @@ emit_conditional_move_1 (rtx target, rtx comparison,
> >
> >    icode = direct_optab_handler (movcc_optab, mode);
> >
> > +  if (may_trap_or_fault_p (target) && MEM_P (target)
> > +      && convert_optab_handler (maskstore_optab, mode,
> > +                               mode) != CODE_FOR_nothing)
> > +    icode = convert_optab_handler (maskstore_optab, mode, mode);
> > +  else if ((may_trap_or_fault_p (op2) || may_trap_or_fault_p (op3))
> > +           && (MEM_P (op2) || MEM_P (op3))
> > +           && convert_optab_handler (maskload_optab,
> > +                                     mode, mode) != CODE_FOR_nothing)
> > +    icode = convert_optab_handler (maskload_optab, mode, mode);
> > +
> >    if (icode == CODE_FOR_nothing)
> >      return NULL_RTX;
> >
> > --
> > 2.31.1
> >
