Hi,

Many thanks to Richard for the suggestion that a conditional load is like a
scalar instance of maskload_optab.  So this version uses the maskload and
maskstore optabs to expand and generate cfcmov in the ifcvt pass.

All the changes passed bootstrap & regtest on x86-64-pc-linux-gnu.
We also ran SPEC under SDE and the runtime tests passed.

Ok for trunk?

The APX CFCMOV[1] feature implements conditional faulting: when the condition
code evaluates to false, all memory faults raised by a load or store of the
memory operand are suppressed.  This allows a conditional move to load or
store a memory operand that may trap or fault.
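
As a purely illustrative C sketch (mine, not from the patch or its testsuite)
of the load case this enables:

    int
    foo (int *p, int y, int c)
    {
      /* If-converted to a conditional faulting load: when C is false and
         P is an invalid pointer, the fault from *p is suppressed and Y
         is returned.  */
      return c ? *p : y;
    }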

The middle end currently does not form a conditional move if a load from A or
B could trap or fault.  To enable CFCMOV, the expansion now goes through the
scalar maskload/maskstore optabs.
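
A similar sketch of the store side (again illustrative only, the function and
variable names are mine):

    void
    bar (int *p, int x, int c)
    {
      /* Previously rejected in ifcvt to avoid store speculation; with a
         scalar maskstore pattern this becomes a conditional faulting
         store, so nothing is written (and no fault is raised) when C is
         false.  */
      if (c)
        *p = x;
    }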

For a fault-suppressing conditional memory store, no arithmetic calculations
are moved into the conditional path.  For a conditional memory load, only the
case where one operand is a memory reference that may trap and the other
operand neither traps nor references memory is supported.
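
For illustration, my reading of the new checks on two cases that are still
not converted (examples are mine, not from the patch):

    int
    f1 (int *p, int *q, int c)
    {
      /* Both arms may trap or fault, so this stays a branch.  */
      return c ? *p : *q;
    }

    void
    f2 (int *p, int a, int b, int c)
    {
      /* The stored value needs an arithmetic computation, which is not
         hoisted past the condition, so this is also left alone.  */
      if (c)
        *p = a + b;
    }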

[1] https://www.intel.com/content/www/us/en/developer/articles/technical/advanced-performance-extensions-apx.html

gcc/ChangeLog:

        * ifcvt.cc (can_use_scalar_mask_store): New function to check
        conditional faulting movcc for a store.
        (can_use_scalar_mask_load_store): New function to check conditional
        faulting movcc for a load or store.
        (noce_try_cmove_arith): Try to convert to conditional faulting
        movcc.
        (noce_process_if_block): Ditto.
        * optabs.cc (emit_conditional_move): Handle cfmovcc.
        (emit_conditional_move_1): Ditto.
---
 gcc/ifcvt.cc  | 105 +++++++++++++++++++++++++++++++++++++++++++++-----
 gcc/optabs.cc |  20 ++++++++++
 2 files changed, 115 insertions(+), 10 deletions(-)

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index 74f13a637b2..b3adee35ff5 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -778,6 +778,8 @@ static bool noce_try_store_flag_mask (struct noce_if_info *);
 static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,
                            rtx, rtx, rtx, rtx = NULL, rtx = NULL);
 static bool noce_try_cmove (struct noce_if_info *);
+static bool can_use_scalar_mask_store (rtx, rtx, rtx, bool);
+static bool can_use_scalar_mask_load_store (struct noce_if_info *);
 static bool noce_try_cmove_arith (struct noce_if_info *);
 static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
 static bool noce_try_minmax (struct noce_if_info *);
@@ -2132,6 +2134,54 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple)
   return true;
 }
 
+/* Return TRUE if we can convert "if (test) *x = a; else skip" to a
+   scalar mask store, i.e. a conditional faulting movcc such as x86
+   cfcmov, in particular when the store to X may cause a memory fault
+   and in else_bb x == b.  */
+
+static bool
+can_use_scalar_mask_store (rtx x, rtx a, rtx b, bool a_simple)
+{
+  gcc_assert (MEM_P (x));
+
+  machine_mode x_mode = GET_MODE (x);
+  if (convert_optab_handler (maskstore_optab, x_mode,
+                            x_mode) == CODE_FOR_nothing)
+    return false;
+
+  if (!rtx_equal_p (x, b) || !may_trap_or_fault_p (x))
+    return false;
+  if (!a_simple || !register_operand (a, x_mode))
+    return false;
+
+  return true;
+}
+
+/* Return TRUE if the backend supports scalar maskload_optab/maskstore_optab,
+   which suppress memory faults from loading or storing a memory operand
+   when the condition code evaluates to false.  */
+
+static bool
+can_use_scalar_mask_load_store (struct noce_if_info *if_info)
+{
+  rtx a = if_info->a;
+  rtx b = if_info->b;
+  rtx x = if_info->x;
+
+  if (!MEM_P (a) && !MEM_P (b))
+    return false;
+
+  if (MEM_P (x))
+    return can_use_scalar_mask_store (x, a, b, if_info->then_simple);
+  else
+    /* Return TRUE if the backend supports scalar maskload_optab, so that
+       "if (test) x = *a; else x = b;" or "if (test) x = a; else x = *b;"
+       can be converted to a conditional faulting movcc, i.e. x86 cfcmov,
+       in particular when the load of A or B may cause a memory fault.  */
+    return convert_optab_handler (maskload_optab, GET_MODE (a),
+                                 GET_MODE (a)) != CODE_FOR_nothing;
+}
+
 /* Try more complex cases involving conditional_move.  */
 
 static bool
@@ -2171,7 +2221,17 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
   /* ??? We could handle this if we knew that a load from A or B could
      not trap or fault.  This is also true if we've already loaded
      from the address along the path from ENTRY.  */
-  else if (may_trap_or_fault_p (a) || may_trap_or_fault_p (b))
+  /* If both may trap or fault, just wait until cse_not_expected and then
+     convert to a conditional move on their addresses followed by a load.  */
+  else if (may_trap_or_fault_p (a) && may_trap_or_fault_p (b))
+    return false;
+  /* Scalar maskload_optab/maskstore_optab implement conditional faulting,
+     which means that if the condition code evaluates to false, all memory
+     faults from loading or storing the memory operand are suppressed.  So
+     we can emit a conditional move whose memory operand may trap or
+     fault.  */
+  else if ((may_trap_or_fault_p (a) ^ may_trap_or_fault_p (b))
+          && !can_use_scalar_mask_load_store (if_info))
     return false;
 
   /* if (test) x = a + b; else x = c - d;
@@ -2247,9 +2307,14 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
   /* If either operand is complex, load it into a register first.
      The best way to do this is to copy the original insn.  In this
      way we preserve any clobbers etc that the insn may have had.
-     This is of course not possible in the IS_MEM case.  */
+     This is of course not possible in the IS_MEM case.
+     If a load or store of an operand may trap or fault, do not hoist
+     it either; otherwise the memory fault can no longer be suppressed
+     and we end up with a normal arithmetic insn instead of a
+     conditional faulting movcc.  */
 
-  if (! general_operand (a, GET_MODE (a)) || tmp_a)
+  if (! may_trap_or_fault_p (a)
+      && (! general_operand (a, GET_MODE (a)) || tmp_a))
     {
 
       if (is_mem)
@@ -2278,7 +2343,8 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
        }
     }
 
-  if (! general_operand (b, GET_MODE (b)) || tmp_b)
+  if (! may_trap_or_fault_p (b)
+      && (! general_operand (b, GET_MODE (b)) || tmp_b))
     {
       if (is_mem)
        {
@@ -4210,12 +4276,31 @@ noce_process_if_block (struct noce_if_info *if_info)
     }
 
   if (!set_b && MEM_P (orig_x))
-    /* We want to avoid store speculation to avoid cases like
-        if (pthread_mutex_trylock(mutex))
-          ++global_variable;
-       Rather than go to much effort here, we rely on the SSA optimizers,
-       which do a good enough job these days.  */
-    return false;
+    {
+      /* When the target supports conditional faulting movcc, i.e. x86
+        cfcmov, convert "if (...) *x = a; else skip" to a conditional
+        memory store via maskstore_optab even if X may trap or fault.  */
+      if ((convert_optab_handler (maskstore_optab, GET_MODE (orig_x),
+                                 GET_MODE (orig_x)) != CODE_FOR_nothing)
+         && HAVE_conditional_move
+         && may_trap_or_fault_p (orig_x)
+         && register_operand (a, GET_MODE (orig_x)))
+       {
+         x = orig_x;
+         if_info->x = x;
+         if (noce_try_cmove_arith (if_info))
+           goto success;
+         else
+           return false;
+       }
+      /* We want to avoid store speculation to avoid cases like
+          if (pthread_mutex_trylock(mutex))
+            ++global_variable;
+        Rather than go to much effort here, we rely on the SSA optimizers,
+        which do a good enough job these days.  */
+      else
+       return false;
+    }
 
   if (noce_try_move (if_info))
     goto success;
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 03ef0c5d81d..524c766d336 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -5085,6 +5085,16 @@ emit_conditional_move (rtx target, struct rtx_comparison comp,
 
   icode = direct_optab_handler (movcc_optab, mode);
 
+  if (may_trap_or_fault_p (target) && MEM_P (target)
+      && convert_optab_handler (maskstore_optab, mode,
+                               mode) != CODE_FOR_nothing)
+    icode = convert_optab_handler (maskstore_optab, mode, mode);
+  else if ((may_trap_or_fault_p (op2) || may_trap_or_fault_p (op3))
+          && (MEM_P (op2) || MEM_P (op3))
+          && convert_optab_handler (maskload_optab,
+                                    mode, mode) != CODE_FOR_nothing)
+    icode = convert_optab_handler (maskload_optab, mode, mode);
+
   if (icode == CODE_FOR_nothing)
     return NULL_RTX;
 
@@ -5217,6 +5227,16 @@ emit_conditional_move_1 (rtx target, rtx comparison,
 
   icode = direct_optab_handler (movcc_optab, mode);
 
+  if (may_trap_or_fault_p (target) && MEM_P (target)
+      && convert_optab_handler (maskstore_optab, mode,
+                               mode) != CODE_FOR_nothing)
+    icode = convert_optab_handler (maskstore_optab, mode, mode);
+  else if ((may_trap_or_fault_p (op2) || may_trap_or_fault_p (op3))
+           && (MEM_P (op2) || MEM_P (op3))
+           && convert_optab_handler (maskload_optab,
+                                     mode, mode) != CODE_FOR_nothing)
+    icode = convert_optab_handler (maskload_optab, mode, mode);
+
   if (icode == CODE_FOR_nothing)
     return NULL_RTX;
 
-- 
2.31.1
