From: Lingling Kong <lingling.k...@intel.com>

Hi,

Thanks to Richard for the review.  The v5 patch contains the following changes:

1. Separate the maskload/maskstore emission out of noce_emit_cmove into a
new function, emit_mask_load_store, in optabs.cc.
2. Follow the operand order of the maskload and maskstore optabs and take
cond as the predicate operand with VOIDmode.
3. Cache may_trap_or_fault_p and correct the logic to ensure that only one
of the cmove source operands can be a may_trap_or_fault memory.

Bootstrapped & regtested on x86_64-pc-linux-gnu.

OK for trunk?

The APX CFCMOV feature implements conditionally faulting loads and
stores: all memory faults are suppressed when the condition code
evaluates to false and the instruction loads or stores a memory
operand.  This lets a conditional move load or store a memory
operand that may trap or fault.
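
For example, a conditional load like the one below (an illustrative C
snippet, not taken from the patch or its tests) could not previously be
if-converted, because a plain cmove would speculatively dereference p
and might fault; with a conditionally faulting load the fault is
suppressed when c is false:

    int
    f (int c, int *p, int b)
    {
      /* Only the c != 0 path dereferences p.  */
      return c ? *p : b;
    }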

In the middle end, we currently do not generate a conditional move
if a load from A or B could trap or fault.  To enable CFCMOV, we
use mask_load and mask_store as a proxy for the backend expander.
The predicate of mask_load/mask_store is recognized as a comparison
rtx in the initial implementation.

The fault-suppressing conditional move for a conditional memory
store does not move any arithmetic calculations.  For a conditional
memory load, only the case where one source is a may-trap memory
operand and the other is a non-trapping, non-memory operand is
supported.
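
An illustrative store case (again hypothetical, not from the patch)
that the mask_store path is intended to handle, where the store to *p
must not be speculated if p may trap or fault:

    void
    g (int c, int *p, int a)
    {
      if (c)
        *p = a;
    }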

gcc/ChangeLog:

        * ifcvt.cc (can_use_mask_load_store): New function to check
        whether a conditionally faulting load or store can be used.
        (noce_try_cmove_arith): Relax the may_trap_or_fault check on
        the operands; expand with the mask_load/mask_store optab when
        one of the cmove operands may trap or fault.
        (noce_process_if_block): Allow a may_trap_or_fault dest for the
        "if (...) *x = a; else skip" scenario when the mask_store optab
        is available.
        * optabs.h (emit_mask_load_store): New declaration.
        * optabs.cc (emit_mask_load_store): New function to emit a
        conditional move with the mask_load/mask_store optab.
---
 gcc/ifcvt.cc  | 110 ++++++++++++++++++++++++++++++++++++++++++--------
 gcc/optabs.cc | 103 ++++++++++++++++++++++++++++++++++++++++++++++
 gcc/optabs.h  |   3 ++
 3 files changed, 200 insertions(+), 16 deletions(-)

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index cb5597bc171..51ac398aee1 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -778,6 +778,7 @@ static bool noce_try_store_flag_mask (struct noce_if_info *);
 static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,
                            rtx, rtx, rtx, rtx = NULL, rtx = NULL);
 static bool noce_try_cmove (struct noce_if_info *);
+static bool can_use_mask_load_store (struct noce_if_info *);
 static bool noce_try_cmove_arith (struct noce_if_info *);
 static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
 static bool noce_try_minmax (struct noce_if_info *);
@@ -2132,6 +2133,39 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple)
   return true;
 }
 
+/* Return TRUE if the backend supports a scalar maskload_optab
+   or maskstore_optab, which suppresses memory faults when trying
+   to load or store a memory operand and the condition code
+   evaluates to false.
+   Currently the following forms
+       "if (test) *x = a; else skip;" --> mask_store
+       "if (test) x = *a; else x = b;" --> mask_load
+       "if (test) x = a; else x = *b;" --> mask_load
+   are supported.  */
+
+static bool
+can_use_mask_load_store (struct noce_if_info *if_info)
+{
+  rtx b = if_info->b;
+  rtx x = if_info->x;
+  rtx cond = if_info->cond;
+
+  if (MEM_P (x))
+    {
+      if (convert_optab_handler (maskstore_optab, GET_MODE (x),
+                                GET_MODE (cond)) == CODE_FOR_nothing)
+       return false;
+
+      if (!rtx_equal_p (x, b) || !may_trap_or_fault_p (x))
+       return false;
+
+      return true;
+    }
+  else
+    return convert_optab_handler (maskload_optab, GET_MODE (x),
+                                 GET_MODE (cond)) != CODE_FOR_nothing;
+}
+
 /* Try more complex cases involving conditional_move.  */
 
 static bool
@@ -2151,6 +2185,9 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
   enum rtx_code code;
   rtx cond = if_info->cond;
   rtx_insn *ifcvt_seq;
+  bool a_may_trap_or_fault = may_trap_or_fault_p (a);
+  bool b_may_trap_or_fault = may_trap_or_fault_p (b);
+  bool use_mask_load_store = false;
 
   /* A conditional move from two memory sources is equivalent to a
      conditional on their addresses followed by a load.  Don't do this
@@ -2167,11 +2204,22 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
       x = gen_reg_rtx (address_mode);
       is_mem = true;
     }
-
-  /* ??? We could handle this if we knew that a load from A or B could
-     not trap or fault.  This is also true if we've already loaded
-     from the address along the path from ENTRY.  */
-  else if (may_trap_or_fault_p (a) || may_trap_or_fault_p (b))
+  /* We cannot handle the case where both a and b may trap or
+     fault.  */
+  else if (a_may_trap_or_fault && b_may_trap_or_fault)
+    return false;
+  /* A scalar maskload_optab/maskstore_optab implies conditional
+     faulting: if the condition mask evaluates to false, all memory
+     faults are suppressed when loading or storing a memory operand.
+     So if a scalar mask load or store is available, we can do the
+     conversion when one of a/b may trap or fault.  */
+  else if (((MEM_P (a) && a_may_trap_or_fault
+            && !b_may_trap_or_fault)
+           || (MEM_P (b) && b_may_trap_or_fault
+               && !a_may_trap_or_fault))
+          && can_use_mask_load_store (if_info))
+    use_mask_load_store = true;
+  else if (a_may_trap_or_fault || b_may_trap_or_fault)
     return false;
 
   /* if (test) x = a + b; else x = c - d;
@@ -2212,6 +2260,7 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
          std::swap (insn_a, insn_b);
          std::swap (a_simple, b_simple);
          std::swap (then_bb, else_bb);
+         std::swap (a_may_trap_or_fault, b_may_trap_or_fault);
        }
     }
 
@@ -2247,9 +2296,14 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
   /* If either operand is complex, load it into a register first.
      The best way to do this is to copy the original insn.  In this
      way we preserve any clobbers etc that the insn may have had.
-     This is of course not possible in the IS_MEM case.  */
+     This is of course not possible in the IS_MEM case.
+     For an operand whose load or store may trap or fault, do not
+     hoist the load or store; otherwise the memory fault cannot be
+     suppressed and we get a normal arithmetic insn instead of a
+     conditional faulting movcc.  */
 
-  if (! general_operand (a, GET_MODE (a)) || tmp_a)
+  if (!a_may_trap_or_fault
+      && (! general_operand (a, GET_MODE (a)) || tmp_a))
     {
 
       if (is_mem)
@@ -2278,7 +2332,8 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
        }
     }
 
-  if (! general_operand (b, GET_MODE (b)) || tmp_b)
+  if (!b_may_trap_or_fault
+      && (! general_operand (b, GET_MODE (b)) || tmp_b))
     {
       if (is_mem)
        {
@@ -2356,8 +2411,12 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
   else
     goto end_seq_and_fail;
 
-  target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0), XEXP (cond, 1),
-                           a, b);
+  if (use_mask_load_store)
+    target = emit_mask_load_store (x, code, XEXP (cond, 0),
+                                  XEXP (cond, 1), a, b);
+  else
+    target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0),
+                             XEXP (cond, 1), a, b);
 
   if (! target)
     goto end_seq_and_fail;
@@ -4210,12 +4269,31 @@ noce_process_if_block (struct noce_if_info *if_info)
     }
 
   if (!set_b && MEM_P (orig_x))
-    /* We want to avoid store speculation to avoid cases like
-        if (pthread_mutex_trylock(mutex))
-          ++global_variable;
-       Rather than go to much effort here, we rely on the SSA optimizers,
-       which do a good enough job these days.  */
-    return false;
+    {
+      /* When the target supports a scalar mask_store, e.g. x86 cfcmov,
+        we can do a conditional memory store for "if (...) *x = a; else skip"
+        where x may trap or fault.  */
+      if ((convert_optab_handler (maskstore_optab, GET_MODE (orig_x),
+                                 GET_MODE (cond)) != CODE_FOR_nothing)
+         && HAVE_conditional_move
+         && may_trap_or_fault_p (orig_x)
+         && register_operand (a, GET_MODE (orig_x)))
+       {
+         x = orig_x;
+         if_info->x = x;
+         if (noce_try_cmove_arith (if_info))
+           goto success;
+         else
+           return false;
+       }
+      /* We want to avoid store speculation to avoid cases like
+          if (pthread_mutex_trylock(mutex))
+            ++global_variable;
+        Rather than go to much effort here, we rely on the SSA optimizers,
+        which do a good enough job these days.  */
+      else
+       return false;
+    }
 
   if (noce_try_move (if_info))
     goto success;
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 36f2e6af8b5..f862f74fb5e 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -5215,6 +5215,109 @@ emit_conditional_move_1 (rtx target, rtx comparison,
   return NULL_RTX;
 }
 
+/* Emit a conditional move instruction using the mask_load or mask_store
+   optab, which allows one of the move source or dest to be a
+   may_trap_or_fault memory.  The input operands have the same meaning as
+   for emit_conditional_move, except that PRED can be a mask predicate.  */
+
+rtx
+emit_mask_load_store (rtx target, enum rtx_code code, rtx cmp_a,
+                     rtx cmp_b, rtx vfalse, rtx vtrue, rtx pred)
+{
+  enum insn_code icode;
+
+  bool unsignedp = (code == LTU || code == GEU
+                   || code == LEU || code == GTU);
+
+  bool maskstore_p = MEM_P (target);
+  bool restore_stack = false;
+  saved_pending_stack_adjust save;
+  rtx_insn *last = get_last_insn ();
+
+  /* If PRED is not given, prepare a compare insn using cmp_a and
+     cmp_b as the predicate.  */
+  if (pred == NULL_RTX)
+    {
+      if (! general_operand (cmp_a, GET_MODE (cmp_a))
+         || ! general_operand (cmp_b, GET_MODE (cmp_b)))
+       return NULL_RTX;
+
+      if (swap_commutative_operands_p (cmp_a, cmp_b))
+       {
+         std::swap (cmp_a, cmp_b);
+         code = swap_condition (code);
+       }
+
+      /* get_condition will prefer to generate LT and GT even if the old
+        comparison was against zero, so undo that canonicalization here
+        since comparisons against zero are cheaper.  */
+
+      if (code == LT && cmp_b == const1_rtx)
+       code = LE, cmp_b = const0_rtx;
+      else if (code == GT && cmp_b == constm1_rtx)
+       code = GE, cmp_b = const0_rtx;
+
+      code = unsignedp ? unsigned_condition (code) : code;
+      rtx comparison = simplify_gen_relational (code, VOIDmode,
+                                               VOIDmode, cmp_a, cmp_b);
+
+      if (!COMPARISON_P (comparison))
+       return NULL_RTX;
+
+      save_pending_stack_adjust (&save);
+      do_pending_stack_adjust ();
+
+      machine_mode cmode = VOIDmode;
+      prepare_cmp_insn (XEXP (comparison, 0), XEXP (comparison, 1),
+                       GET_CODE (comparison), NULL_RTX, unsignedp,
+                       OPTAB_WIDEN, &comparison, &cmode);
+
+      if (!comparison)
+       restore_stack = true;
+
+      pred = comparison;
+    }
+
+  if (pred)
+    {
+      machine_mode op_mode = GET_MODE (target),
+                  cond_mode = GET_MODE (pred);
+
+      if (maskstore_p)
+       icode = convert_optab_handler (maskstore_optab, op_mode,
+                                      cond_mode);
+      else
+       icode = convert_optab_handler (maskload_optab, op_mode,
+                                      cond_mode);
+
+      if (icode != CODE_FOR_nothing)
+       {
+         class expand_operand ops[4];
+         int opnum = 3;
+
+         create_output_operand (&ops[0], target, op_mode);
+         create_input_operand (&ops[1], vtrue, op_mode);
+         create_fixed_operand (&ops[2], pred);
+
+         if (!maskstore_p)
+           {
+             create_input_operand (&ops[3], vfalse, op_mode);
+             opnum = 4;
+           }
+
+         if (maybe_expand_insn (icode, opnum, ops))
+           return target;
+       }
+    }
+
+  if (restore_stack)
+    {
+      delete_insns_since (last);
+      restore_pending_stack_adjust (&save);
+    }
+
+  return NULL_RTX;
+}
 
 /* Emit a conditional negate or bitwise complement using the
    negcc or notcc optabs if available.  Return NULL_RTX if such operations
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 23fa77be24e..6e211d9571a 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -293,6 +293,9 @@ extern void emit_indirect_jump (rtx);
 rtx emit_conditional_move (rtx, rtx_comparison, rtx, rtx, machine_mode, int);
 rtx emit_conditional_move (rtx, rtx, rtx, rtx, rtx, machine_mode);
 
+/* Emit a mask_load or mask_store operation.  */
+rtx emit_mask_load_store (rtx, rtx_code, rtx, rtx, rtx, rtx, rtx = NULL);
+
 /* Emit a conditional negate or bitwise complement operation.  */
 rtx emit_conditional_neg_or_complement (rtx, rtx_code, machine_mode, rtx,
                                         rtx, rtx);
-- 
2.31.1
