Hello!

The attached patch rewrites the nearbyint expanders. The problem with
the current approach is that setting the mask bit of the precision
interrupt on x87 only prevents interrupt generation; the insn still
sets the interrupt flag. fclex clears that flag, but it also clears the
other flags (e.g. the invalid flag, set by an sNaN).

This issue can be "solved" by disabling the x87 insn for
flag_trapping_math. Other instructions (e.g. ceil, floor) already
treat !flag_trapping_math as "we don't care if the insn throws an
exception", so based on this reasoning we can just as well convert
nearbyint to rint (which doesn't require a complex control word set-up).

Traps are not an issue with the SSE 4.1 round instruction. This
instruction can suppress generation of the precision interrupt by
setting the ROUND_NO_EXC bit in its immediate operand.

2018-09-24  Uros Bizjak  <ubiz...@gmail.com>

    * config/i386/i386.md (frndintxf2_mask_pm): Remove.
    (frndintxf2_mask_pm_i387): Ditto.
    (nearbyintxf2): Rewrite expander pattern to match rintxf2.
    Enable for !flag_trapping_math.
    (nearbyint<mode>2): Enable x87 modes for !flag_trapping_math.
    Enable SSE modes for TARGET_SSE4_1 and expand them with round insn.
    Change operand 1 predicate to nonimmediate_operand.
    (attr "i387_cw"): Remove mask_pm.
    * config/i386/i386.h (enum ix86_stack_slot): Remove SLOT_CW_MASK_PM.
    (enum ix86_entity): Remove I387_MASK_PM.
    * config/i386/i386.c (ix86_i387_mode_needed): Do not
    handle I387_MASK_PM.
    (ix86_mode_needed): Ditto.
    (ix86_mode_after): Ditto.
    (ix86_mode_entry): Ditto.
    (ix86_mode_exit): Ditto.
    (emit_i387_cw_initialization): Do not handle I387_CW_MASK_PM.

Patch was bootstrapped and regression tested on x86_64-linux-gnu.

I'll wait a bit for possible comments on the approach.

Uros.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6dd31309495..b627b215c95 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -18874,11 +18874,6 @@ ix86_i387_mode_needed (int entity, rtx_insn *insn)
        return mode;
       break;
 
-    case I387_MASK_PM:
-      if (mode == I387_CW_MASK_PM)
-       return mode;
-      break;
-
     default:
       gcc_unreachable ();
     }
@@ -18901,7 +18896,6 @@ ix86_mode_needed (int entity, rtx_insn *insn)
     case I387_TRUNC:
     case I387_FLOOR:
     case I387_CEIL:
-    case I387_MASK_PM:
       return ix86_i387_mode_needed (entity, insn);
     default:
       gcc_unreachable ();
@@ -18962,7 +18956,6 @@ ix86_mode_after (int entity, int mode, rtx_insn *insn)
     case I387_TRUNC:
     case I387_FLOOR:
     case I387_CEIL:
-    case I387_MASK_PM:
       return mode;
     default:
       gcc_unreachable ();
@@ -19015,7 +19008,6 @@ ix86_mode_entry (int entity)
     case I387_TRUNC:
     case I387_FLOOR:
     case I387_CEIL:
-    case I387_MASK_PM:
       return I387_CW_ANY;
     default:
       gcc_unreachable ();
@@ -19050,7 +19042,6 @@ ix86_mode_exit (int entity)
     case I387_TRUNC:
     case I387_FLOOR:
     case I387_CEIL:
-    case I387_MASK_PM:
       return I387_CW_ANY;
     default:
       gcc_unreachable ();
@@ -19102,12 +19093,6 @@ emit_i387_cw_initialization (int mode)
       slot = SLOT_CW_CEIL;
       break;
 
-    case I387_CW_MASK_PM:
-      /* mask precision exception for nearbyint() */
-      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
-      slot = SLOT_CW_MASK_PM;
-      break;
-
     default:
       gcc_unreachable ();
     }
@@ -19163,7 +19148,6 @@ ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
     case I387_TRUNC:
     case I387_FLOOR:
     case I387_CEIL:
-    case I387_MASK_PM:
       if (mode != I387_CW_ANY
          && mode != I387_CW_UNINITIALIZED)
        emit_i387_cw_initialization (mode);
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 2fa9f2d53c4..3fa7d00aabc 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2456,7 +2456,6 @@ enum ix86_stack_slot
   SLOT_CW_TRUNC,
   SLOT_CW_FLOOR,
   SLOT_CW_CEIL,
-  SLOT_CW_MASK_PM,
   SLOT_STV_TEMP,
   MAX_386_STACK_LOCALS
 };
@@ -2468,7 +2467,6 @@ enum ix86_entity
   I387_TRUNC,
   I387_FLOOR,
   I387_CEIL,
-  I387_MASK_PM,
   MAX_386_ENTITIES
 };
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e08b2b7c14b..75e2edb791c 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -144,7 +144,6 @@
   UNSPEC_FRNDINT_FLOOR
   UNSPEC_FRNDINT_CEIL
   UNSPEC_FRNDINT_TRUNC
-  UNSPEC_FRNDINT_MASK_PM
   UNSPEC_FIST_FLOOR
   UNSPEC_FIST_CEIL
 
@@ -775,7 +774,7 @@
 
 ;; Defines rounding mode of an FP operation.
 
-(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any"
+(define_attr "i387_cw" "trunc,floor,ceil,uninitialized,any"
   (const_string "any"))
 
 ;; Define attribute to classify add/sub insns that consumes carry flag (CF)
@@ -16341,67 +16340,35 @@
   DONE;
 })
 
-;; Rounding mode control word calculation could clobber FLAGS_REG.
-(define_insn_and_split "frndintxf2_mask_pm"
+(define_expand "nearbyintxf2"
   [(set (match_operand:XF 0 "register_operand")
        (unspec:XF [(match_operand:XF 1 "register_operand")]
-                  UNSPEC_FRNDINT_MASK_PM))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations
-   && can_create_pseudo_p ()"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  ix86_optimize_mode_switching[I387_MASK_PM] = 1;
-
-  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
-  operands[3] = assign_386_stack_local (HImode, SLOT_CW_MASK_PM);
-
-  emit_insn (gen_frndintxf2_mask_pm_i387 (operands[0], operands[1],
-                                         operands[2], operands[3]));
-  DONE;
-}
-  [(set_attr "type" "frndint")
-   (set_attr "i387_cw" "mask_pm")
-   (set_attr "mode" "XF")])
-
-(define_insn "frndintxf2_mask_pm_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-       (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
-                  UNSPEC_FRNDINT_MASK_PM))
-   (use (match_operand:HI 2 "memory_operand" "m"))
-   (use (match_operand:HI 3 "memory_operand" "m"))]
-  "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations"
-  "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2"
-  [(set_attr "type" "frndint")
-   (set_attr "i387_cw" "mask_pm")
-   (set_attr "mode" "XF")])
-
-(define_expand "nearbyintxf2"
-  [(parallel [(set (match_operand:XF 0 "register_operand")
-                  (unspec:XF [(match_operand:XF 1 "register_operand")]
-                             UNSPEC_FRNDINT_MASK_PM))
-             (clobber (reg:CC FLAGS_REG))])]
+                  UNSPEC_FRNDINT))]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations")
+   && !flag_trapping_math")
 
 (define_expand "nearbyint<mode>2"
   [(use (match_operand:MODEF 0 "register_operand"))
-   (use (match_operand:MODEF 1 "register_operand"))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-       || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-{
-  rtx op0 = gen_reg_rtx (XFmode);
-  rtx op1 = gen_reg_rtx (XFmode);
+   (use (match_operand:MODEF 1 "nonimmediate_operand"))]
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+         || TARGET_MIX_SSE_I387)
+    && !flag_trapping_math)
+   || (TARGET_SSE4_1 && TARGET_SSE_MATH)"
+{
+  if (TARGET_SSE4_1 && TARGET_SSE_MATH)
+    emit_insn (gen_sse4_1_round<mode>2
+              (operands[0], operands[1], GEN_INT (ROUND_MXCSR
+                                                  | ROUND_NO_EXC)));
+  else
+    {
+      rtx op0 = gen_reg_rtx (XFmode);
+      rtx op1 = gen_reg_rtx (XFmode);
 
-  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
-  emit_insn (gen_frndintxf2_mask_pm (op0, op1));
-  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+      emit_insn (gen_nearbyintxf2 (op0, op1));
+      emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
+    }
   DONE;
 })
 
diff --git a/gcc/reg-stack.c b/gcc/reg-stack.c
index 97d758c8307..8b45ecf0ec5 100644
--- a/gcc/reg-stack.c
+++ b/gcc/reg-stack.c
@@ -1818,7 +1818,6 @@ subst_stack_regs_pat (rtx_insn *insn, stack_ptr regstack, rtx pat)
              case UNSPEC_FRNDINT_FLOOR:
              case UNSPEC_FRNDINT_CEIL:
              case UNSPEC_FRNDINT_TRUNC:
-             case UNSPEC_FRNDINT_MASK_PM:
 
                /* Above insns operate on the top of the stack.  */
 

Reply via email to