On Wed, Sep 08, 2021 at 06:00:50PM +0800, Hongtao Liu wrote:
> Yes, I think so.
> And I found that paradoxical subregs like (subreg:V4SF (reg:SF)) were not
> allowed by validate_subreg until r11-621.
> That's why the post-reload splitter is needed here.

The following seems to work for all the testcases I've found (and in some
cases generates better code than the post-reload splitter):

2021-09-08  Jakub Jelinek  <ja...@redhat.com>
            liuhongt  <hongtao....@intel.com>

        PR target/89984
        * config/i386/i386.md (@xorsign<mode>3_1): Remove.
        * config/i386/i386-expand.c (ix86_expand_xorsign): Expand right away
        into AND with mask and XOR, using paradoxical subregs.
        (ix86_split_xorsign): Remove.

        * gcc.target/i386/avx-pr102224.c: Fix up PR number.
        * gcc.dg/pr89984.c: New test.
        * gcc.target/i386/avx-pr89984.c: New test.

--- gcc/config/i386/i386.md.jj  2021-09-08 11:40:55.826534981 +0200
+++ gcc/config/i386/i386.md     2021-09-08 11:44:08.394828674 +0200
@@ -10918,20 +10918,6 @@ (define_expand "xorsign<mode>3"
   DONE;
 })
 
-(define_insn_and_split "@xorsign<mode>3_1"
-  [(set (match_operand:MODEF 0 "register_operand" "=&Yv,&Yv,&Yv")
-       (unspec:MODEF
-         [(match_operand:MODEF 1 "register_operand" "Yv,0,Yv")
-          (match_operand:MODEF 2 "register_operand" "0,Yv,Yv")
-          (match_operand:<ssevecmode> 3 "nonimmediate_operand" "Yvm,Yvm,Yvm")]
-         UNSPEC_XORSIGN))]
-  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
-  "#"
-  "&& reload_completed"
-  [(const_int 0)]
-  "ix86_split_xorsign (operands); DONE;"
-  [(set_attr "isa" "*,avx,avx")])
-
 ;; One complement instructions
 
 (define_expand "one_cmpl<mode>2"
--- gcc/config/i386/i386-expand.c.jj    2021-09-08 11:40:55.824535010 +0200
+++ gcc/config/i386/i386-expand.c       2021-09-08 11:51:15.969819626 +0200
@@ -2270,7 +2270,7 @@ void
 ix86_expand_xorsign (rtx operands[])
 {
   machine_mode mode, vmode;
-  rtx dest, op0, op1, mask;
+  rtx dest, op0, op1, mask, x, temp;
 
   dest = operands[0];
   op0 = operands[1];
@@ -2285,60 +2285,15 @@ ix86_expand_xorsign (rtx operands[])
   else
     gcc_unreachable ();
 
+  temp = gen_reg_rtx (vmode);
   mask = ix86_build_signbit_mask (vmode, 0, 0);
 
-  emit_insn (gen_xorsign3_1 (mode, dest, op0, op1, mask));
-}
+  op1 = lowpart_subreg (vmode, op1, mode);
+  x = gen_rtx_AND (vmode, op1, mask);
+  emit_insn (gen_rtx_SET (temp, x));
 
-/* Deconstruct an xorsign operation into bit masks.  */
-
-void
-ix86_split_xorsign (rtx operands[])
-{
-  machine_mode mode, vmode;
-  rtx dest, op0, op1, mask, x;
-
-  dest = operands[0];
-  op0 = operands[1];
-  op1 = operands[2];
-  mask = operands[3];
-
-  mode = GET_MODE (dest);
-  vmode = GET_MODE (mask);
-
-  /* The constraints ensure that for non-AVX dest == op1 is
-     different from op0, and for AVX that at most two of
-     dest, op0 and op1 are the same register but the third one
-     is different.  */
-  if (rtx_equal_p (op0, op1))
-    {
-      gcc_assert (TARGET_AVX && !rtx_equal_p (op0, dest));
-      if (vmode == V4SFmode)
-       vmode = V4SImode;
-      else
-       {
-         gcc_assert (vmode == V2DFmode);
-         vmode = V2DImode;
-       }
-      mask = lowpart_subreg (vmode, mask, GET_MODE (mask));
-      if (MEM_P (mask))
-       {
-         rtx msk = lowpart_subreg (vmode, dest, mode);
-         emit_insn (gen_rtx_SET (msk, mask));
-         mask = msk;
-       }
-      op0 = lowpart_subreg (vmode, op0, mode);
-      x = gen_rtx_AND (vmode, gen_rtx_NOT (vmode, mask), op0);
-    }
-  else
-    {
-      op1 = lowpart_subreg (vmode, op1, mode);
-      x = gen_rtx_AND (vmode, op1, mask);
-      emit_insn (gen_rtx_SET (op1, x));
-
-      op0 = lowpart_subreg (vmode, op0, mode);
-      x = gen_rtx_XOR (vmode, op1, op0);
-    }
+  op0 = lowpart_subreg (vmode, op0, mode);
+  x = gen_rtx_XOR (vmode, temp, op0);
 
   dest = lowpart_subreg (vmode, dest, mode);
   emit_insn (gen_rtx_SET (dest, x));
--- gcc/testsuite/gcc.target/i386/avx-pr102224.c.jj     2021-09-08 11:40:55.826534981 +0200
+++ gcc/testsuite/gcc.target/i386/avx-pr102224.c        2021-09-08 11:57:41.741386062 +0200
@@ -1,4 +1,4 @@
-/* PR tree-optimization/51581 */
+/* PR target/102224 */
 /* { dg-do run } */
 /* { dg-options "-O2 -mavx" } */
 /* { dg-require-effective-target avx } */
--- gcc/testsuite/gcc.dg/pr89984.c.jj   2021-09-08 11:56:33.799343240 +0200
+++ gcc/testsuite/gcc.dg/pr89984.c      2021-09-08 11:54:36.070001821 +0200
@@ -0,0 +1,20 @@
+/* PR target/89984 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+__attribute__((noipa)) float
+foo (float x, float y)
+{
+  return x * __builtin_copysignf (1.0f, y) + y;
+}
+
+int
+main ()
+{
+  if (foo (1.25f, 7.25f) != 1.25f + 7.25f
+      || foo (1.75f, -3.25f) != -1.75f + -3.25f
+      || foo (-2.25f, 7.5f) != -2.25f + 7.5f
+      || foo (-3.0f, -4.0f) != 3.0f + -4.0f)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.target/i386/avx-pr89984.c.jj      2021-09-08 11:57:12.297800869 +0200
+++ gcc/testsuite/gcc.target/i386/avx-pr89984.c 2021-09-08 11:57:56.936172001 +0200
@@ -0,0 +1,23 @@
+/* PR target/89984 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-require-effective-target avx } */
+
+#ifndef CHECK_H
+#define CHECK_H "avx-check.h"
+#endif
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#define main main1
+#include "../../gcc.dg/pr89984.c"
+#undef main
+
+#include CHECK_H
+
+static void
+TEST (void)
+{
+  main1 ();
+}


        Jakub

Reply via email to