https://gcc.gnu.org/g:d3fae2bea034edb001cd45d1d86c5ceef146899b

commit r15-1308-gd3fae2bea034edb001cd45d1d86c5ceef146899b
Author: liuhongt <hongtao....@intel.com>
Date:   Tue Jun 11 21:22:42 2024 +0800

    Adjust ix86_rtx_costs for pternlog_operand_p.
    
    r15-1100-gec985bc97a0157 improves handling of ternlog instructions;
    GCC can now recognize many different variants of pternlog_operand.
    
    The patch adjusts rtx_costs for that, so pass_combine can
    reasonably generate more optimal vpternlog instructions.
    
    i.e. for avx512f-vpternlogd-3.c, with the patch, 2 vpternlog are combined
    into one.
    
    1532,1533c1526
    <       vpternlogd      $168, %zmm1, %zmm0, %zmm2
    <       vpternlogd      $0x55, %zmm2, %zmm2, %zmm2
    
    >       vpternlogd      $87, %zmm1, %zmm0, %zmm2
    1732,1733c1725,1726
    <       vpand   %xmm0, %xmm1, %xmm0
    <       vpternlogd      $0x55, %zmm0, %zmm0, %zmm0
    
    >       vpternlogd      $63, %zmm1, %zmm0, %zmm1
    >       vmovdqa %xmm1, %xmm0
    1804,1805c1797
    <       vpternlogd      $188, %zmm2, %zmm0, %zmm1
    <       vpternlogd      $0x55, %zmm1, %zmm1, %zmm1
    
    >       vpternlogd      $37, %zmm0, %zmm2, %zmm1
    
    gcc/ChangeLog:
    
            * config/i386/i386.cc (ix86_rtx_costs): Adjust rtx_cost for
            pternlog_operand under AVX512, also adjust VEC_DUPLICATE
            accordingly, since vec_dup:mem can't be that cheap.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/avx2-pr98461.c: Scan either notl or
            vpternlog.
            * gcc.target/i386/avx512f-pr96891-3.c: Also scan for inverted
            condition.
            * gcc.target/i386/avx512f-vpternlogd-3.c: Adjust vpternlog
            number to 673.
            * gcc.target/i386/avx512f-vpternlogd-4.c: Ditto.
            * gcc.target/i386/avx512f-vpternlogd-5.c: Ditto.
            * gcc.target/i386/sse2-v1ti-vne.c: Add -mno-avx512f.

Diff:
---
 gcc/config/i386/i386.cc                            | 39 +++++++++++++++++++++-
 gcc/testsuite/gcc.target/i386/avx2-pr98461.c       |  2 +-
 gcc/testsuite/gcc.target/i386/avx512f-pr96891-3.c  |  2 +-
 .../gcc.target/i386/avx512f-vpternlogd-3.c         |  2 +-
 .../gcc.target/i386/avx512f-vpternlogd-4.c         |  2 +-
 .../gcc.target/i386/avx512f-vpternlogd-5.c         |  2 +-
 gcc/testsuite/gcc.target/i386/sse2-v1ti-vne.c      |  2 +-
 7 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index c72f64da983d..d4ccc24be6ec 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -21571,6 +21571,31 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
     = speed ? ix86_tune_cost : &ix86_size_cost;
   int src_cost;
 
+  /* Handling different vternlog variants.  */
+  if ((GET_MODE_SIZE (mode) == 64
+       ? (TARGET_AVX512F && TARGET_EVEX512)
+       : (TARGET_AVX512VL
+         || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)))
+      && GET_MODE_SIZE (mode) >= 16
+      && outer_code_i == SET
+      && ternlog_operand (x, mode))
+    {
+      rtx args[3];
+
+      args[0] = NULL_RTX;
+      args[1] = NULL_RTX;
+      args[2] = NULL_RTX;
+      int idx = ix86_ternlog_idx (x, args);
+      gcc_assert (idx >= 0);
+
+      *total = cost->sse_op;
+      for (int i = 0; i != 3; i++)
+       if (args[i])
+         *total += rtx_cost (args[i], GET_MODE (args[i]), UNSPEC, i, speed);
+      return true;
+    }
+
+
   switch (code)
     {
     case SET:
@@ -22233,6 +22258,9 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
       else if (XINT (x, 1) == UNSPEC_VTERNLOG)
        {
          *total = cost->sse_op;
+         *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
+         *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
+         *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed);
          return true;
        }
       else if (XINT (x, 1) == UNSPEC_PTEST)
@@ -22260,12 +22288,21 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
 
     case VEC_SELECT:
     case VEC_CONCAT:
-    case VEC_DUPLICATE:
       /* ??? Assume all of these vector manipulation patterns are
         recognizable.  In which case they all pretty much have the
         same cost.  */
      *total = cost->sse_op;
      return true;
+    case VEC_DUPLICATE:
+      *total = rtx_cost (XEXP (x, 0),
+                        GET_MODE (XEXP (x, 0)),
+                        VEC_DUPLICATE, 0, speed);
+      /* It's broadcast instruction, not embedded broadcasting.  */
+      if (outer_code == SET)
+       *total += cost->sse_op;
+
+     return true;
+
     case VEC_MERGE:
       mask = XEXP (x, 2);
       /* This is masked instruction, assume the same cost,
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr98461.c b/gcc/testsuite/gcc.target/i386/avx2-pr98461.c
index 15f49b864daa..225f2ab00e5f 100644
--- a/gcc/testsuite/gcc.target/i386/avx2-pr98461.c
+++ b/gcc/testsuite/gcc.target/i386/avx2-pr98461.c
@@ -2,7 +2,7 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mavx2 -masm=att" } */
 /* { dg-final { scan-assembler-times "\tvpmovmskb\t" 6 } } */
-/* { dg-final { scan-assembler-times "\tnotl\t" 6 } } */
+/* { dg-final { scan-assembler-times "\t(?:notl|vpternlog\[dq\])\t" 6 } } */
 /* { dg-final { scan-assembler-not "\tvpcmpeq" } } */
 /* { dg-final { scan-assembler-not "\tvpxor" } } */
 /* { dg-final { scan-assembler-not "\tvpandn" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr96891-3.c b/gcc/testsuite/gcc.target/i386/avx512f-pr96891-3.c
index 06db75213050..5b260818cb3c 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-pr96891-3.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr96891-3.c
@@ -3,7 +3,7 @@
 /* { dg-final { scan-assembler-not {not[bwlqd]\]} } } */
 /* { dg-final { scan-assembler-times {(?n)vpcmp[bwdq][ \t]*\$5} 4} } */
 /* { dg-final { scan-assembler-times {(?n)vpcmp[bwdq][ \t]*\$6} 4} } */
-/* { dg-final { scan-assembler-times {(?n)vpcmp[bwdq][ \t]*\$7} 4} } */
+/* { dg-final { scan-assembler-times {(?n)vpcmp[bwdq][ \t]*\$[37]} 4} } */
 /* { dg-final { scan-assembler-times {(?n)vcmpp[sd][ \t]*\$5} 2} } */
 /* { dg-final { scan-assembler-times {(?n)vcmpp[sd][ \t]*\$6} 2} } */
 /* { dg-final { scan-assembler-times {(?n)vcmpp[sd][ \t]*\$7} 2} } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-3.c
index fc66a9f55728..9ed4680346be 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-3.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-3.c
@@ -952,4 +952,4 @@ V foo_254_3(V a, V b, V c) { return (c|b)|a; }
 
 V foo_255_1(V a, V b, V c) { return (V){~0,~0,~0,~0}; }
 
-/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]" 694 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]" 673 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-4.c b/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-4.c
index 14296508cac9..eb39ffc25642 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-4.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-4.c
@@ -952,4 +952,4 @@ V foo_254_3(V a, V b, V c) { return (c|b)|a; }
 
 V foo_255_1(V a, V b, V c) { return (V){~0,~0,~0,~0}; }
 
-/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]" 694 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]" 673 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-5.c b/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-5.c
index 3dbd95452836..85de5b02ce6d 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-5.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-5.c
@@ -952,4 +952,4 @@ V foo_254_3(V a, V b, V c) { return (c|b)|a; }
 
 V foo_255_1(V a, V b, V c) { return (V){~0,~0,~0,~0}; }
 
-/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]" 679 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]" 673 } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse2-v1ti-vne.c b/gcc/testsuite/gcc.target/i386/sse2-v1ti-vne.c
index 767b0e4b3ac6..2394cff39f2c 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-v1ti-vne.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-v1ti-vne.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target int128 } } */
-/* { dg-options "-O2 -msse2" } */
+/* { dg-options "-O2 -msse2 -mno-avx512f" } */
 typedef unsigned __int128 uv1ti __attribute__ ((__vector_size__ (16)));
 typedef unsigned long long uv2di __attribute__ ((__vector_size__ (16)));
 typedef unsigned int uv4si __attribute__ ((__vector_size__ (16)));

Reply via email to