vmovapd can enable register renaming and has the same code size as
vmovsd. The same applies to vmovsh vs. vmovaps; vmovaps is 1 byte
shorter than vmovsh.

When TARGET_AVX512VL is not available, still generate
vmovsd/vmovss/vmovsh to avoid using vmovapd/vmovaps on zmm16-31.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog:

        * config/i386/i386.md (movdf_internal): Generate vmovapd instead of
        vmovsd when moving DFmode between SSE_REGS.
        (movhi_internal): Generate vmovdqa instead of vmovsh when
        moving HImode between SSE_REGS.
        (mov<mode>_internal): Use vmovaps instead of vmovsh when
        moving HF/BFmode between SSE_REGS.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/pr89229-4a.c: Adjust testcase.
---
 gcc/config/i386/i386.md                    | 20 +++++++++++++++++---
 gcc/testsuite/gcc.target/i386/pr89229-4a.c |  4 +---
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c906d75b13e..77182e34fe1 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2961,8 +2961,12 @@ (define_insn "*movhi_internal"
                    ]
                    (const_string "TI"))
            (eq_attr "alternative" "12")
-             (cond [(match_test "TARGET_AVX512FP16")
+             (cond [(match_test "TARGET_AVX512VL")
+                      (const_string "TI")
+                    (match_test "TARGET_AVX512FP16")
                       (const_string "HF")
+                    (match_test "TARGET_AVX512F")
+                      (const_string "SF")
                     (match_test "TARGET_AVX")
                       (const_string "TI")
                     (ior (not (match_test "TARGET_SSE2"))
@@ -4099,8 +4103,12 @@ (define_insn "*movdf_internal"
 
               /* movaps is one byte shorter for non-AVX targets.  */
               (eq_attr "alternative" "13,17")
-                (cond [(match_test "TARGET_AVX")
+                (cond [(match_test "TARGET_AVX512VL")
+                         (const_string "V2DF")
+                       (match_test "TARGET_AVX512F")
                          (const_string "DF")
+                       (match_test "TARGET_AVX")
+                         (const_string "V2DF")
                        (ior (not (match_test "TARGET_SSE2"))
                             (match_test "optimize_function_for_size_p (cfun)"))
                          (const_string "V4SF")
@@ -4380,8 +4388,14 @@ (define_insn "*mov<mode>_internal"
                   (const_string "HI")
                   (const_string "TI"))
               (eq_attr "alternative" "5")
-                (cond [(match_test "TARGET_AVX512FP16")
+                (cond [(match_test "TARGET_AVX512VL")
+                       (const_string "V4SF")
+                       (match_test "TARGET_AVX512FP16")
                          (const_string "HF")
+                       (match_test "TARGET_AVX512F")
+                         (const_string "SF")
+                       (match_test "TARGET_AVX")
+                         (const_string "V4SF")
                        (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
                             (match_test "TARGET_SSE_SPLIT_REGS"))
                          (const_string "V4SF")
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-4a.c b/gcc/testsuite/gcc.target/i386/pr89229-4a.c
index 5bc10d25619..8869650b0ad 100644
--- a/gcc/testsuite/gcc.target/i386/pr89229-4a.c
+++ b/gcc/testsuite/gcc.target/i386/pr89229-4a.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do assemble { target { ! ia32 } } } */
 /* { dg-options "-O2 -march=skylake-avx512" } */
 
 extern double d;
@@ -12,5 +12,3 @@ foo1 (double x)
   asm volatile ("" : "+v" (xmm17));
   d = xmm17;
 }
-
-/* { dg-final { scan-assembler-not "vmovapd" } } */
-- 
2.31.1

Reply via email to