Currently, the compiler moves HImode values between GPR and XMM registers with:

    %vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}
    %vpextrw\t{$0, %1, %k0|%k0, %1, 0}

but it could use the slightly faster and shorter:

    %vmovd\t{%k1, %0|%0, %k1}
    %vmovd\t{%1, %k0|%k0, %1}

2022-01-06  Uroš Bizjak  <ubiz...@gmail.com>

gcc/ChangeLog:

    * config/i386/i386.c (ix86_output_ssemov) <MODE_DI>:
    Add %q modifier for operands in general registers.
    <MODE_SI>: Add %k modifier for operands in general registers.
    * config/i386/i386.md (*movhi_internal): Change type attribute of
    xmm-gpr interunit alternatives 9,10 to ssemov and mode attribute
    to SImode for non-avx512fp16 targets.
    (*movhf_internal): Ditto for xmm-gpr interunit alternatives 6,8.
    * config/i386/mmx.md (*movv2qi_internal):
    Ditto for xmm-gpr interunit alternatives 8,9.

gcc/testsuite/ChangeLog:

    * gcc.target/i386/pr102811-2.c (dg-final):
    Update scan-assembler-times directives.
    * gcc.target/i386/sse2-float16-2.c (dg-final):
    Update scan-assembler directives.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 1a964fe00f4..aeb7db5a5e3 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5535,15 +5535,30 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands)
 
     case MODE_DI:
       /* Handle broken assemblers that require movd instead of movq. */
-      if (!HAVE_AS_IX86_INTERUNIT_MOVQ
-         && (GENERAL_REG_P (operands[0])
-             || GENERAL_REG_P (operands[1])))
-       return "%vmovd\t{%1, %0|%0, %1}";
+      if (GENERAL_REG_P (operands[0]))
+       {
+         if (HAVE_AS_IX86_INTERUNIT_MOVQ)
+           return "%vmovq\t{%1, %q0|%q0, %1}";
+         else
+           return "%vmovd\t{%1, %q0|%q0, %1}";
+       }
+      else if (GENERAL_REG_P (operands[1]))
+       {
+         if (HAVE_AS_IX86_INTERUNIT_MOVQ)
+           return "%vmovq\t{%q1, %0|%0, %q1}";
+         else
+           return "%vmovd\t{%q1, %0|%0, %q1}";
+       }
       else
        return "%vmovq\t{%1, %0|%0, %1}";
 
     case MODE_SI:
-      return "%vmovd\t{%1, %0|%0, %1}";
+      if (GENERAL_REG_P (operands[0]))
+       return "%vmovd\t{%1, %k0|%k0, %1}";
+      else if (GENERAL_REG_P (operands[1]))
+       return "%vmovd\t{%k1, %0|%0, %k1}";
+      else
+       return "%vmovd\t{%1, %0|%0, %1}";
 
     case MODE_HI:
       if (GENERAL_REG_P (operands[0]))
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 9b424a3935b..376df1d51d1 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2580,13 +2580,9 @@
        return standard_sse_constant_opcode (insn, operands);
 
       if (SSE_REG_P (operands[0]))
-       return MEM_P (operands[1])
-         ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"
-         : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}";
+       return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
       else
-       return MEM_P (operands[0])
-         ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
-         : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}";
+       return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";
 
     case TYPE_MSKLOG:
       if (operands[1] == const0_rtx)
@@ -2614,13 +2610,13 @@
              (const_string "mskmov")
            (eq_attr "alternative" "8")
              (const_string "msklog")
-           (eq_attr "alternative" "9,10,13,14")
+           (eq_attr "alternative" "13,14")
              (if_then_else (match_test "TARGET_AVX512FP16")
                (const_string "ssemov")
                (const_string "sselog1"))
            (eq_attr "alternative" "11")
              (const_string "sselog1")
-           (eq_attr "alternative" "12")
+           (eq_attr "alternative" "9,10,12")
              (const_string "ssemov")
            (match_test "optimize_function_for_size_p (cfun)")
              (const_string "imov")
@@ -2644,7 +2640,11 @@
              ]
              (const_string "orig")))
    (set (attr "mode")
-     (cond [(eq_attr "alternative" "9,10,13,14")
+     (cond [(eq_attr "alternative" "9,10")
+             (if_then_else (match_test "TARGET_AVX512FP16")
+               (const_string "HI")
+               (const_string "SI"))
+           (eq_attr "alternative" "13,14")
              (if_then_else (match_test "TARGET_AVX512FP16")
                (const_string "HI")
                (const_string "TI"))
@@ -3876,13 +3876,9 @@
        return standard_sse_constant_opcode (insn, operands);
 
       if (SSE_REG_P (operands[0]))
-       return MEM_P (operands[1])
-              ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"
-              : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}";
+       return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
       else
-       return MEM_P (operands[0])
-              ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
-              : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}";
+       return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";
 
     default:
       if (get_attr_mode (insn) == MODE_SI)
@@ -3901,9 +3897,9 @@
    (set (attr "type")
        (cond [(eq_attr "alternative" "4")
                 (const_string "sselog1")
-              (eq_attr "alternative" "5")
+              (eq_attr "alternative" "5,6,8")
                 (const_string "ssemov")
-              (eq_attr "alternative" "6,7,8,9")
+              (eq_attr "alternative" "7,9")
                 (if_then_else
                   (match_test ("TARGET_AVX512FP16"))
                   (const_string "ssemov")
@@ -3930,7 +3926,12 @@
    (set (attr "mode")
        (cond [(eq_attr "alternative" "4")
                 (const_string "V4SF")
-              (eq_attr "alternative" "6,7,8,9")
+              (eq_attr "alternative" "6,8")
+                (if_then_else
+                  (match_test "TARGET_AVX512FP16")
+                  (const_string "HI")
+                  (const_string "SI"))
+              (eq_attr "alternative" "7,9")
                 (if_then_else
                   (match_test "TARGET_AVX512FP16")
                   (const_string "HI")
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index a409bb7c6c6..8e0a6490b7b 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -389,13 +389,9 @@
        return standard_sse_constant_opcode (insn, operands);
 
       if (SSE_REG_P (operands[0]))
-       return MEM_P (operands[1])
-         ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"
-         : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}";
+       return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
       else
-       return MEM_P (operands[0])
-         ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
-         : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}";
+       return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";
 
     case TYPE_SSEMOV:
       return ix86_output_ssemov (insn, operands);
@@ -412,13 +408,13 @@
               ]
               (const_string "*")))
    (set (attr "type")
-     (cond [(eq_attr "alternative" "6,7,8,9")
+     (cond [(eq_attr "alternative" "6,7")
              (if_then_else (match_test "TARGET_AVX512FP16")
                (const_string "ssemov")
                (const_string "sselog1"))
            (eq_attr "alternative" "4")
              (const_string "sselog1")
-           (eq_attr "alternative" "5")
+           (eq_attr "alternative" "5,8,9")
              (const_string "ssemov")
            (match_test "optimize_function_for_size_p (cfun)")
              (const_string "imov")
@@ -440,10 +436,14 @@
              ]
              (const_string "orig")))
    (set (attr "mode")
-     (cond [(eq_attr "alternative" "6,7,8,9")
+     (cond [(eq_attr "alternative" "6,7")
              (if_then_else (match_test "TARGET_AVX512FP16")
                (const_string "HI")
                (const_string "TI"))
+           (eq_attr "alternative" "8,9")
+             (if_then_else (match_test "TARGET_AVX512FP16")
+               (const_string "HI")
+               (const_string "SI"))
            (eq_attr "alternative" "4")
              (cond [(match_test "TARGET_AVX")
                       (const_string "TI")
diff --git a/gcc/testsuite/gcc.target/i386/pr102811-2.c b/gcc/testsuite/gcc.target/i386/pr102811-2.c
index e511c665ae8..97bc9b14e7f 100644
--- a/gcc/testsuite/gcc.target/i386/pr102811-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr102811-2.c
@@ -1,7 +1,6 @@
 /* { dg-do compile { target { ! ia32 } } } */
 /* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
-/* { dg-final { scan-assembler-times "pextrw" 1 } } */
-/* { dg-final { scan-assembler-times "pinsrw" 1 } } */
+/* { dg-final { scan-assembler-times "vmovd" 2 } } */
 /* { dg-final { scan-assembler-not "\\\(%rsp\\\)"} } */
 short test (_Float16 a)
 {
diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-2.c b/gcc/testsuite/gcc.target/i386/sse2-float16-2.c
index 3da7683fc31..25e17231c1a 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-float16-2.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-float16-2.c
@@ -13,4 +13,5 @@ foo (union flt x)
   return x.flt;
 }
 
-/* { dg-final { scan-assembler {(?n)pinsrw[\t ].*%xmm0} } } */
+/* { dg-final { scan-assembler {(?n)pinsrw[\t ].*%xmm0} { target ia32 } } } */
+/* { dg-final { scan-assembler {(?n)movd[\t ].*%xmm0} { target { ! ia32 } } } } */

Reply via email to