Currently, the compiler moves HImode values between GPR and XMM registers with:

	%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}
	%vpextrw\t{$0, %1, %k0|%k0, %1, 0}

but it could use the slightly faster and shorter:

	%vmovd\t{%k1, %0|%0, %k1}
	%vmovd\t{%1, %k0|%k0, %1}

2022-01-06  Uroš Bizjak  <ubiz...@gmail.com>

gcc/ChangeLog:

	* config/i386/i386.c (ix86_output_ssemov) <MODE_DI>: Add %q modifier
	for operands in general registers.
	<MODE_SI>: Add %k modifier for operands in general registers.
	* config/i386/i386.md (*movhi_internal): Change type attribute of
	xmm-gpr interunit alternatives 9,10 to ssemov and mode attribute to
	SImode for non-avx512fp16 targets.
	(*movhf_internal): Ditto for xmm-gpr interunit alternatives 6,8.
	* config/i386/mmx.md (*movv2qi_internal): Ditto for xmm-gpr interunit
	alternatives 8,9.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr102811-2.c (dg-final): Update
	scan-assembler-times directives.
	* gcc.target/i386/sse2-float16-2.c (dg-final): Update
	scan-assembler directives.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 1a964fe00f4..aeb7db5a5e3 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -5535,15 +5535,30 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands) case MODE_DI: /* Handle broken assemblers that require movd instead of movq. */ - if (!HAVE_AS_IX86_INTERUNIT_MOVQ - && (GENERAL_REG_P (operands[0]) - || GENERAL_REG_P (operands[1]))) - return "%vmovd\t{%1, %0|%0, %1}"; + if (GENERAL_REG_P (operands[0])) + { + if (HAVE_AS_IX86_INTERUNIT_MOVQ) + return "%vmovq\t{%1, %q0|%q0, %1}"; + else + return "%vmovd\t{%1, %q0|%q0, %1}"; + } + else if (GENERAL_REG_P (operands[1])) + { + if (HAVE_AS_IX86_INTERUNIT_MOVQ) + return "%vmovq\t{%q1, %0|%0, %q1}"; + else + return "%vmovd\t{%q1, %0|%0, %q1}"; + } else return "%vmovq\t{%1, %0|%0, %1}"; case MODE_SI: - return "%vmovd\t{%1, %0|%0, %1}"; + if (GENERAL_REG_P (operands[0])) + return "%vmovd\t{%1, %k0|%k0, %1}"; + else if (GENERAL_REG_P (operands[1])) + return "%vmovd\t{%k1, %0|%0, %k1}"; + else + return "%vmovd\t{%1, %0|%0, %1}"; case MODE_HI: if (GENERAL_REG_P (operands[0])) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 9b424a3935b..376df1d51d1 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2580,13 +2580,9 @@ return standard_sse_constant_opcode (insn, operands); if (SSE_REG_P (operands[0])) - return MEM_P (operands[1]) - ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}" - : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}"; + return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"; else - return MEM_P (operands[0]) - ? 
"%vpextrw\t{$0, %1, %0|%0, %1, 0}" - : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}"; + return "%vpextrw\t{$0, %1, %0|%0, %1, 0}"; case TYPE_MSKLOG: if (operands[1] == const0_rtx) @@ -2614,13 +2610,13 @@ (const_string "mskmov") (eq_attr "alternative" "8") (const_string "msklog") - (eq_attr "alternative" "9,10,13,14") + (eq_attr "alternative" "13,14") (if_then_else (match_test "TARGET_AVX512FP16") (const_string "ssemov") (const_string "sselog1")) (eq_attr "alternative" "11") (const_string "sselog1") - (eq_attr "alternative" "12") + (eq_attr "alternative" "9,10,12") (const_string "ssemov") (match_test "optimize_function_for_size_p (cfun)") (const_string "imov") @@ -2644,7 +2640,11 @@ ] (const_string "orig"))) (set (attr "mode") - (cond [(eq_attr "alternative" "9,10,13,14") + (cond [(eq_attr "alternative" "9,10") + (if_then_else (match_test "TARGET_AVX512FP16") + (const_string "HI") + (const_string "SI")) + (eq_attr "alternative" "13,14") (if_then_else (match_test "TARGET_AVX512FP16") (const_string "HI") (const_string "TI")) @@ -3876,13 +3876,9 @@ return standard_sse_constant_opcode (insn, operands); if (SSE_REG_P (operands[0])) - return MEM_P (operands[1]) - ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}" - : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}"; + return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"; else - return MEM_P (operands[0]) - ? 
"%vpextrw\t{$0, %1, %0|%0, %1, 0}" - : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}"; + return "%vpextrw\t{$0, %1, %0|%0, %1, 0}"; default: if (get_attr_mode (insn) == MODE_SI) @@ -3901,9 +3897,9 @@ (set (attr "type") (cond [(eq_attr "alternative" "4") (const_string "sselog1") - (eq_attr "alternative" "5") + (eq_attr "alternative" "5,6,8") (const_string "ssemov") - (eq_attr "alternative" "6,7,8,9") + (eq_attr "alternative" "7,9") (if_then_else (match_test ("TARGET_AVX512FP16")) (const_string "ssemov") @@ -3930,7 +3926,12 @@ (set (attr "mode") (cond [(eq_attr "alternative" "4") (const_string "V4SF") - (eq_attr "alternative" "6,7,8,9") + (eq_attr "alternative" "6,8") + (if_then_else + (match_test "TARGET_AVX512FP16") + (const_string "HI") + (const_string "SI")) + (eq_attr "alternative" "7,9") (if_then_else (match_test "TARGET_AVX512FP16") (const_string "HI") diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index a409bb7c6c6..8e0a6490b7b 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -389,13 +389,9 @@ return standard_sse_constant_opcode (insn, operands); if (SSE_REG_P (operands[0])) - return MEM_P (operands[1]) - ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}" - : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}"; + return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"; else - return MEM_P (operands[0]) - ? 
"%vpextrw\t{$0, %1, %0|%0, %1, 0}" - : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}"; + return "%vpextrw\t{$0, %1, %0|%0, %1, 0}"; case TYPE_SSEMOV: return ix86_output_ssemov (insn, operands); @@ -412,13 +408,13 @@ ] (const_string "*"))) (set (attr "type") - (cond [(eq_attr "alternative" "6,7,8,9") + (cond [(eq_attr "alternative" "6,7") (if_then_else (match_test "TARGET_AVX512FP16") (const_string "ssemov") (const_string "sselog1")) (eq_attr "alternative" "4") (const_string "sselog1") - (eq_attr "alternative" "5") + (eq_attr "alternative" "5,8,9") (const_string "ssemov") (match_test "optimize_function_for_size_p (cfun)") (const_string "imov") @@ -440,10 +436,14 @@ ] (const_string "orig"))) (set (attr "mode") - (cond [(eq_attr "alternative" "6,7,8,9") + (cond [(eq_attr "alternative" "6,7") (if_then_else (match_test "TARGET_AVX512FP16") (const_string "HI") (const_string "TI")) + (eq_attr "alternative" "8,9") + (if_then_else (match_test "TARGET_AVX512FP16") + (const_string "HI") + (const_string "SI")) (eq_attr "alternative" "4") (cond [(match_test "TARGET_AVX") (const_string "TI") diff --git a/gcc/testsuite/gcc.target/i386/pr102811-2.c b/gcc/testsuite/gcc.target/i386/pr102811-2.c index e511c665ae8..97bc9b14e7f 100644 --- a/gcc/testsuite/gcc.target/i386/pr102811-2.c +++ b/gcc/testsuite/gcc.target/i386/pr102811-2.c @@ -1,7 +1,6 @@ /* { dg-do compile { target { ! 
ia32 } } } */ /* { dg-options "-O2 -mf16c -mno-avx512fp16" } */ -/* { dg-final { scan-assembler-times "pextrw" 1 } } */ -/* { dg-final { scan-assembler-times "pinsrw" 1 } } */ +/* { dg-final { scan-assembler-times "vmovd" 2 } } */ /* { dg-final { scan-assembler-not "\\\(%rsp\\\)"} } */ short test (_Float16 a) { diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-2.c b/gcc/testsuite/gcc.target/i386/sse2-float16-2.c index 3da7683fc31..25e17231c1a 100644 --- a/gcc/testsuite/gcc.target/i386/sse2-float16-2.c +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-2.c @@ -13,4 +13,5 @@ foo (union flt x) return x.flt; } -/* { dg-final { scan-assembler {(?n)pinsrw[\t ].*%xmm0} } } */ +/* { dg-final { scan-assembler {(?n)pinsrw[\t ].*%xmm0} { target ia32 } } } */ +/* { dg-final { scan-assembler {(?n)movd[\t ].*%xmm0} { target { ! ia32 } } } } */