Hi, vcvtph2ps and vcvtps2ph should be used to convert between _Float16 and SFmode with -mf16c, so this patch adds define_expand patterns extendhfsf2 and truncsfhf2 for TARGET_F16C. The temporary vector register is cleared before the conversion, and movhi_internal and ix86_can_change_mode_class are updated accordingly. The commit message has also been fixed.
OK for master? gcc/ChangeLog: PR target/102811 * config/i386/i386.c (ix86_can_change_mode_class): Allow 16 bit data in XMM register for TARGET_SSE2. * config/i386/i386.md (extendhfsf2): Add extendhfsf2 for TARGET_F16C. (extendhfdf2): Restrict extendhfdf2 to TARGET_AVX512FP16 only. (*extendhf<mode>2): Rename from extendhf<mode>2. (truncsfhf2): Likewise. (truncdfhf2): Likewise. (*trunc<mode>hf2): Likewise. gcc/testsuite/ChangeLog: PR target/102811 * gcc.target/i386/pr90773-21.c: Optimize movhi_internal, also allow pextrw to replace vmovd + movw. * gcc.target/i386/pr90773-23.c: Ditto. * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test. --- gcc/config/i386/i386.c | 5 +- gcc/config/i386/i386.md | 74 +++++++++++++++++-- .../i386/avx512vl-vcvtps2ph-pr102811.c | 11 +++ gcc/testsuite/gcc.target/i386/pr90773-21.c | 2 +- gcc/testsuite/gcc.target/i386/pr90773-23.c | 2 +- 5 files changed, 83 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e94efdf39fb..4b813533961 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to, disallow a change to these modes, reload will assume it's ok to drop the subreg from (subreg:SI (reg:HI 100) 0). This affects the vec_dupv4hi pattern. - NB: AVX512FP16 supports vmovw which can load 16bit data to sse - register. */ - int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4; + NB: SSE2 can load 16bit data to sse register via pinsrw. */ + int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 
2 : +4; if (GET_MODE_SIZE (from) < mov_size) return false; } diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6eb9de81921..6ee264f1151 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2525,6 +2525,16 @@ case TYPE_SSEMOV: return ix86_output_ssemov (insn, operands); + case TYPE_SSELOG: + if (SSE_REG_P (operands[0])) + return MEM_P (operands[1]) + ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" + : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; + else + return MEM_P (operands[1]) + ? "pextrw\t{$0, %1, %0|%0, %1, 0}" + : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}"; + case TYPE_MSKLOG: if (operands[1] == const0_rtx) return "kxorw\t%0, %0, %0"; @@ -2540,13 +2550,17 @@ } } [(set (attr "isa") - (cond [(eq_attr "alternative" "9,10,11,12,13") - (const_string "avx512fp16") + (cond [(eq_attr "alternative" "9,10,11,12") + (const_string "sse2") + (eq_attr "alternative" "13") + (const_string "sse4") ] (const_string "*"))) (set (attr "type") (cond [(eq_attr "alternative" "9,10,11,12,13") - (const_string "ssemov") + (if_then_else (match_test "TARGET_AVX512FP16") + (const_string "ssemov") + (const_string "sselog")) (eq_attr "alternative" "4,5,6,7") (const_string "mskmov") (eq_attr "alternative" "8") @@ -4574,8 +4588,32 @@ emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); }) -(define_insn "extendhf<mode>2" - [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v") +(define_expand "extendhfsf2" + [(set (match_operand:SF 0 "register_operand") + (float_extend:SF + (match_operand:HF 1 "nonimmediate_operand")))] + "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL" +{ + if (!TARGET_AVX512FP16) + { + rtx res = gen_reg_rtx (V4SFmode); + rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode)); + + ix86_expand_vector_set (false, tmp, operands[1], 0); + emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp))); + emit_move_insn (operands[0], gen_lowpart (SFmode, res)); + DONE; + } +}) + +(define_expand "extendhfdf2" + [(set (match_operand:DF 0 "register_operand") + 
(float_extend:DF + (match_operand:HF 1 "nonimmediate_operand")))] + "TARGET_AVX512FP16") + +(define_insn "*extendhf<mode>2" + [(set (match_operand:MODEF 0 "register_operand" "=v") (float_extend:MODEF (match_operand:HF 1 "nonimmediate_operand" "vm")))] "TARGET_AVX512FP16" @@ -4766,7 +4804,31 @@ ;; Conversion from {SF,DF}mode to HFmode. -(define_insn "trunc<mode>hf2" +(define_expand "truncsfhf2" + [(set (match_operand:HF 0 "register_operand") + (float_truncate:HF + (match_operand:SF 1 "nonimmediate_operand")))] + "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL" + { + if (!TARGET_AVX512FP16) + { + rtx res = gen_reg_rtx (V8HFmode); + rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode)); + + ix86_expand_vector_set (false, tmp, operands[1], 0); + emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4))); + emit_move_insn (operands[0], gen_lowpart (HFmode, res)); + DONE; + } + }) + +(define_expand "truncdfhf2" + [(set (match_operand:HF 0 "register_operand") + (float_truncate:HF + (match_operand:DF 1 "nonimmediate_operand")))] + "TARGET_AVX512FP16") + +(define_insn "*trunc<mode>hf2" [(set (match_operand:HF 0 "register_operand" "=v") (float_truncate:HF (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c new file mode 100644 index 00000000000..dfbfb167953 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */ +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */ +/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */ +/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */ +_Float16 test (_Float16 a, _Float16 b) +{ + return a + b; +} diff --git 
a/gcc/testsuite/gcc.target/i386/pr90773-21.c b/gcc/testsuite/gcc.target/i386/pr90773-21.c index 5bbb387a3ea..0d620fff83c 100644 --- a/gcc/testsuite/gcc.target/i386/pr90773-21.c +++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c @@ -10,4 +10,4 @@ foo (int c) } /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */ -/* { dg-final { scan-assembler-times "movw\[\\t \]%.*, 32\\(%\[\^,\]+\\)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \].*, +32\\(%\[\^,\]+\\)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c b/gcc/testsuite/gcc.target/i386/pr90773-23.c index ca4a86f30b8..b7369e802e1 100644 --- a/gcc/testsuite/gcc.target/i386/pr90773-23.c +++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c @@ -10,4 +10,4 @@ foo (void) } /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */ -/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \]+.+, +32\\(%\[\^,\]+\\)" 1 } } */ -- 2.18.1