There is a typo in the standard pattern names used for unsigned_{float,fix}: they should be floatunsmn2/fixuns_truncmn2, not ufloatmn2/ufix_truncmn2 as in current trunk. This patch fixes the typo.
Also, vcvttps2udq is available under AVX512VL, so it can be generated directly instead of being emulated via vcvttps2dq. Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ok for GCC14 stage1 (or maybe for trunk)? gcc/ChangeLog: PR target/85048 * config/i386/sse.md (fixuns_trunc<mode><sseintvecmodelower>2): Generate vcvttps2udq under AVX512VL. (fixuns_truncv4dfv4si2): New expander. (floatuns<si2dfmodelower><mode>2): New expander. gcc/testsuite/ChangeLog: * g++.target/i386/pr85048.C: New test. --- gcc/config/i386/sse.md | 18 ++++++++++++-- gcc/testsuite/g++.target/i386/pr85048.C | 33 +++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.target/i386/pr85048.C diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 172ec3bea4f..9c2bd468c65 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -8014,8 +8014,9 @@ (define_expand "fixuns_trunc<mode><sseintvecmodelower>2" (match_operand:VF1 1 "register_operand")] "TARGET_SSE2" { - if (<MODE>mode == V16SFmode) - emit_insn (gen_ufix_truncv16sfv16si2 (operands[0], + /* AVX512 support vcvttps2udq for all 128/256/512-bit vectors. */ + if (<MODE>mode == V16SFmode || TARGET_AVX512VL) + emit_insn (gen_ufix_trunc<mode><sseintvecmodelower>2 (operands[0], operands[1])); else { @@ -8413,6 +8414,12 @@ (define_insn "*float<floatunssuffix>v2div2sf2_mask_1" (set_attr "prefix" "evex") (set_attr "mode" "V4SF")]) +(define_expand "floatuns<si2dfmodelower><mode>2" + [(set (match_operand:VF2_512_256VL 0 "register_operand") + (unsigned_float:VF2_512_256VL + (match_operand:<si2dfmode> 1 "nonimmediate_operand")))] + "TARGET_AVX512F") + (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>" [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v") (unsigned_float:VF2_512_256VL @@ -8694,6 +8701,13 @@ (define_insn "fix_truncv4dfv4si2<mask_name>" (set_attr "prefix" "maybe_evex") (set_attr "mode" "OI")]) + +/* The standard pattern name is fixuns_truncmn2. 
*/ +(define_expand "fixuns_truncv4dfv4si2" + [(set (match_operand:V4SI 0 "register_operand") + (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand")))] + "TARGET_AVX512VL && TARGET_AVX512F") + (define_insn "ufix_truncv4dfv4si2<mask_name>" [(set (match_operand:V4SI 0 "register_operand" "=v") (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))] diff --git a/gcc/testsuite/g++.target/i386/pr85048.C b/gcc/testsuite/g++.target/i386/pr85048.C new file mode 100644 index 00000000000..52973c18ebd --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr85048.C @@ -0,0 +1,33 @@ +/* PR target/85048 */ +/* { dg-do compile } */ +/* { dg-options "-std=c++17 -O2 -mavx512vl -mavx512dq -mprefer-vector-width=512" } */ +/* { dg-final { scan-assembler-times {(?n)vcvtudq2pd[ \t]+} 2 } } */ +/* { dg-final { scan-assembler-times {(?n)vcvttps2udq[ \t]+} 2 } } */ +/* { dg-final { scan-assembler-times {(?n)vcvttpd2udqy?[ \t]+} 1 } } */ + +#include <cstdint> + +template <class T, int N, int Size = N * sizeof(T)> +using V [[gnu::vector_size(Size)]] = T; + +template <class From, class To> V<To, 4> cvt4(V<From, 4> x) { + return V<To, 4>{To(x[0]), To(x[1]), To(x[2]), To(x[3])}; +} +template <class From, class To> V<To, 8> cvt8(V<From, 8> x) { + return V<To, 8>{ + To(x[0]), To(x[1]), To(x[2]), To(x[3]), + To(x[4]), To(x[5]), To(x[6]), To(x[7]) + }; +} + +#define _(name, from, to, size) \ +auto name(V<from, size> x) { return cvt##size<from, to>(x); } +// integral -> double +_(vcvtudq2pd, uint32_t, double, 4) +_(vcvtudq2pd, uint32_t, double, 8) + +_( cvttps2udq, float, uint32_t, 4) +_(vcvttps2udq, float, uint32_t, 8) + +// double -> integral +_(vcvttpd2udq, double, uint32_t, 4) -- 2.39.1.388.g2fc9e9ca3c