vcvttpd2udq.

liuhongt via Gcc-patches Wed, 29 Mar 2023 18:47:34 -0700

The standard pattern names for unsigned_{float,fix} are misspelled in
current trunk: they should be floatunsmn2/fixuns_truncmn2, not
ufloatmn2/ufix_truncmn2.  This patch fixes the typo.


Also vcvttps2udq is available under AVX512VL, so it can be generated
directly instead of being emulated via vcvttps2dq.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
Ok for GCC14 stage1 (or maybe for trunk)?

gcc/ChangeLog:

        PR target/85048
        * config/i386/sse.md (fixuns_trunc<mode><sseintvecmodelower>2):
        Generate vcvttps2udq under AVX512VL.
        (fixuns_truncv4dfv4si2): New expander.
        (floatuns<si2dfmodelower><mode>2): New expander.

gcc/testsuite/ChangeLog:

        * g++.target/i386/pr85048.C: New test.
---
 gcc/config/i386/sse.md                  | 18 ++++++++++++--
 gcc/testsuite/g++.target/i386/pr85048.C | 33 +++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/pr85048.C

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 172ec3bea4f..9c2bd468c65 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -8014,8 +8014,9 @@ (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
    (match_operand:VF1 1 "register_operand")]
   "TARGET_SSE2"
 {
-  if (<MODE>mode == V16SFmode)
-    emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
+  /* AVX512 support vcvttps2udq for all 128/256/512-bit vectors.  */
+  if (<MODE>mode == V16SFmode || TARGET_AVX512VL)
+    emit_insn (gen_ufix_trunc<mode><sseintvecmodelower>2 (operands[0],
                                          operands[1]));
   else
     {
@@ -8413,6 +8414,12 @@ (define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
    (set_attr "prefix" "evex")
    (set_attr "mode" "V4SF")])
 
+(define_expand "floatuns<si2dfmodelower><mode>2"
+  [(set (match_operand:VF2_512_256VL 0 "register_operand")
+       (unsigned_float:VF2_512_256VL
+         (match_operand:<si2dfmode> 1 "nonimmediate_operand")))]
+   "TARGET_AVX512F")
+
 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
   [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
        (unsigned_float:VF2_512_256VL
@@ -8694,6 +8701,13 @@ (define_insn "fix_truncv4dfv4si2<mask_name>"
    (set_attr "prefix" "maybe_evex")
    (set_attr "mode" "OI")])
 
+
+/* The standard pattern name is fixuns_truncmn2.  */
+(define_expand "fixuns_truncv4dfv4si2"
+  [(set (match_operand:V4SI 0 "register_operand")
+       (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512VL && TARGET_AVX512F")
+
 (define_insn "ufix_truncv4dfv4si2<mask_name>"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
        (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
diff --git a/gcc/testsuite/g++.target/i386/pr85048.C b/gcc/testsuite/g++.target/i386/pr85048.C
new file mode 100644
index 00000000000..52973c18ebd
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr85048.C
@@ -0,0 +1,33 @@
+/* PR target/85048 */
+/* { dg-do compile }  */
+/* { dg-options "-std=c++17 -O2 -mavx512vl -mavx512dq -mprefer-vector-width=512" } */
+/* { dg-final { scan-assembler-times {(?n)vcvtudq2pd[ \t]+} 2 } } */
+/* { dg-final { scan-assembler-times {(?n)vcvttps2udq[ \t]+} 2 } } */
+/* { dg-final { scan-assembler-times {(?n)vcvttpd2udqy?[ \t]+} 1 } } */
+
+#include <cstdint>
+
+template <class T, int N, int Size = N * sizeof(T)>
+using V [[gnu::vector_size(Size)]] = T;
+
+template <class From, class To> V<To, 4> cvt4(V<From, 4> x) {
+    return V<To, 4>{To(x[0]), To(x[1]), To(x[2]), To(x[3])};
+}
+template <class From, class To> V<To, 8> cvt8(V<From, 8> x) {
+    return V<To, 8>{
+        To(x[0]), To(x[1]), To(x[2]), To(x[3]),
+        To(x[4]), To(x[5]), To(x[6]), To(x[7])
+    };
+}
+
+#define _(name, from, to, size) \
+auto name(V<from, size> x) { return cvt##size<from, to>(x); }
+// integral -> double
+_(vcvtudq2pd, uint32_t, double, 4)
+_(vcvtudq2pd, uint32_t, double, 8)
+
+_( cvttps2udq, float, uint32_t,  4)
+_(vcvttps2udq, float, uint32_t,  8)
+
+// double -> integral
+_(vcvttpd2udq, double, uint32_t, 4)
-- 
2.39.1.388.g2fc9e9ca3c

[PATCH] Support vector conversion for AVX512 vcvtudq2pd/vcvttps2udq/vcvttpd2udq.

Reply via email to