Do you have any advice?

BRs,
Lin

-----Original Message-----
From: Hu, Lin1 <lin1...@intel.com> 
Sent: Wednesday, May 8, 2024 9:38 AM
To: gcc-patches@gcc.gnu.org
Cc: Liu, Hongtao <hongtao....@intel.com>; ubiz...@gmail.com
Subject: [PATCH] vect: generate suitable convert insn for int -> int, float -> 
float and int <-> float.

Hi, all

This patch aims to optimize __builtin_convertvector so that it generates
more efficient instructions in some situations, e.g. v2si -> v2di.

The patch has been bootstrapped and regtested on x86_64-pc-linux-gnu.
OK for trunk?

BRs,
Lin

gcc/ChangeLog:

        PR target/107432
        * tree-vect-generic.cc (expand_vector_conversion): Support
        convert for int -> int, float -> float and int <-> float.
        (expand_vector_conversion_no_vec_pack): New function.  Check
        whether int <-> int, float <-> float and int <-> float can be
        converted directly.  Support indirect conversion when the direct
        optab is not supported.

gcc/testsuite/ChangeLog:

        PR target/107432
        * gcc.target/i386/pr107432-1.c: New test.
        * gcc.target/i386/pr107432-2.c: Ditto.
        * gcc.target/i386/pr107432-3.c: Ditto.
        * gcc.target/i386/pr107432-4.c: Ditto.
        * gcc.target/i386/pr107432-5.c: Ditto.
        * gcc.target/i386/pr107432-6.c: Ditto.
        * gcc.target/i386/pr107432-7.c: Ditto.
---
 gcc/testsuite/gcc.target/i386/pr107432-1.c | 234 +++++++++++++++++++++  
gcc/testsuite/gcc.target/i386/pr107432-2.c | 105 +++++++++  
gcc/testsuite/gcc.target/i386/pr107432-3.c |  55 +++++  
gcc/testsuite/gcc.target/i386/pr107432-4.c |  56 +++++  
gcc/testsuite/gcc.target/i386/pr107432-5.c |  72 +++++++  
gcc/testsuite/gcc.target/i386/pr107432-6.c | 139 ++++++++++++  
gcc/testsuite/gcc.target/i386/pr107432-7.c | 156 ++++++++++++++
 gcc/tree-vect-generic.cc                   | 107 +++++++++-
 8 files changed, 918 insertions(+), 6 deletions(-)  create mode 100644 
gcc/testsuite/gcc.target/i386/pr107432-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-7.c

diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c 
b/gcc/testsuite/gcc.target/i386/pr107432-1.c
new file mode 100644
index 00000000000..a4f37447eb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
@@ -0,0 +1,234 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vpmovqd" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovqw" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovqb" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovdw" 6 { target { ia32 } } } 
+} */
+/* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } 
+} */
+/* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+
+#include <x86intrin.h>
+
+typedef short __v2hi __attribute__ ((__vector_size__ (4))); typedef 
+char __v2qi __attribute__ ((__vector_size__ (2))); typedef char __v4qi 
+__attribute__ ((__vector_size__ (4))); typedef char __v8qi 
+__attribute__ ((__vector_size__ (8)));
+
+typedef unsigned short __v2hu __attribute__ ((__vector_size__ (4))); 
+typedef unsigned short __v4hu __attribute__ ((__vector_size__ (8))); 
+typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2))); 
+typedef unsigned char __v4qu __attribute__ ((__vector_size__ (4))); 
+typedef unsigned char __v8qu __attribute__ ((__vector_size__ (8))); 
+typedef unsigned int __v2su __attribute__ ((__vector_size__ (8)));
+
+__v2si mm_cvtepi64_epi32_builtin_convertvector(__m128i a) {
+  return __builtin_convertvector((__v2di)a, __v2si); }
+
+__m128i        mm256_cvtepi64_epi32_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v4di)a, __v4si); }
+
+__m256i        mm512_cvtepi64_epi32_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v8di)a, __v8si); }
+
+__v2hi mm_cvtepi64_epi16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2hi); }
+
+__v4hi mm256_cvtepi64_epi16_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4di)a, __v4hi); }
+
+__m128i        mm512_cvtepi64_epi16_builtin_convertvector(__m512i a)
+{
+  return (__m128i)__builtin_convertvector((__v8di)a, __v8hi); }
+
+__v2qi mm_cvtepi64_epi8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2qi); }
+
+__v4qi mm256_cvtepi64_epi8_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4di)a, __v4qi); }
+
+__v8qi mm512_cvtepi64_epi8_builtin_convertvector(__m512i a)
+{
+  return __builtin_convertvector((__v8di)a, __v8qi); }
+
+__v2hi mm64_cvtepi32_epi16_builtin_convertvector(__v2si a)
+{
+  return __builtin_convertvector((__v2si)a, __v2hi); }
+
+__v4hi mm_cvtepi32_epi16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v4si)a, __v4hi); }
+
+__m128i        mm256_cvtepi32_epi16_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v8si)a, __v8hi); }
+
+__m256i        mm512_cvtepi32_epi16_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v16si)a, __v16hi); }
+
+__v2qi mm64_cvtepi32_epi8_builtin_convertvector(__v2si a)
+{
+  return __builtin_convertvector((__v2si)a, __v2qi); }
+
+__v4qi mm_cvtepi32_epi8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v4si)a, __v4qi); }
+
+__v8qi mm256_cvtepi32_epi8_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v8si)a, __v8qi); }
+
+__m128i        mm512_cvtepi32_epi8_builtin_convertvector(__m512i a)
+{
+  return (__m128i)__builtin_convertvector((__v16si)a, __v16qi); }
+
+__v2qi mm32_cvtepi16_epi8_builtin_convertvector(__v2hi a)
+{
+  return __builtin_convertvector((__v2hi)a, __v2qi); }
+
+__v8qi mm_cvtepi16_epi8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v8hi)a, __v8qi); }
+
+__m128i        mm256_cvtepi16_epi8_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v16hi)a, __v16qi); }
+
+__m256i        mm512_cvtepi16_epi8_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v32hi)a, __v32qi); }
+
+__v2su mm_cvtepu64_epu32_builtin_convertvector(__m128i a) {
+  return __builtin_convertvector((__v2du)a, __v2su); }
+
+__m128i        mm256_cvtepu64_epu32_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v4du)a, __v4su); }
+
+__m256i        mm512_cvtepu64_epu32_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v8du)a, __v8su); }
+
+__v2hu mm_cvtepu64_epu16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2du)a, __v2hu); }
+
+__v4hu mm256_cvtepu64_epu16_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4du)a, __v4hu); }
+
+__m128i        mm512_cvtepu64_epu16_builtin_convertvector(__m512i a)
+{
+  return (__m128i)__builtin_convertvector((__v8du)a, __v8hu); }
+
+__v2qu mm_cvtepu64_epu8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2du)a, __v2qu); }
+
+__v4qu mm256_cvtepu64_epu8_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4du)a, __v4qu); }
+
+__v8qu mm512_cvtepu64_epu8_builtin_convertvector(__m512i a)
+{
+  return __builtin_convertvector((__v8du)a, __v8qu); }
+
+__v2hu mm32_cvtepu32_epu16_builtin_convertvector(__v2su a)
+{
+  return __builtin_convertvector((__v2su)a, __v2hu); }
+
+__v4hu mm_cvtepu32_epu16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v4su)a, __v4hu); }
+
+__m128i        mm256_cvtepu32_epu16_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v8su)a, __v8hu); }
+
+__m256i        mm512_cvtepu32_epu16_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v16su)a, __v16hu); }
+
+__v2qu mm32_cvtepu32_epu8_builtin_convertvector(__v2su a)
+{
+  return __builtin_convertvector((__v2su)a, __v2qu); }
+
+__v4qu mm_cvtepu2_epu8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v4su)a, __v4qu); }
+
+__v8qu mm256_cvtepu32_epu8_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v8su)a, __v8qu); }
+
+__m128i        mm512_cvtepu32_epu8_builtin_convertvector(__m512i a)
+{
+  return (__m128i)__builtin_convertvector((__v16su)a, __v16qu); }
+
+__v2qu mm32_cvtepu16_epu8_builtin_convertvector(__v2hu a)
+{
+  return __builtin_convertvector((__v2hu)a, __v2qu); }
+
+__v8qu mm_cvtepu16_epu8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v8hu)a, __v8qu); }
+
+__m128i        mm256_cvtepu16_epu8_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v16hu)a, __v16qu); }
+
+__m256i        mm512_cvtepu16_epu8_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v32hu)a, __v32qu); }
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-2.c 
b/gcc/testsuite/gcc.target/i386/pr107432-2.c
new file mode 100644
index 00000000000..02ffd811cb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-2.c
@@ -0,0 +1,105 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vpmovsxdq" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxwq" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbq" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxwd" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbw" 3 } } */
+
+#include <x86intrin.h>
+
+typedef short __v2hi __attribute__ ((__vector_size__ (4))); typedef 
+char __v2qi __attribute__ ((__vector_size__ (2))); typedef char __v4qi 
+__attribute__ ((__vector_size__ (4))); typedef char __v8qi 
+__attribute__ ((__vector_size__ (8)));
+
+__m128i mm_cvtepi32_epi64_builtin_convertvector(__v2si a) {
+  return __builtin_convertvector(a, __v2di); }
+
+__m256i        mm256_cvtepi32_epi64_builtin_convertvector(__v4si a)
+{
+  return (__m256i)__builtin_convertvector(a, __v4di); }
+
+__m512i        mm512_cvtepi32_epi64_builtin_convertvector(__v8si a)
+{
+  return (__m512i)__builtin_convertvector(a, __v8di); }
+
+__m128i mm_cvtepi16_epi64_builtin_convertvector(__v2hi a) {
+  return __builtin_convertvector(a, __v2di); }
+
+__m256i        mm256_cvtepi16_epi64_builtin_convertvector(__v4hi a)
+{
+  return (__m256i)__builtin_convertvector(a, __v4di); }
+
+__m512i        mm512_cvtepi16_epi64_builtin_convertvector(__v8hi a)
+{
+  return (__m512i)__builtin_convertvector(a, __v8di); }
+
+__m128i mm_cvtepi8_epi64_builtin_convertvector(__v2qi a) {
+  return __builtin_convertvector(a, __v2di); }
+
+__m256i        mm256_cvtepi8_epi64_builtin_convertvector(__v4qi a)
+{
+  return (__m256i)__builtin_convertvector(a, __v4di); }
+
+__m512i        mm512_cvtepi8_epi64_builtin_convertvector(__v8qi a)
+{
+  return (__m512i)__builtin_convertvector(a, __v8di); }
+
+__m128i mm_cvtepi16_epi32_builtin_convertvector(__v4hi a) {
+  return (__m128i)__builtin_convertvector(a, __v4si); }
+
+__m256i        mm256_cvtepi16_epi32_builtin_convertvector(__v8hi a)
+{
+  return (__m256i)__builtin_convertvector(a, __v8si); }
+
+__m512i        mm512_cvtepi16_epi32_builtin_convertvector(__v16hi a)
+{
+  return (__m512i)__builtin_convertvector(a, __v16si); }
+
+__m128i mm_cvtepi8_epi32_builtin_convertvector(__v4qi a) {
+  return (__m128i)__builtin_convertvector(a, __v4si); }
+
+__m256i        mm256_cvtepi8_epi32_builtin_convertvector(__v8qi a)
+{
+  return (__m256i)__builtin_convertvector(a, __v8si); }
+
+__m512i        mm512_cvtepi8_epi32_builtin_convertvector(__v16qi a)
+{
+  return (__m512i)__builtin_convertvector(a, __v16si); }
+
+__m128i mm_cvtepi8_epi16_builtin_convertvector(__v8qi a) {
+  return (__m128i)__builtin_convertvector(a, __v8hi); }
+
+__m256i        mm256_cvtepi8_epi16_builtin_convertvector(__v16qi a)
+{
+  return (__m256i)__builtin_convertvector(a, __v16hi); }
+
+__v32hi        mm512_cvtepi8_epi16_builtin_convertvector(__v32qi a)
+{
+  return __builtin_convertvector(a, __v32hi); }
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-3.c 
b/gcc/testsuite/gcc.target/i386/pr107432-3.c
new file mode 100644
index 00000000000..30dc947b6dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-3.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512fp16 -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vcvtpd2ps" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2ph" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtps2ph" 3 } } */
+
+#include <x86intrin.h>
+
+typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4))); typedef 
+_Float16 __v4hf __attribute__ ((__vector_size__ (8)));
+
+__v2sf mm_cvtpd_ps_builtin_convertvector(__v2df a) {
+  return __builtin_convertvector(a, __v2sf); }
+
+__v4sf mm256_cvtpd_ps_builtin_convertvector(__v4df a)
+{
+  return __builtin_convertvector(a, __v4sf); }
+
+__v8sf mm512_cvtpd_ps_builtin_convertvector(__v8df a)
+{
+  return __builtin_convertvector(a, __v8sf); }
+
+__v2hf mm_cvtpd_ph_builtin_convertvector(__v2df a) {
+  return __builtin_convertvector(a, __v2hf); }
+
+__v4hf mm256_cvtpd_ph_builtin_convertvector(__v4df a)
+{
+  return __builtin_convertvector(a, __v4hf); }
+
+__v8hf mm512_cvtpd_ph_builtin_convertvector(__v8df a)
+{
+  return __builtin_convertvector(a, __v8hf); }
+
+__v4hf mm_cvtps_ph_builtin_convertvector(__v4sf a) {
+  return __builtin_convertvector(a, __v4hf); }
+
+__v8hf mm256_cvtps_ph_builtin_convertvector(__v8sf a)
+{
+  return __builtin_convertvector(a, __v8hf); }
+
+__v16hf        mm512_cvtps_ph_builtin_convertvector(__v16sf a)
+{
+  return __builtin_convertvector(a, __v16hf); }
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-4.c 
b/gcc/testsuite/gcc.target/i386/pr107432-4.c
new file mode 100644
index 00000000000..e537e7349e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-4.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512fp16 -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vcvtps2pd" 2 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvtps2pd" 3 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvtph2pd" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtph2ps" 3 } } */
+
+#include <x86intrin.h>
+
+typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4))); typedef 
+_Float16 __v4hf __attribute__ ((__vector_size__ (8)));
+
+__v2df mm_cvtps_pd_builtin_convertvector(__v2sf a) {
+  return __builtin_convertvector(a, __v2df); }
+
+__v4df mm256_cvtps_pd_builtin_convertvector(__v4sf a)
+{
+  return __builtin_convertvector(a, __v4df); }
+
+__v8df mm512_cvtps_pd_builtin_convertvector(__v8sf a)
+{
+  return __builtin_convertvector(a, __v8df); }
+
+__v2df mm_cvtph_pd_builtin_convertvector(__v2hf a) {
+  return __builtin_convertvector(a, __v2df); }
+
+__v4df mm256_cvtph_pd_builtin_convertvector(__v4hf a)
+{
+  return __builtin_convertvector(a, __v4df); }
+
+__v8df mm512_cvtph_pd_builtin_convertvector(__v8hf a)
+{
+  return __builtin_convertvector(a, __v8df); }
+
+__v4sf mm_cvtph_ps_builtin_convertvector(__v4hf a) {
+  return __builtin_convertvector(a, __v4sf); }
+
+__v8sf mm256_cvtph_ps_builtin_convertvector(__v8hf a)
+{
+  return __builtin_convertvector(a, __v8sf); }
+
+__v16sf        mm512_cvtph_ps_builtin_convertvector(__v16hf a)
+{
+  return __builtin_convertvector(a, __v16sf); }
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-5.c 
b/gcc/testsuite/gcc.target/i386/pr107432-5.c
new file mode 100644
index 00000000000..5a44ef9f3b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-5.c
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512dq -mavx512fp16 -mavx512vl -O3" 
+} */
+/* { dg-final { scan-assembler-times "vcvttpd2dq" 3 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qq" 2 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvttps2qq" 3 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvttph2dq" 3 } } */
+/* { dg-final { scan-assembler-times "vcvttph2qq" 3 } } */
+
+#include <x86intrin.h>
+
+typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4))); typedef 
+_Float16 __v4hf __attribute__ ((__vector_size__ (8)));
+
+__v2si mm_cvtpd_epi32_builtin_convertvector(__v2df a) {
+  return __builtin_convertvector(a, __v2si); }
+
+__v4si mm256_cvtpd_epi32_builtin_convertvector(__v4df a)
+{
+  return __builtin_convertvector(a, __v4si); }
+
+__v8si mm512_cvtpd_epi32_builtin_convertvector(__v8df a)
+{
+  return __builtin_convertvector(a, __v8si); }
+
+__v2di mm_cvtps_epi64_builtin_convertvector(__v2sf a) {
+  return __builtin_convertvector(a, __v2di); }
+
+__v4di mm256_cvtps_epi64_builtin_convertvector(__v4sf a)
+{
+  return __builtin_convertvector(a, __v4di); }
+
+__v8di mm512_cvtps_epi64_builtin_convertvector(__v8sf a)
+{
+  return __builtin_convertvector(a, __v8di); }
+
+__v4si mm_cvtph_epi32_builtin_convertvector(__v4hf a) {
+  return __builtin_convertvector(a, __v4si); }
+
+__v8si mm256_cvtph_epi32_builtin_convertvector(__v8hf a)
+{
+  return __builtin_convertvector(a, __v8si); }
+
+__v16si        mm512_cvtph_epi32_builtin_convertvector(__v16hf a)
+{
+  return __builtin_convertvector(a, __v16si); }
+
+__v2di mm_cvtph_epi64_builtin_convertvector(__v2hf a) {
+  return __builtin_convertvector(a, __v2di); }
+
+__v4di mm256_cvtph_epi64_builtin_convertvector(__v4hf a)
+{
+  return __builtin_convertvector(a, __v4di); }
+
+__v8di mm512_cvtph_epi64_builtin_convertvector(__v8hf a)
+{
+  return __builtin_convertvector(a, __v8di); }
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-6.c 
b/gcc/testsuite/gcc.target/i386/pr107432-6.c
new file mode 100644
index 00000000000..4a68a10b089
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-6.c
@@ -0,0 +1,139 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -mavx512vl -mavx512bw -O2 -mavx512dq 
+-fno-trapping-math" } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq" 2 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq" 3 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udq" 2 { target { ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udq" 3 { target { ! ia32 
+} } } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq" 3 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvttps2dq" 4 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvttps2udq" 3 { target { ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvttps2udq" 4 { target { ! ia32 
+} } } } */
+/* { dg-final { scan-assembler-times "vcvttph2w" 4 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw" 4 } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 10 { target { ia32 } } } 
+} */
+/* { dg-final { scan-assembler-times "vpmovdb" 14 { target { ! ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+
+#include <x86intrin.h>
+
+typedef char __v2qi __attribute__ ((__vector_size__ (2))); typedef char 
+__v4qi __attribute__ ((__vector_size__ (4))); typedef char __v8qi 
+__attribute__ ((__vector_size__ (8))); typedef char __v16qi 
+__attribute__ ((__vector_size__ (16))); typedef unsigned char __v2qu 
+__attribute__ ((vector_size (2))); typedef unsigned char __v4qu 
+__attribute__ ((vector_size (4))); typedef unsigned char __v8qu 
+__attribute__ ((vector_size (8))); typedef unsigned char __v16qu 
+__attribute__ ((vector_size (16))); typedef _Float16 __v2hf 
+__attribute__ ((__vector_size__ (4))); typedef _Float16 __v4hf 
+__attribute__ ((__vector_size__ (8))); typedef _Float16 __v8hf 
+__attribute__ ((__vector_size__ (16)));
+
+__v2qi mm_cvtpd_epi8_builtin_convertvector(__v2df a)
+{
+  return __builtin_convertvector((__v2df)a, __v2qi); }
+
+__v4qi mm256_cvtpd_epi8_builtin_convertvector(__v4df a)
+{
+  return __builtin_convertvector((__v4df)a, __v4qi); }
+
+__v8qi mm512_cvtpd_epi8_builtin_convertvector(__v8df a)
+{
+  return __builtin_convertvector((__v8df)a, __v8qi); }
+
+__v2qu mm_cvtpd_epu8_builtin_convertvector(__v2df a)
+{
+  return __builtin_convertvector((__v2df)a, __v2qu); }
+
+__v4qu mm256_cvtpd_epu8_builtin_convertvector(__v4df a)
+{
+  return __builtin_convertvector((__v4df)a, __v4qu); }
+
+__v8qu mm512_cvtpd_epu8_builtin_convertvector(__v8df a)
+{
+  return __builtin_convertvector((__v8df)a, __v8qu); }
+
+__v2qi mm64_cvtps_epi8_builtin_convertvector(__v2sf a)
+{
+  return __builtin_convertvector((__v2sf)a, __v2qi); }
+
+__v4qi mm128_cvtps_epi8_builtin_convertvector(__v4sf a)
+{
+  return __builtin_convertvector((__v4sf)a, __v4qi); }
+
+__v8qi mm256_cvtps_epi8_builtin_convertvector(__v8sf a)
+{
+  return __builtin_convertvector((__v8sf)a, __v8qi); }
+
+__v16qi        mm512_cvtps_epi8_builtin_convertvector(__v16sf a)
+{
+  return __builtin_convertvector((__v16sf)a, __v16qi); }
+
+__v2qu mm64_cvtps_epu8_builtin_convertvector(__v2sf a)
+{
+  return __builtin_convertvector((__v2sf)a, __v2qu); }
+
+__v4qu mm128_cvtps_epu8_builtin_convertvector(__v4sf a)
+{
+  return __builtin_convertvector((__v4sf)a, __v4qu); }
+
+__v8qu mm256_cvtps_epu8_builtin_convertvector(__v8sf a)
+{
+  return __builtin_convertvector((__v8sf)a, __v8qu); }
+
+__v16qu        mm512_cvtps_epu8_builtin_convertvector(__v16sf a)
+{
+  return __builtin_convertvector((__v16sf)a, __v16qu); }
+
+__v2qi mm32_cvtph_epi8_builtin_convertvector(__v2hf a)
+{
+  return __builtin_convertvector((__v2hf)a, __v2qi); }
+
+__v8qi mm128_cvtph_epi8_builtin_convertvector(__v8hf a)
+{
+  return __builtin_convertvector((__v8hf)a, __v8qi); }
+
+__v16qi        mm256_cvtph_epi8_builtin_convertvector(__v16hf a)
+{
+  return __builtin_convertvector((__v16hf)a, __v16qi); }
+
+__v32qi        mm512_cvtph_epi8_builtin_convertvector(__v32hf a)
+{
+  return __builtin_convertvector((__v32hf)a, __v32qi); }
+
+__v2qu mm32_cvtph_epu8_builtin_convertvector(__v2hf a)
+{
+  return __builtin_convertvector((__v2hf)a, __v2qu); }
+
+__v8qu mm128_cvtph_epu8_builtin_convertvector(__v8hf a)
+{
+  return __builtin_convertvector((__v8hf)a, __v8qu); }
+
+__v16qu        mm256_cvtph_epu8_builtin_convertvector(__v16hf a)
+{
+  return __builtin_convertvector((__v16hf)a, __v16qu); }
+
+__v32qu        mm512_cvtph_epu8_builtin_convertvector(__v32hf a)
+{
+  return __builtin_convertvector((__v32hf)a, __v32qu); }
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-7.c 
b/gcc/testsuite/gcc.target/i386/pr107432-7.c
new file mode 100644
index 00000000000..0ff5a97ed1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-7.c
@@ -0,0 +1,156 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -mavx512vl -mavx512bw -O2 -mavx512dq 
+-fno-trapping-math" } */
+/* { dg-final { scan-assembler-times "vcvtdq2pd" 2 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvtdq2pd" 3 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvtudq2pd" 2 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvtudq2pd" 3 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvtdq2ps" 3 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvtdq2ps" 4 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvtudq2ps" 3 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvtudq2ps" 4 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvtw2ph" 4 { target { ia32 } } } 
+} */
+/* { dg-final { scan-assembler-times "vcvtw2ph" 5 { target { ! ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvtuw2ph" 4 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvtuw2ph" 5 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 5 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 7 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vpmovzxbd" 5 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vpmovzxbd" 7 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 5 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 7 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vpmovzxbd" 5 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vpmovzxbd" 7 { target { ! ia32 } 
+} } } */
+
+#include <x86intrin.h>
+
+typedef char __v2qi __attribute__ ((__vector_size__ (2))); typedef char 
+__v4qi __attribute__ ((__vector_size__ (4))); typedef char __v8qi 
+__attribute__ ((__vector_size__ (8))); typedef char __v16qi 
+__attribute__ ((__vector_size__ (16))); typedef unsigned char __v2qu 
+__attribute__ ((vector_size (2))); typedef unsigned char __v4qu 
+__attribute__ ((vector_size (4))); typedef unsigned char __v8qu 
+__attribute__ ((vector_size (8))); typedef unsigned char __v16qu 
+__attribute__ ((vector_size (16))); typedef _Float16 __v2hf 
+__attribute__ ((__vector_size__ (4))); typedef _Float16 __v4hf 
+__attribute__ ((__vector_size__ (8))); typedef _Float16 __v8hf 
+__attribute__ ((__vector_size__ (16)));
+
+__v2df mm_cvtepi8_pd_builtin_convertvector(__v2qi a)
+{
+  return __builtin_convertvector((__v2qi)a, __v2df); }
+
+__v4df mm256_cvtepi8_pd_builtin_convertvector(__v4qi a)
+{
+  return __builtin_convertvector((__v4qi)a, __v4df); }
+
+__v8df mm512_cvtepi8_pd_builtin_convertvector(__v8qi a)
+{
+  return __builtin_convertvector((__v8qi)a, __v8df); }
+
+__v2df mm_cvtepu8_pd_builtin_convertvector(__v2qu a)
+{
+  return __builtin_convertvector((__v2qu)a, __v2df); }
+
+__v4df mm256_cvtepu8_pd_builtin_convertvector(__v4qu a)
+{
+  return __builtin_convertvector((__v4qu)a, __v4df); }
+
+__v8df mm512_cvtepu8_pd_builtin_convertvector(__v8qu a)
+{
+  return __builtin_convertvector((__v8qu)a, __v8df); }
+
+__v2sf mm64_cvtepi8_ps_builtin_convertvector(__v2qi a)
+{
+  return __builtin_convertvector((__v2qi)a, __v2sf); }
+
+__v4sf mm128_cvtepi8_ps_builtin_convertvector(__v4qi a)
+{
+  return __builtin_convertvector((__v4qi)a, __v4sf); }
+
+__v8sf mm256_cvtepi8_ps_builtin_convertvector(__v8qi a)
+{
+  return __builtin_convertvector((__v8qi)a, __v8sf); }
+
+__v16sf        mm512_cvtepi8_ps_builtin_convertvector(__v16qi a)
+{
+  return __builtin_convertvector((__v16qi)a, __v16sf); }
+
+__v2sf mm64_cvtepu8_ps_builtin_convertvector(__v2qu a)
+{
+  return __builtin_convertvector((__v2qu)a, __v2sf); }
+
+__v4sf mm128_cvtepu8_ps_builtin_convertvector(__v4qu a)
+{
+  return __builtin_convertvector((__v4qu)a, __v4sf); }
+
+__v8sf mm256_cvtepu8_ps_builtin_convertvector(__v8qu a)
+{
+  return __builtin_convertvector((__v8qu)a, __v8sf); }
+
+__v16sf        mm512_cvtepu8_ps_builtin_convertvector(__v16qu a)
+{
+  return __builtin_convertvector((__v16qu)a, __v16sf); }
+
+__v2hf mm32_cvtepi8_ph_builtin_convertvector(__v2qi a)
+{
+  return __builtin_convertvector((__v2qi)a, __v2hf); }
+
+__v4hf mm64_cvtepi8_ph_builtin_convertvector(__v4qi a)
+{
+  return __builtin_convertvector((__v4qi)a, __v4hf); }
+
+__v8hf mm128_cvtepi8_ph_builtin_convertvector(__v8qi a)
+{
+  return __builtin_convertvector((__v8qi)a, __v8hf); }
+
+__v16hf        mm256_cvtepi8_ph_builtin_convertvector(__v16qi a)
+{
+  return __builtin_convertvector((__v16qi)a, __v16hf); }
+
+__v32hf        mm512_cvtepi8_ph_builtin_convertvector(__v32qi a)
+{
+  return __builtin_convertvector((__v32qi)a, __v32hf); }
+
+__v2hf mm32_cvtepu8_ph_builtin_convertvector(__v2qu a)
+{
+  return __builtin_convertvector((__v2qu)a, __v2hf); }
+
+__v4hf mm64_cvtepu8_ph_builtin_convertvector(__v4qu a)
+{
+  return __builtin_convertvector((__v4qu)a, __v4hf); }
+
+__v8hf mm128_cvtepu8_ph_builtin_convertvector(__v8qu a)
+{
+  return __builtin_convertvector((__v8qu)a, __v8hf); }
+
+__v16hf        mm256_cvtepu8_ph_builtin_convertvector(__v16qu a)
+{
+  return __builtin_convertvector((__v16qu)a, __v16hf); }
+
+__v32hf        mm512_cvtepu8_ph_builtin_convertvector(__v32qu a)
+{
+  return __builtin_convertvector((__v32qu)a, __v32hf); }
diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc index 
ab640096ca2..e14fac9f179 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -45,6 +45,8 @@ along with GCC; see the file COPYING3.  If not see  #include 
"gimple-match.h"
 #include "recog.h"             /* FIXME: for insn_data */
 #include "optabs-libfuncs.h"
+#include "cfgloop.h"
+#include "tree-vectorizer.h"
 
 
 /* Build a ternary operation and gimplify it.  Emit code before GSI.
@@ -1834,6 +1836,102 @@ do_vec_narrow_conversion (gimple_stmt_iterator *gsi, 
tree inner_type, tree a,
   return gimplify_build2 (gsi, code, outer_type, b, c);  }
 
+/* A subroutine of expand_vector_conversion.  Try to expand the conversion
+   CODE of ARG into LHS without going through the vec_pack/vec_unpack
+   paths, replacing the statement at GSI on success.
+
+   A direct conversion is tried first via supportable_convert_operation.
+   If that is not supported and the conversion is either a narrowing
+   FIX_TRUNC_EXPR (only when !flag_trapping_math) or a widening
+   FLOAT_EXPR, a two-step conversion through an intermediate integer
+   vector type is tried instead, e.g. char -> int -> double.
+
+   Return true if the statement at GSI has been replaced, false if
+   nothing was changed and the caller has to expand the conversion
+   some other way.  */
+
+static bool
+expand_vector_conversion_no_vec_pack (gimple_stmt_iterator *gsi,
+                                      enum tree_code code,
+                                      tree lhs,
+                                      tree arg)
+{
+  tree ret_type = TREE_TYPE (lhs);
+  tree arg_type = TREE_TYPE (arg);
+  enum tree_code code1 = ERROR_MARK;
+
+  /* Use a single conversion statement when the target supports it.  */
+  if (supportable_convert_operation (code, ret_type, arg_type, &code1))
+    {
+      gimple *g = gimple_build_assign (lhs, code1, arg);
+      gsi_replace (gsi, g, false);
+      return true;
+    }
+
+  unsigned int ret_elt_bits = vector_element_bits (ret_type);
+  unsigned int arg_elt_bits = vector_element_bits (arg_type);
+  bool narrow_p = ret_elt_bits < arg_elt_bits;
+  bool widen_p = ret_elt_bits > arg_elt_bits;
+  bool float_expr_p = code == FLOAT_EXPR;
+
+  /* Only float -> narrower int (without trapping math) and
+     int -> wider float are handled by the two-step scheme.  */
+  if (!((code == FIX_TRUNC_EXPR && !flag_trapping_math && narrow_p)
+        || (float_expr_p && widen_p)))
+    return false;
+
+  scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE (ret_type));
+  scalar_mode rhs_mode = GET_MODE_INNER (TYPE_MODE (arg_type));
+  scalar_mode tmp_cvt_mode;
+  unsigned short target_size;
+  if (narrow_p)
+    {
+      /* Start from the (integer) result element mode and do not go
+         beyond the size of the source element.  */
+      tmp_cvt_mode = lhs_mode;
+      target_size = GET_MODE_SIZE (rhs_mode);
+    }
+  else
+    {
+      /* Start from the integer mode as wide as the source element and do
+         not go beyond the size of the result element.  */
+      target_size = GET_MODE_SIZE (lhs_mode);
+      int rhs_size = GET_MODE_BITSIZE (rhs_mode);
+      if (!int_mode_for_size (rhs_size, 0).exists (&tmp_cvt_mode))
+        return false;
+    }
+
+  /* FIRST_CODE converts ARG to the intermediate type, FINAL_CODE converts
+     the intermediate value to the type of LHS.  For FLOAT_EXPR the integer
+     extension comes first; for FIX_TRUNC_EXPR the float -> int step comes
+     first and is followed by an integer conversion.  */
+  enum tree_code final_code = float_expr_p ? code : NOP_EXPR;
+  enum tree_code first_code = float_expr_p ? NOP_EXPR : code;
+
+  unsigned HOST_WIDE_INT nelts
+    = constant_lower_bound (TYPE_VECTOR_SUBPARTS (arg_type));
+  /* Loop-invariant: signedness of the intermediate integer type.  */
+  bool unsigned_p = TYPE_UNSIGNED (ret_type) || TYPE_UNSIGNED (arg_type);
+
+  opt_scalar_mode mode_iter;
+  FOR_EACH_2XWIDER_MODE (mode_iter, tmp_cvt_mode)
+    {
+      tmp_cvt_mode = mode_iter.require ();
+
+      if (GET_MODE_SIZE (tmp_cvt_mode) > target_size)
+        break;
+
+      scalar_mode cvt_mode;
+      int tmp_cvt_size = GET_MODE_BITSIZE (tmp_cvt_mode);
+      if (!int_mode_for_size (tmp_cvt_size, 0).exists (&cvt_mode))
+        break;
+
+      int cvt_size = GET_MODE_BITSIZE (cvt_mode);
+      tree cvt_type = build_nonstandard_integer_type (cvt_size, unsigned_p);
+      cvt_type = build_vector_type (cvt_type, nelts);
+
+      /* Both steps must be supported for this intermediate type; otherwise
+         try the next wider one.  */
+      enum tree_code tc1, tc2;
+      if (cvt_type == NULL_TREE
+          || !supportable_convert_operation (final_code, ret_type,
+                                             cvt_type, &tc1)
+          || !supportable_convert_operation (first_code, cvt_type,
+                                             arg_type, &tc2))
+        continue;
+
+      /* Emit ARG -> intermediate before the statement, then replace the
+         statement itself with intermediate -> LHS.  */
+      tree new_rhs = make_ssa_name (cvt_type);
+      gimple *g = vect_gimple_build (new_rhs, tc2, arg);
+      gsi_insert_before (gsi, g, GSI_SAME_STMT);
+      g = gimple_build_assign (lhs, tc1, new_rhs);
+      gsi_replace (gsi, g, false);
+      return true;
+    }
+
+  return false;
+}
+
 /* Expand VEC_CONVERT ifn call.  */
 
 static void
@@ -1871,14 +1969,11 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
   else if (ret_elt_bits > arg_elt_bits)
     modifier = WIDEN;
 
+  if (expand_vector_conversion_no_vec_pack(gsi, code, lhs, arg))
+    return;
+
   if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
     {
-      if (supportable_convert_operation (code, ret_type, arg_type, &code1))
-       {
-         g = gimple_build_assign (lhs, code1, arg);
-         gsi_replace (gsi, g, false);
-         return;
-       }
       /* Can't use get_compute_type here, as supportable_convert_operation
         doesn't necessarily use an optab and needs two arguments.  */
       tree vec_compute_type
--
2.31.1


Reply via email to