On Wed, May 29, 2024 at 4:56 PM Hu, Lin1 <lin1...@intel.com> wrote: > > Besides adding TARGET_MMX_WITH_SSE, I merged the two patterns. Ok. > > BRs, > Lin > > gcc/ChangeLog: > > PR target/107432 > * config/i386/mmx.md > (VI2_32_64): New mode iterator. > (mmxhalfmode): New mode attribute. > (mmxhalfmodelower): Ditto. > (truncv2hiv2qi2): Extend mode v4hi and change name from > truncv2hiv2qi to trunc<mode><mmxhalfmodelower>2. > > gcc/testsuite/ChangeLog: > > PR target/107432 > * gcc.target/i386/pr107432-1.c: Modify test. > * gcc.target/i386/pr107432-6.c: Add test. > --- > gcc/config/i386/mmx.md | 17 +++++++++++++---- > gcc/testsuite/gcc.target/i386/pr107432-1.c | 13 ++++++++++++- > gcc/testsuite/gcc.target/i386/pr107432-6.c | 19 ++++++++++++++++--- > 3 files changed, 41 insertions(+), 8 deletions(-) > > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md > index 5f342497885..27b080bfeb6 100644 > --- a/gcc/config/i386/mmx.md > +++ b/gcc/config/i386/mmx.md > @@ -67,6 +67,9 @@ (define_mode_iterator V2F_32 [V2HF V2BF]) > ;; 4-byte integer vector modes > (define_mode_iterator VI_32 [V4QI V2HI]) > > +;; 8-byte and 4-byte HImode vector modes > +(define_mode_iterator VI2_32_64 [(V4HI "TARGET_MMX_WITH_SSE") V2HI]) > + > ;; 4-byte and 2-byte integer vector modes > (define_mode_iterator VI_16_32 [V4QI V2QI V2HI]) > > @@ -106,6 +109,12 @@ (define_mode_attr mmxinsnmode > (define_mode_attr mmxdoublemode > [(V8QI "V8HI") (V4HI "V4SI")]) > > +(define_mode_attr mmxhalfmode > + [(V4HI "V4QI") (V2HI "V2QI")]) > + > +(define_mode_attr mmxhalfmodelower > + [(V4HI "v4qi") (V2HI "v2qi")]) > + > ;; Mapping of vector float modes to an integer mode of the same size > (define_mode_attr mmxintvecmode > [(V2SF "V2SI") (V2SI "V2SI") (V4HI "V4HI") (V8QI "V8QI") > @@ -4873,10 +4882,10 @@ (define_expand "<insn>v2qiv2hi2" > DONE; > }) > > -(define_insn "truncv2hiv2qi2" > - [(set (match_operand:V2QI 0 "register_operand" "=v") > - (truncate:V2QI > - (match_operand:V2HI 1 "register_operand" "v")))] > 
+(define_insn "trunc<mode><mmxhalfmodelower>2" > + [(set (match_operand:<mmxhalfmode> 0 "register_operand" "=v") > + (truncate:<mmxhalfmode> > + (match_operand:VI2_32_64 1 "register_operand" "v")))] > "TARGET_AVX512VL && TARGET_AVX512BW" > "vpmovwb\t{%1, %0|%0, %1}" > [(set_attr "type" "ssemov") > diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c > b/gcc/testsuite/gcc.target/i386/pr107432-1.c > index a4f37447eb4..afdf367afe2 100644 > --- a/gcc/testsuite/gcc.target/i386/pr107432-1.c > +++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c > @@ -7,7 +7,8 @@ > /* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */ > /* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */ > /* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */ > -/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */ > +/* { dg-final { scan-assembler-times "vpmovwb" 8 { target { ia32 } } } } */ > +/* { dg-final { scan-assembler-times "vpmovwb" 10 { target { ! 
ia32 } } } } > */ > > #include <x86intrin.h> > > @@ -113,6 +114,11 @@ __v2qi > mm32_cvtepi16_epi8_builtin_convertvector(__v2hi a) > return __builtin_convertvector((__v2hi)a, __v2qi); > } > > +__v4qi mm64_cvtepi16_epi8_builtin_convertvector(__v4hi a) > +{ > + return __builtin_convertvector((__v4hi)a, __v4qi); > +} > + > __v8qi mm_cvtepi16_epi8_builtin_convertvector(__m128i a) > { > return __builtin_convertvector((__v8hi)a, __v8qi); > @@ -218,6 +224,11 @@ __v2qu > mm32_cvtepu16_epu8_builtin_convertvector(__v2hu a) > return __builtin_convertvector((__v2hu)a, __v2qu); > } > > +__v4qu mm64_cvtepu16_epu8_builtin_convertvector(__v4hu a) > +{ > + return __builtin_convertvector((__v4hu)a, __v4qu); > +} > + > __v8qu mm_cvtepu16_epu8_builtin_convertvector(__m128i a) > { > return __builtin_convertvector((__v8hu)a, __v8qu); > diff --git a/gcc/testsuite/gcc.target/i386/pr107432-6.c > b/gcc/testsuite/gcc.target/i386/pr107432-6.c > index 4a68a10b089..7d3717d45bc 100644 > --- a/gcc/testsuite/gcc.target/i386/pr107432-6.c > +++ b/gcc/testsuite/gcc.target/i386/pr107432-6.c > @@ -8,11 +8,14 @@ > /* { dg-final { scan-assembler-times "vcvttps2dq" 4 { target { ! ia32 } } } > } */ > /* { dg-final { scan-assembler-times "vcvttps2udq" 3 { target { ia32 } } } } > */ > /* { dg-final { scan-assembler-times "vcvttps2udq" 4 { target { ! ia32 } } } > } */ > -/* { dg-final { scan-assembler-times "vcvttph2w" 4 } } */ > -/* { dg-final { scan-assembler-times "vcvttph2uw" 4 } } */ > +/* { dg-final { scan-assembler-times "vcvttph2w" 4 { target { ia32 } } } } */ > +/* { dg-final { scan-assembler-times "vcvttph2w" 5 { target { ! ia32 } } } } > */ > +/* { dg-final { scan-assembler-times "vcvttph2uw" 4 { target { ia32 } } } } > */ > +/* { dg-final { scan-assembler-times "vcvttph2uw" 5 { target { ! ia32 } } } > } */ > /* { dg-final { scan-assembler-times "vpmovdb" 10 { target { ia32 } } } } */ > /* { dg-final { scan-assembler-times "vpmovdb" 14 { target { ! 
ia32 } } } } > */ > -/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */ > +/* { dg-final { scan-assembler-times "vpmovwb" 8 { target { ia32 } } } } */ > +/* { dg-final { scan-assembler-times "vpmovwb" 10 { target { ! ia32 } } } } > */ > > #include <x86intrin.h> > > @@ -103,6 +106,11 @@ __v2qi mm32_cvtph_epi8_builtin_convertvector(__v2hf > a) > return __builtin_convertvector((__v2hf)a, __v2qi); > } > > +__v4qi mm64_cvtph_epi8_builtin_convertvector(__v4hf a) > +{ > + return __builtin_convertvector((__v4hf)a, __v4qi); > +} > + > __v8qi mm128_cvtph_epi8_builtin_convertvector(__v8hf a) > { > return __builtin_convertvector((__v8hf)a, __v8qi); > @@ -123,6 +131,11 @@ __v2qu mm32_cvtph_epu8_builtin_convertvector(__v2hf > a) > return __builtin_convertvector((__v2hf)a, __v2qu); > } > > +__v4qu mm64_cvtph_epu8_builtin_convertvector(__v4hf a) > +{ > + return __builtin_convertvector((__v4hf)a, __v4qu); > +} > + > __v8qu mm128_cvtph_epu8_builtin_convertvector(__v8hf a) > { > return __builtin_convertvector((__v8hf)a, __v8qu); > -- > 2.31.1 >
-- BR, Hongtao