On Mon, May 9, 2022 at 8:44 AM Hongtao Liu <crazy...@gmail.com> wrote:
>
> On Mon, May 9, 2022 at 2:43 PM liuhongt via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Clean up of 16-bit uppers is not needed for pmovzxbq/pmovsxbq.
> >
> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > Ok for trunk?
> >
> > gcc/ChangeLog:
> >
> >         PR target/105072
> >         * config/i386/sse.md (*sse4_1_<code>v2qiv2di2<mask_name>_1):
> >         New define_insn.
> >         (*sse4_1_zero_extendv2qiv2di2_2): New pre_reload
> >         define_insn_and_split.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/i386/pr105072.c: New test.

OK.

Thanks,
Uros.

> > ---
> >  gcc/config/i386/sse.md                   | 45 +++++++++++++++++++++---
> >  gcc/testsuite/gcc.target/i386/pr105072.c | 24 +++++++++++++
> >  2 files changed, 65 insertions(+), 4 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr105072.c
> >
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index 7b791def542..47f8b18b82e 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -22297,15 +22297,52 @@ (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
> >     (set_attr "prefix" "orig,orig,maybe_evex")
> >     (set_attr "mode" "TI")])
> >
> > +(define_insn "*sse4_1_<code>v2qiv2di2<mask_name>_1"
> > +  [(set (match_operand:V2DI 0 "register_operand" "=v")
> > +       (any_extend:V2DI
> > +        (match_operand:V2QI 1 "memory_operand" "m")))]
> > +  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
> > +  "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
> > +  [(set_attr "type" "ssemov")
> > +   (set_attr "prefix_extra" "1")
> > +   (set_attr "prefix" "maybe_evex")
> > +   (set_attr "mode" "TI")])
> > +
> >  (define_expand "<insn>v2qiv2di2"
> >    [(set (match_operand:V2DI 0 "register_operand")
> >         (any_extend:V2DI
> > -         (match_operand:V2QI 1 "register_operand")))]
> > +        (match_operand:V2QI 1 "nonimmediate_operand")))]
> >    "TARGET_SSE4_1"
> >  {
> > -  rtx op1 = force_reg (V2QImode, operands[1]);
> > -  op1 = lowpart_subreg (V16QImode, op1, V2QImode);
> > -  emit_insn (gen_sse4_1_<code>v2qiv2di2 (operands[0], op1));
> > +  if (!MEM_P (operands[1]))
> > +    {
> > +      rtx op1 = force_reg (V2QImode, operands[1]);
> > +      op1 = lowpart_subreg (V16QImode, op1, V2QImode);
> > +      emit_insn (gen_sse4_1_<code>v2qiv2di2 (operands[0], op1));
> > +      DONE;
> > +    }
> > +})
> > +
> > +(define_insn_and_split "*sse4_1_zero_extendv2qiv2di2_2"
> > +  [(set (match_operand:V2DI 0 "register_operand")
> > +       (zero_extend:V2DI
> > +        (vec_select:V2QI
> > +         (subreg:V16QI
> > +          (vec_merge:V8_128
> > +           (vec_duplicate:V8_128
> > +            (match_operand:<ssescalarmode> 1 "nonimmediate_operand"))
> > +           (match_operand:V8_128 2 "const0_operand")
> > +           (const_int 1)) 0)
> > +         (parallel [(const_int 0) (const_int 1)]))))]
> > +  "TARGET_SSE4_1 && ix86_pre_reload_split ()"
> > +  "#"
> > +  "&& 1"
> > +  [(const_int 0)]
> > +{
> > +  if (!MEM_P (operands[1]))
> > +    operands[1] = force_reg (<ssescalarmode>mode, operands[1]);
> > +  operands[1] = lowpart_subreg (V2QImode, operands[1], <ssescalarmode>mode);
> > +  emit_insn (gen_zero_extendv2qiv2di2 (operands[0], operands[1]));
> >    DONE;
> >  })
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/pr105072.c b/gcc/testsuite/gcc.target/i386/pr105072.c
> > new file mode 100644
> > index 00000000000..54e229731b8
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr105072.c
> > @@ -0,0 +1,24 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-msse4.1 -O2" } */
> > +/* { dg-final { scan-assembler-times {(?n)pmovzxbq[ \t]+} "4" } } */
> > +/* { dg-final { scan-assembler-not {(?n)pinsrw[ \t]+} } } */
> > +
> > +#include<immintrin.h>
> > +
> > +__m128i foo (void *p){
> > +  return _mm_cvtepu8_epi64(_mm_loadu_si16(p));
> > +}
> > +
> > +__m128i foo2 (short a){
> > +  return _mm_cvtepu8_epi64(_mm_set_epi16(0, 0, 0, 0, 0, 0, 0, a));
> > +}
> > +
> > +__m128i
> > +foo3 (void *p){
> > +  return _mm_cvtepu8_epi64((__m128i)__extension__(__m128h) {*(_Float16 const*)p, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
> > +}
> > +
> > +__m128i
> > +foo4 (_Float16 a){
> > +  return _mm_cvtepu8_epi64((__m128i)__extension__(__m128h) {a, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
> > +}
> > --
> > 2.18.1
> >
>
>
> --
> BR,
> Hongtao

Reply via email to