On Thu, May 26, 2016 at 9:24 PM, Jakub Jelinek <ja...@redhat.com> wrote: > On Thu, May 26, 2016 at 07:39:01PM +0200, Uros Bizjak wrote: >> On Thu, May 26, 2016 at 7:05 PM, Jakub Jelinek <ja...@redhat.com> wrote: >> > Hi! >> > >> > This patch adds an avx512dq alternative (EVEX vpinsrd requires that) and >> > enables EVEX vmovd and vpunpckldq. >> > >> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? >> > >> > 2016-05-26 Jakub Jelinek <ja...@redhat.com> >> > >> > * config/i386/sse.md (*vec_concatv2si_sse4_1): Add avx512dq v=Yv,rm >> > alternative. Change x=x,x alternative to v=Yv,Yv and x=rm,C >> > alternative to v=rm,C. >> > >> > * gcc.target/i386/avx512dq-concatv2si-1.c: New test. >> > * gcc.target/i386/avx512vl-concatv2si-1.c: New test. >> >> Ouch, I have just changed these mega strings in attribute definitions >> to something more readable. Can you please redo the attribute part? It >> should be a much more pleasant experience than counting all the >> commas... > > Here is an updated version of this patch (the other two pending sse.md patches > from me still apply cleanly): > > 2016-05-26 Jakub Jelinek <ja...@redhat.com> > > * config/i386/sse.md (*vec_concatv2si_sse4_1): Add avx512dq v=Yv,rm > alternative. Change x=x,x alternative to v=Yv,Yv and x=rm,C > alternative to v=rm,C. > > * gcc.target/i386/avx512dq-concatv2si-1.c: New test. > * gcc.target/i386/avx512vl-concatv2si-1.c: New test.
OK. Thanks, Uros. > --- gcc/config/i386/sse.md.jj 2016-05-26 10:44:25.000000000 +0200 > +++ gcc/config/i386/sse.md 2016-05-26 14:22:26.819313220 +0200 > @@ -13488,43 +13488,44 @@ > > (define_insn "*vec_concatv2si_sse4_1" > [(set (match_operand:V2SI 0 "register_operand" > - "=Yr,*x,x, Yr,*x,x, x, *y,*y") > + "=Yr,*x, x, v,Yr,*x, v, v, *y,*y") > (vec_concat:V2SI > (match_operand:SI 1 "nonimmediate_operand" > - " 0, 0,x, 0,0, x,rm, 0,rm") > + " 0, 0, x,Yv, 0, 0,Yv,rm, 0,rm") > (match_operand:SI 2 "vector_move_operand" > - " rm,rm,rm,Yr,*x,x, C,*ym, C")))] > + " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))] > "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" > "@ > pinsrd\t{$1, %2, %0|%0, %2, 1} > pinsrd\t{$1, %2, %0|%0, %2, 1} > vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1} > + vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1} > punpckldq\t{%2, %0|%0, %2} > punpckldq\t{%2, %0|%0, %2} > vpunpckldq\t{%2, %1, %0|%0, %1, %2} > %vmovd\t{%1, %0|%0, %1} > punpckldq\t{%2, %0|%0, %2} > movd\t{%1, %0|%0, %1}" > - [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*") > + [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*") > (set (attr "type") > - (cond [(eq_attr "alternative" "6") > + (cond [(eq_attr "alternative" "7") > (const_string "ssemov") > - (eq_attr "alternative" "7") > - (const_string "mmxcvt") > (eq_attr "alternative" "8") > + (const_string "mmxcvt") > + (eq_attr "alternative" "9") > (const_string "mmxmov") > ] > (const_string "sselog"))) > (set (attr "prefix_extra") > - (if_then_else (eq_attr "alternative" "0,1,2") > + (if_then_else (eq_attr "alternative" "0,1,2,3") > (const_string "1") > (const_string "*"))) > (set (attr "length_immediate") > - (if_then_else (eq_attr "alternative" "0,1,2") > + (if_then_else (eq_attr "alternative" "0,1,2,3") > (const_string "1") > (const_string "*"))) > - (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig") > - (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")]) > + (set_attr "prefix" > 
"orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig") > + (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")]) > > ;; ??? In theory we can match memory for the MMX alternative, but allowing > ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE > --- gcc/testsuite/gcc.target/i386/avx512dq-concatv2si-1.c.jj 2016-05-26 > 15:14:55.853786550 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512dq-concatv2si-1.c 2016-05-26 > 15:13:57.000000000 +0200 > @@ -0,0 +1,43 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-O2 -mavx512vl -mavx512dq -masm=att" } */ > + > +typedef int V __attribute__((vector_size (8))); > + > +void > +f1 (int x, int y) > +{ > + register int a __asm ("xmm16"); > + register int b __asm ("xmm17"); > + register V c __asm ("xmm3"); > + a = x; > + b = y; > + asm volatile ("" : "+v" (a), "+v" (b)); > + c = (V) { a, b }; > + asm volatile ("" : "+v" (c)); > +} > + > +/* { dg-final { scan-assembler > "vpunpckldq\[^\n\r]*%xmm17\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */ > + > +void > +f2 (int x, int y) > +{ > + register int a __asm ("xmm16"); > + register V c __asm ("xmm3"); > + a = x; > + asm volatile ("" : "+v" (a)); > + c = (V) { a, y }; > + asm volatile ("" : "+v" (c)); > +} > + > +void > +f3 (int x, int *y) > +{ > + register int a __asm ("xmm16"); > + register V c __asm ("xmm3"); > + a = x; > + asm volatile ("" : "+v" (a)); > + c = (V) { a, *y }; > + asm volatile ("" : "+v" (c)); > +} > + > +/* { dg-final { scan-assembler-times > "vpinsrd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm3" 2 } } */ > --- gcc/testsuite/gcc.target/i386/avx512vl-concatv2si-1.c.jj 2016-05-26 > 15:15:11.921574803 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512vl-concatv2si-1.c 2016-05-26 > 15:16:24.936612585 +0200 > @@ -0,0 +1,43 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-options "-O2 -mavx512vl -mno-avx512dq -masm=att" } */ > + > +typedef int V __attribute__((vector_size (8))); > + > +void > +f1 (int x, int y) > +{ > + register int a __asm ("xmm16"); > + register int b __asm ("xmm17"); > + register V c __asm ("xmm3"); > + a = x; > + b = y; > + asm volatile ("" : "+v" (a), "+v" (b)); > + c = (V) { a, b }; > + asm volatile ("" : "+v" (c)); > +} > + > +/* { dg-final { scan-assembler > "vpunpckldq\[^\n\r]*%xmm17\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */ > + > +void > +f2 (int x, int y) > +{ > + register int a __asm ("xmm16"); > + register V c __asm ("xmm3"); > + a = x; > + asm volatile ("" : "+v" (a)); > + c = (V) { a, y }; > + asm volatile ("" : "+v" (c)); > +} > + > +void > +f3 (int x, int *y) > +{ > + register int a __asm ("xmm16"); > + register V c __asm ("xmm3"); > + a = x; > + asm volatile ("" : "+v" (a)); > + c = (V) { a, *y }; > + asm volatile ("" : "+v" (c)); > +} > + > +/* { dg-final { scan-assembler-not > "vpinsrd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */ > > > Jakub