Hi!  This is the last pattern I'm aware of that doesn't have any v/Yv constraints and ought to be changed (there may well be others that have v/Yv in some of the alternatives, but not in all the ones that could use it).
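To illustrate what the new v=Yv,rm alternative buys us, here is a minimal sketch (not part of the patch; the function name and the exact instructions emitted are just for illustration) of the kind of code the new avx512dq-concatv2di-1.c test below exercises: with -mavx512vl -mavx512dq, concatenating a DImode value that already sits in one of the %xmm16+ registers no longer needs a copy back into %xmm0-%xmm15, because the EVEX-encoded vpinsrq can use those registers directly.

/* Sketch only, assuming -O2 -mavx512vl -mavx512dq on x86_64; "concat" is a
   made-up name, not part of the patch or the testsuite.  */
typedef long long V __attribute__ ((vector_size (16)));

V
concat (long long x, long long y)
{
  /* Force the first operand into an EVEX-only register.  */
  register long long a __asm ("xmm16");
  a = x;
  asm volatile ("" : "+v" (a));
  /* With the new alternative this can be a single EVEX vpinsrq reading
     %xmm16 directly; previously the value had to be moved into one of
     %xmm0-%xmm15 first.  */
  return (V) { a, y };
}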
The testcases show what the changes are useful for.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-06-01  Jakub Jelinek  <ja...@redhat.com>

	* config/i386/sse.md (*vec_concatv2di): Add x86_avx512dq v=Yv,rm
	alternative.  Change x=xm,C alternative to v=vm,C, x=x,x alternative
	to v=Yv,Yv and x=x,m to v=v,m.  Use maybe_evex prefix attribute
	instead of vex for the last two above mentioned alternatives.

	* gcc.target/i386/avx512dq-concatv2di-1.c: New test.
	* gcc.target/i386/avx512vl-concatv2di-1.c: New test.
	* gcc.target/i386/sse2-init-v2di-2.c: Adjust expected vec_concatv2di
	alternative number.

--- gcc/config/i386/sse.md.jj	2016-06-01 14:17:18.000000000 +0200
+++ gcc/config/i386/sse.md	2016-06-01 18:11:35.058942131 +0200
@@ -13567,17 +13567,18 @@ (define_insn "*vec_concatv4si"
 ;; movd instead of movq is required to handle broken assemblers.
 (define_insn "vec_concatv2di"
   [(set (match_operand:V2DI 0 "register_operand"
-	  "=Yr,*x,x ,Yi,x ,!x,x,x,x,x,x")
+	  "=Yr,*x,x ,v ,Yi,v ,!x,x,v ,x,x,v")
 	(vec_concat:V2DI
 	  (match_operand:DI 1 "nonimmediate_operand"
-	  " 0, 0,x ,r ,xm,*y,0,x,0,0,x")
+	  " 0, 0,x ,Yv,r ,vm,*y,0,Yv,0,0,v")
 	  (match_operand:DI 2 "vector_move_operand"
-	  "*rm,rm,rm,C ,C ,C ,x,x,x,m,m")))]
+	  "*rm,rm,rm,rm,C ,C ,C ,x,Yv,x,m,m")))]
   "TARGET_SSE"
   "@
    pinsrq\t{$1, %2, %0|%0, %2, 1}
    pinsrq\t{$1, %2, %0|%0, %2, 1}
    vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
+   vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
    * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
    %vmovq\t{%1, %0|%0, %1}
    movq2dq\t{%1, %0|%0, %1}
@@ -13592,40 +13593,46 @@ (define_insn "vec_concatv2di"
 	    (eq_attr "alternative" "2")
 	      (const_string "x64_avx")
 	    (eq_attr "alternative" "3")
+	      (const_string "x64_avx512dq")
+	    (eq_attr "alternative" "4")
 	      (const_string "x64")
-	    (eq_attr "alternative" "4,5")
+	    (eq_attr "alternative" "5,6")
 	      (const_string "sse2")
-	    (eq_attr "alternative" "6")
+	    (eq_attr "alternative" "7")
 	      (const_string "sse2_noavx")
-	    (eq_attr "alternative" "7,10")
+	    (eq_attr "alternative" "8,11")
 	      (const_string "avx")
 	   ]
 	   (const_string "noavx")))
    (set (attr "type")
      (if_then_else
-       (eq_attr "alternative" "0,1,2,6,7")
+       (eq_attr "alternative" "0,1,2,3,7,8")
        (const_string "sselog")
        (const_string "ssemov")))
    (set (attr "prefix_rex")
-     (if_then_else (eq_attr "alternative" "0,1,2,3")
+     (if_then_else (eq_attr "alternative" "0,1,2,3,4")
        (const_string "1")
        (const_string "*")))
    (set (attr "prefix_extra")
-     (if_then_else (eq_attr "alternative" "0,1,2")
+     (if_then_else (eq_attr "alternative" "0,1,2,3")
       (const_string "1")
       (const_string "*")))
    (set (attr "length_immediate")
-     (if_then_else (eq_attr "alternative" "0,1,2")
+     (if_then_else (eq_attr "alternative" "0,1,2,3")
       (const_string "1")
       (const_string "*")))
    (set (attr "prefix")
-     (cond [(eq_attr "alternative" "2,7,10")
+     (cond [(eq_attr "alternative" "2")
 	      (const_string "vex")
-	    (eq_attr "alternative" "3,4")
+	    (eq_attr "alternative" "3")
+	      (const_string "evex")
+	    (eq_attr "alternative" "4,5")
 	      (const_string "maybe_vex")
+	    (eq_attr "alternative" "8,11")
+	      (const_string "maybe_evex")
 	   ]
 	   (const_string "orig")))
-   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
+   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
 
 (define_expand "vec_unpacks_lo_<mode>"
   [(match_operand:<sseunpackmode> 0 "register_operand")
--- gcc/testsuite/gcc.target/i386/avx512dq-concatv2di-1.c.jj	2016-06-01 18:58:33.037699493 +0200
+++ gcc/testsuite/gcc.target/i386/avx512dq-concatv2di-1.c	2016-06-01 18:53:29.000000000 +0200
@@ -0,0 +1,78 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mavx512dq -masm=att -mtune=haswell" } */
+
+typedef long long V __attribute__((vector_size (16)));
+
+void
+f1 (long long x, long long y)
+{
+  register long long a __asm ("xmm16");
+  register V c __asm ("xmm17");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, y };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpinsrq\[^\n\r]*\\\$1\[^\n\r]*%rsi\[^\n\r]*%xmm16\[^\n\r]*%xmm17" } } */
+
+void
+f2 (long long x, long long *y)
+{
+  register long long a __asm ("xmm18");
+  register V c __asm ("xmm19");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, *y };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpinsrq\[^\n\r]*\\\$1\[^\n\r]*%\[re]si\[^\n\r]*%xmm18\[^\n\r]*%xmm19" } } */
+
+void
+f3 (long long x)
+{
+  register V a __asm ("xmm20");
+  a = (V) { x, 0 };
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vmov\[dq]\[^\n\r]*%rdi\[^\n\r]*%xmm20" } } */
+
+void
+f4 (long long *x)
+{
+  register V a __asm ("xmm21");
+  a = (V) { *x, 0 };
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vmovq\[^\n\r]*%\[re]di\[^\n\r]*%xmm21" } } */
+
+void
+f5 (long long x)
+{
+  register long long a __asm ("xmm22");
+  register V c __asm ("xmm23");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, 0 };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vmovq\[^\n\r]*%xmm22\[^\n\r]*%xmm23" } } */
+
+void
+f6 (long long x, long long y)
+{
+  register long long a __asm ("xmm24");
+  register long long b __asm ("xmm25");
+  register V c __asm ("xmm26");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  c = (V) { a, b };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpunpcklqdq\[^\n\r]*%xmm25\[^\n\r]*%xmm24\[^\n\r]*%xmm26" } } */
--- gcc/testsuite/gcc.target/i386/avx512vl-concatv2di-1.c.jj	2016-06-01 18:58:45.761535711 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-concatv2di-1.c	2016-06-01 19:04:11.367344524 +0200
@@ -0,0 +1,79 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512dq -masm=att -mtune=haswell" } */
+
+typedef long long V __attribute__((vector_size (16)));
+
+void
+f1 (long long x, long long y)
+{
+  register long long a __asm ("xmm16");
+  register V c __asm ("xmm17");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, y };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler-not "vpinsrq\[^\n\r]*\[^\n\r]*%xmm1\[6-9]" } } */
+/* { dg-final { scan-assembler-not "vpinsrq\[^\n\r]*\[^\n\r]*%xmm\[23]\[0-9]" } } */
+
+void
+f2 (long long x, long long *y)
+{
+  register long long a __asm ("xmm18");
+  register V c __asm ("xmm19");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, *y };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vmovhps\[^\n\r]*%\[re]si\[^\n\r]*%xmm18\[^\n\r]*%xmm19" } } */
+
+void
+f3 (long long x)
+{
+  register V a __asm ("xmm20");
+  a = (V) { x, 0 };
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vmov\[dq]\[^\n\r]*%rdi\[^\n\r]*%xmm20" } } */
+
+void
+f4 (long long *x)
+{
+  register V a __asm ("xmm21");
+  a = (V) { *x, 0 };
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vmovq\[^\n\r]*%\[re]di\[^\n\r]*%xmm21" } } */
+
+void
+f5 (long long x)
+{
+  register long long a __asm ("xmm22");
+  register V c __asm ("xmm23");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, 0 };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vmovq\[^\n\r]*%xmm22\[^\n\r]*%xmm23" } } */
+
+void
+f6 (long long x, long long y)
+{
+  register long long a __asm ("xmm24");
+  register long long b __asm ("xmm25");
+  register V c __asm ("xmm26");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  c = (V) { a, b };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpunpcklqdq\[^\n\r]*%xmm25\[^\n\r]*%xmm24\[^\n\r]*%xmm26" } } */
--- gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c.jj	2015-12-31 01:11:11.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c	2016-06-01 21:23:02.455281080 +0200
@@ -10,4 +10,4 @@ test (long long b)
   return _mm_cvtsi64_si128 (b);
 }
 
-/* { dg-final { scan-assembler-times "vec_concatv2di/4" 1 } } */
+/* { dg-final { scan-assembler-times "vec_concatv2di/5" 1 } } */

	Jakub