Hi! vpinsr{b,w} are AVX512BW, vpinsr{d,q} are AVX512DQ. This patch makes us use the v constraint instead of x in those cases, so that the EVEX-encoded forms can also use the extended %xmm16-%xmm31 registers when the corresponding ISA extension is enabled.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2016-05-09 Jakub Jelinek <ja...@redhat.com> * config/i386/sse.md (pinsr_evex_isa): New mode attr. (<sse2p4_1>_pinsr<ssemodesuffix>): Add 2 alternatives with v constraints instead of x and <pinsr_evex_isa> isa attribute. * gcc.target/i386/avx512bw-vpinsr-1.c: New test. * gcc.target/i386/avx512dq-vpinsr-1.c: New test. * gcc.target/i386/avx512vl-vpinsr-1.c: New test. --- gcc/config/i386/sse.md.jj 2016-05-09 13:31:21.000000000 +0200 +++ gcc/config/i386/sse.md 2016-05-09 14:15:50.241028739 +0200 @@ -12036,13 +12036,17 @@ (define_mode_attr sse2p4_1 [(V16QI "sse4_1") (V8HI "sse2") (V4SI "sse4_1") (V2DI "sse4_1")]) +(define_mode_attr pinsr_evex_isa + [(V16QI "avx512bw") (V8HI "avx512bw") + (V4SI "avx512dq") (V2DI "avx512dq")]) + ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred. (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>" - [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x") + [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v") (vec_merge:PINSR_MODE (vec_duplicate:PINSR_MODE - (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m")) - (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x") + (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m")) + (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v") (match_operand:SI 3 "const_int_operand")))] "TARGET_SSE2 && ((unsigned) exact_log2 (INTVAL (operands[3])) @@ -12059,16 +12063,18 @@ (define_insn "<sse2p4_1>_pinsr<ssemodesu case 1: return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"; case 2: + case 4: if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode)) return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; /* FALLTHRU */ case 3: + case 5: return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; default: gcc_unreachable (); } } - [(set_attr "isa" "noavx,noavx,avx,avx") + [(set_attr "isa" 
"noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>") (set_attr "type" "sselog") (set (attr "prefix_rex") (if_then_else @@ -12089,7 +12095,7 @@ (define_insn "<sse2p4_1>_pinsr<ssemodesu (const_string "*") (const_string "1"))) (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,orig,vex,vex") + (set_attr "prefix" "orig,orig,vex,vex,evex,evex") (set_attr "mode" "TI")]) (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask" --- gcc/testsuite/gcc.target/i386/avx512bw-vpinsr-1.c.jj 2016-05-09 14:36:49.618145755 +0200 +++ gcc/testsuite/gcc.target/i386/avx512bw-vpinsr-1.c 2016-05-09 14:49:57.830574216 +0200 @@ -0,0 +1,33 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx512vl -mavx512bw" } */ + +typedef char v16qi __attribute__((vector_size (16))); +typedef short v8hi __attribute__((vector_size (16))); + +v16qi +f1 (v16qi a, char b) +{ + register v16qi c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + v16qi d = c; + ((char *) &d)[3] = b; + c = d; + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler "vpinsrb\[^\n\r]*xmm16" } } */ + +v8hi +f2 (v8hi a, short b) +{ + register v8hi c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + v8hi d = c; + ((short *) &d)[3] = b; + c = d; + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler "vpinsrw\[^\n\r]*xmm16" } } */ --- gcc/testsuite/gcc.target/i386/avx512dq-vpinsr-1.c.jj 2016-05-09 14:39:15.588184128 +0200 +++ gcc/testsuite/gcc.target/i386/avx512dq-vpinsr-1.c 2016-05-09 14:48:38.000000000 +0200 @@ -0,0 +1,33 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mavx512vl -mavx512dq" } */ + +typedef int v4si __attribute__((vector_size (16))); +typedef long long v2di __attribute__((vector_size (16))); + +v4si +f1 (v4si a, int b) +{ + register v4si c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + v4si d = c; + ((int *) &d)[3] = b; + c = d; + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler "vpinsrd\[^\n\r]*xmm16" } } */ + +v2di +f2 (v2di a, long long b) +{ + register v2di c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + v2di d = c; + ((long long *) &d)[1] = b; + c = d; + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler "vpinsrq\[^\n\r]*xmm16" } } */ --- gcc/testsuite/gcc.target/i386/avx512vl-vpinsr-1.c.jj 2016-05-09 14:41:21.195496147 +0200 +++ gcc/testsuite/gcc.target/i386/avx512vl-vpinsr-1.c 2016-05-09 14:50:32.188114909 +0200 @@ -0,0 +1,63 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx512vl -mno-avx512bw -mno-avx512dq" } */ + +typedef char v16qi __attribute__((vector_size (16))); +typedef short v8hi __attribute__((vector_size (16))); +typedef int v4si __attribute__((vector_size (16))); +typedef long long v2di __attribute__((vector_size (16))); + +v16qi +f1 (v16qi a, char b) +{ + register v16qi c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + v16qi d = c; + ((char *) &d)[3] = b; + c = d; + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-not "vpinsrb\[^\n\r]*xmm16" } } */ + +v8hi +f2 (v8hi a, short b) +{ + register v8hi c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + v8hi d = c; + ((short *) &d)[3] = b; + c = d; + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-not "vpinsrw\[^\n\r]*xmm16" } } */ + +v4si +f3 (v4si a, int b) +{ + register v4si c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + v4si d = c; + ((int *) &d)[3] = b; + c = d; + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { 
scan-assembler-not "vpinsrd\[^\n\r]*xmm16" } } */ +
+v2di
+f4 (v2di a, long long b)
+{
+ register v2di c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ v2di d = c;
+ ((long long *) &d)[1] = b;
+ c = d;
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-not "vpinsrq\[^\n\r]*xmm16" } } */ Jakub