Hi! The vpermt2* and vpermi2* instructions are pretty much the same (at least when not using non-{z} masking), so by treating them as something significantly different we often generate very bad code.
The following patch attempts to treat them the same (except for non-{z} masking), so that the RA can choose whatever is more appropriate. E.g. in the pr82460-2.c testcase we used to emit vpermi2b which destroys the index operand, even when we actually need the same index in every iteration, which means we have to copy it over from some other reg. While on the pr82460-1.c testcase, there are cases when we want the result to be in %zmm0, but because we were tied to one choice we had to move the result from %zmm1 to %zmm0. Also, the way vpermi2* with non-{z} masking was emitted had various issues, the RA isn't able to emit two input operands with different modes both tied to the same "0" constraint, so a match_dup for the output was used, but that means the middle-end would see an uninitialized pseudo, clear it and generate say for: __m512i f5 (__m512i x, __m512i y, char *z, __mmask64 w) { return _mm512_mask2_permutex2var_epi32 (y, x, w, _mm512_loadu_si512 (z)); } code like: vmovdqa64 %zmm0, %zmm2 kmovw %esi, %k1 vpermi2d (%rdi), %zmm1, %zmm2{%k1} vmovdqa64 %zmm2, %zmm0 while with the patch we can emit: kmovw %esi, %k1 vpermi2d (%rdi), %zmm1, %zmm0{%k1} instead. So that we don't have too many patterns, I've added 2 mode iterators for this case which allowed merging each group of 3 define_expand/define_insn we had into one (except for the non-{z} masking we need one define_insn for integral vectors where we can use match_dup and one for floating vectors where we need to subreg it). Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2017-10-09 Jakub Jelinek <ja...@redhat.com> PR target/82460 * config/i386/sse.md (UNSPEC_VPERMI2, UNSPEC_VPERMI2_MASK): Remove. (VPERMI2, VPERMI2I): New mode iterators. (<avx512>_vpermi2var<mode>3_maskz): Remove 3 define_expand patterns. (<avx512>_vpermi2var<mode>3<sd_maskz_name>): Remove 3 define_insn patterns. (<avx512>_vpermi2var<mode>3_mask): New define_expand using VPERMI2 mode iterator. Remove 3 old define_insn patterns. 
(*<avx512>_vpermi2var<mode>3_mask): 2 new define_insn patterns. (<avx512>_vpermt2var<mode>3_maskz): Adjust 1 define_expand to use VPERMI2 mode iterator, remove the other two expanders. (<avx512>_vpermt2var<mode>3<sd_maskz_name>): Adjust 1 define_insn to use VPERMI2 mode iterator, add another alternative for vpermi2* instructions, remove the other two patterns. (<avx512>_vpermt2var<mode>3_mask): Adjust 1 define_insn to use VPERMI2 mode iterator, remove the other two patterns. * config/i386/i386.c (ix86_expand_vec_perm_vpermi2): Renamed to ... (ix86_expand_vec_perm_vpermt2): ... this. Swap mask and op0 arguments, use gen_*vpermt2* expanders instead of gen_*vpermi2* and adjust argument order accordingly. (ix86_expand_vec_perm): Adjust caller. (expand_vec_perm_1): Likewise. (expand_vec_perm_vpermi2_vpshub2): Rename to ... (expand_vec_perm_vpermt2_vpshub2): ... this. (ix86_expand_vec_perm_const_1): Adjust caller. (ix86_vectorize_vec_perm_const_ok): Adjust comments. * gcc.target/i386/pr82460-1.c: New test. * gcc.target/i386/pr82460-2.c: New test. * gcc.target/i386/avx512f-vpermt2pd-1.c: Adjust scan-assembler* regexps to allow vpermt2* to vpermi2* replacement or vice versa where possible. * gcc.target/i386/avx512vl-vpermt2pd-1.c: Likewise. * gcc.target/i386/avx512f-vpermt2d-1.c: Likewise. * gcc.target/i386/vect-pack-trunc-2.c: Likewise. * gcc.target/i386/avx512vl-vpermt2ps-1.c: Likewise. * gcc.target/i386/avx512vl-vpermt2q-1.c: Likewise. * gcc.target/i386/avx512f-vpermt2ps-1.c: Likewise. * gcc.target/i386/avx512vl-vpermt2d-1.c: Likewise. * gcc.target/i386/avx512bw-vpermt2w-1.c: Likewise. * gcc.target/i386/avx512vbmi-vpermt2b-1.c: Likewise. * gcc.target/i386/avx512f-vpermt2q-1.c: Likewise. 
--- gcc/config/i386/sse.md.jj 2017-10-04 16:45:19.000000000 +0200 +++ gcc/config/i386/sse.md 2017-10-09 17:44:38.342212895 +0200 @@ -83,9 +83,7 @@ (define_c_enum "unspec" [ UNSPEC_VSIBADDR ;; For AVX512F support - UNSPEC_VPERMI2 UNSPEC_VPERMT2 - UNSPEC_VPERMI2_MASK UNSPEC_UNSIGNED_FIX_NOTRUNC UNSPEC_UNSIGNED_PCMP UNSPEC_TESTM @@ -18105,96 +18103,48 @@ (define_insn "<sse2_avx_avx512f>_vpermil (set_attr "prefix" "<mask_prefix>") (set_attr "mode" "<sseinsnmode>")]) -(define_expand "<avx512>_vpermi2var<mode>3_maskz" - [(match_operand:VI48F 0 "register_operand") - (match_operand:VI48F 1 "register_operand") - (match_operand:<sseintvecmode> 2 "register_operand") - (match_operand:VI48F 3 "nonimmediate_operand") - (match_operand:<avx512fmaskmode> 4 "register_operand")] - "TARGET_AVX512F" -{ - emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 ( - operands[0], operands[1], operands[2], operands[3], - CONST0_RTX (<MODE>mode), operands[4])); - DONE; -}) - -(define_expand "<avx512>_vpermi2var<mode>3_maskz" - [(match_operand:VI1_AVX512VL 0 "register_operand") - (match_operand:VI1_AVX512VL 1 "register_operand") - (match_operand:<sseintvecmode> 2 "register_operand") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand") - (match_operand:<avx512fmaskmode> 4 "register_operand")] - "TARGET_AVX512VBMI" -{ - emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 ( - operands[0], operands[1], operands[2], operands[3], - CONST0_RTX (<MODE>mode), operands[4])); - DONE; -}) - -(define_expand "<avx512>_vpermi2var<mode>3_maskz" - [(match_operand:VI2_AVX512VL 0 "register_operand") - (match_operand:VI2_AVX512VL 1 "register_operand") - (match_operand:<sseintvecmode> 2 "register_operand") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand") - (match_operand:<avx512fmaskmode> 4 "register_operand")] - "TARGET_AVX512BW" -{ - emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 ( - operands[0], operands[1], operands[2], operands[3], - CONST0_RTX (<MODE>mode), operands[4])); - DONE; -}) - -(define_insn 
"<avx512>_vpermi2var<mode>3<sd_maskz_name>" - [(set (match_operand:VI48F 0 "register_operand" "=v") - (unspec:VI48F - [(match_operand:VI48F 1 "register_operand" "v") - (match_operand:<sseintvecmode> 2 "register_operand" "0") - (match_operand:VI48F 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMI2))] +(define_mode_iterator VPERMI2 + [V16SI V16SF V8DI V8DF + (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL") + (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") + (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") + (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL") + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL") + (V8HI "TARGET_AVX512BW && TARGET_AVX512VL") + (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL") + (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")]) + +(define_mode_iterator VPERMI2I + [V16SI V8DI + (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL") + (V8HI "TARGET_AVX512BW && TARGET_AVX512VL") + (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL") + (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")]) + +(define_expand "<avx512>_vpermi2var<mode>3_mask" + [(set (match_operand:VPERMI2 0 "register_operand") + (vec_merge:VPERMI2 + (unspec:VPERMI2 + [(match_operand:<sseintvecmode> 2 "register_operand") + (match_operand:VPERMI2 1 "register_operand") + (match_operand:VPERMI2 3 "nonimmediate_operand")] + UNSPEC_VPERMT2) + (match_dup 5) + (match_operand:<avx512fmaskmode> 4 "register_operand")))] "TARGET_AVX512F" - "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>" - [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") - (unspec:VI1_AVX512VL - [(match_operand:VI1_AVX512VL 1 "register_operand" "v") - 
(match_operand:<sseintvecmode> 2 "register_operand" "0") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMI2))] - "TARGET_AVX512VBMI" - "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>" - [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") - (unspec:VI2_AVX512VL - [(match_operand:VI2_AVX512VL 1 "register_operand" "v") - (match_operand:<sseintvecmode> 2 "register_operand" "0") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMI2))] - "TARGET_AVX512BW" - "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) + "operands[5] = gen_lowpart (<MODE>mode, operands[2]);") -(define_insn "<avx512>_vpermi2var<mode>3_mask" - [(set (match_operand:VI48F 0 "register_operand" "=v") - (vec_merge:VI48F - (unspec:VI48F - [(match_operand:VI48F 1 "register_operand" "v") - (match_operand:<sseintvecmode> 2 "register_operand" "0") - (match_operand:VI48F 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMI2_MASK) - (match_dup 0) +(define_insn "*<avx512>_vpermi2var<mode>3_mask" + [(set (match_operand:VPERMI2I 0 "register_operand" "=v") + (vec_merge:VPERMI2I + (unspec:VPERMI2I + [(match_operand:<sseintvecmode> 2 "register_operand" "0") + (match_operand:VPERMI2I 1 "register_operand" "v") + (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")] + UNSPEC_VPERMT2) + (match_dup 2) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX512F" "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" @@ -18202,43 +18152,27 @@ (define_insn "<avx512>_vpermi2var<mode>3 (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<avx512>_vpermi2var<mode>3_mask" - [(set (match_operand:VI1_AVX512VL 0 
"register_operand" "=v") - (vec_merge:VI1_AVX512VL - (unspec:VI1_AVX512VL - [(match_operand:VI1_AVX512VL 1 "register_operand" "v") - (match_operand:<sseintvecmode> 2 "register_operand" "0") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMI2_MASK) - (match_dup 0) - (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] - "TARGET_AVX512VBMI" - "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "<avx512>_vpermi2var<mode>3_mask" - [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") - (vec_merge:VI2_AVX512VL - (unspec:VI2_AVX512VL - [(match_operand:VI2_AVX512VL 1 "register_operand" "v") - (match_operand:<sseintvecmode> 2 "register_operand" "0") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMI2_MASK) - (match_dup 0) +(define_insn "*<avx512>_vpermi2var<mode>3_mask" + [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") + (vec_merge:VF_AVX512VL + (unspec:VF_AVX512VL + [(match_operand:<sseintvecmode> 2 "register_operand" "0") + (match_operand:VF_AVX512VL 1 "register_operand" "v") + (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")] + UNSPEC_VPERMT2) + (subreg:VF_AVX512VL (match_dup 2) 0) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] - "TARGET_AVX512BW" + "TARGET_AVX512F" "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) (define_expand "<avx512>_vpermt2var<mode>3_maskz" - [(match_operand:VI48F 0 "register_operand") + [(match_operand:VPERMI2 0 "register_operand") (match_operand:<sseintvecmode> 1 "register_operand") - (match_operand:VI48F 2 "register_operand") - (match_operand:VI48F 3 "nonimmediate_operand") + (match_operand:VPERMI2 2 "register_operand") + (match_operand:VPERMI2 3 "nonimmediate_operand") (match_operand:<avx512fmaskmode> 4 
"register_operand")] "TARGET_AVX512F" { @@ -18248,80 +18182,28 @@ (define_expand "<avx512>_vpermt2var<mode DONE; }) -(define_expand "<avx512>_vpermt2var<mode>3_maskz" - [(match_operand:VI1_AVX512VL 0 "register_operand") - (match_operand:<sseintvecmode> 1 "register_operand") - (match_operand:VI1_AVX512VL 2 "register_operand") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand") - (match_operand:<avx512fmaskmode> 4 "register_operand")] - "TARGET_AVX512VBMI" -{ - emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 ( - operands[0], operands[1], operands[2], operands[3], - CONST0_RTX (<MODE>mode), operands[4])); - DONE; -}) - -(define_expand "<avx512>_vpermt2var<mode>3_maskz" - [(match_operand:VI2_AVX512VL 0 "register_operand") - (match_operand:<sseintvecmode> 1 "register_operand") - (match_operand:VI2_AVX512VL 2 "register_operand") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand") - (match_operand:<avx512fmaskmode> 4 "register_operand")] - "TARGET_AVX512BW" -{ - emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 ( - operands[0], operands[1], operands[2], operands[3], - CONST0_RTX (<MODE>mode), operands[4])); - DONE; -}) - (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>" - [(set (match_operand:VI48F 0 "register_operand" "=v") - (unspec:VI48F - [(match_operand:<sseintvecmode> 1 "register_operand" "v") - (match_operand:VI48F 2 "register_operand" "0") - (match_operand:VI48F 3 "nonimmediate_operand" "vm")] + [(set (match_operand:VPERMI2 0 "register_operand" "=v,v") + (unspec:VPERMI2 + [(match_operand:<sseintvecmode> 1 "register_operand" "v,0") + (match_operand:VPERMI2 2 "register_operand" "0,v") + (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")] UNSPEC_VPERMT2))] "TARGET_AVX512F" - "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>" - [(set (match_operand:VI1_AVX512VL 0 
"register_operand" "=v") - (unspec:VI1_AVX512VL - [(match_operand:<sseintvecmode> 1 "register_operand" "v") - (match_operand:VI1_AVX512VL 2 "register_operand" "0") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMT2))] - "TARGET_AVX512VBMI" - "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>" - [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") - (unspec:VI2_AVX512VL - [(match_operand:<sseintvecmode> 1 "register_operand" "v") - (match_operand:VI2_AVX512VL 2 "register_operand" "0") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMT2))] - "TARGET_AVX512BW" - "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" + "@ + vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3} + vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "<avx512>_vpermt2var<mode>3_mask" - [(set (match_operand:VI48F 0 "register_operand" "=v") - (vec_merge:VI48F - (unspec:VI48F + [(set (match_operand:VPERMI2 0 "register_operand" "=v") + (vec_merge:VPERMI2 + (unspec:VPERMI2 [(match_operand:<sseintvecmode> 1 "register_operand" "v") - (match_operand:VI48F 2 "register_operand" "0") - (match_operand:VI48F 3 "nonimmediate_operand" "vm")] + (match_operand:VPERMI2 2 "register_operand" "0") + (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")] UNSPEC_VPERMT2) (match_dup 2) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] @@ -18329,38 +18211,6 @@ (define_insn "<avx512>_vpermt2var<mode>3 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn 
"<avx512>_vpermt2var<mode>3_mask" - [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") - (vec_merge:VI1_AVX512VL - (unspec:VI1_AVX512VL - [(match_operand:<sseintvecmode> 1 "register_operand" "v") - (match_operand:VI1_AVX512VL 2 "register_operand" "0") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMT2) - (match_dup 2) - (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] - "TARGET_AVX512VBMI" - "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "<avx512>_vpermt2var<mode>3_mask" - [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") - (vec_merge:VI2_AVX512VL - (unspec:VI2_AVX512VL - [(match_operand:<sseintvecmode> 1 "register_operand" "v") - (match_operand:VI2_AVX512VL 2 "register_operand" "0") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMT2) - (match_dup 2) - (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] - "TARGET_AVX512BW" - "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) (define_expand "avx_vperm2f128<mode>3" --- gcc/config/i386/i386.c.jj 2017-10-09 13:26:48.000000000 +0200 +++ gcc/config/i386/i386.c 2017-10-09 15:34:08.757761012 +0200 @@ -25905,10 +25905,10 @@ struct expand_vec_perm_d }; static bool -ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1, +ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx op0, rtx op1, struct expand_vec_perm_d *d) { - /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const + /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const expander, so args are either in d, or in op0, op1 etc. */ machine_mode mode = GET_MODE (d ? 
d->op0 : op0); machine_mode maskmode = mode; @@ -25918,83 +25918,83 @@ ix86_expand_vec_perm_vpermi2 (rtx target { case E_V8HImode: if (TARGET_AVX512VL && TARGET_AVX512BW) - gen = gen_avx512vl_vpermi2varv8hi3; + gen = gen_avx512vl_vpermt2varv8hi3; break; case E_V16HImode: if (TARGET_AVX512VL && TARGET_AVX512BW) - gen = gen_avx512vl_vpermi2varv16hi3; + gen = gen_avx512vl_vpermt2varv16hi3; break; case E_V64QImode: if (TARGET_AVX512VBMI) - gen = gen_avx512bw_vpermi2varv64qi3; + gen = gen_avx512bw_vpermt2varv64qi3; break; case E_V32HImode: if (TARGET_AVX512BW) - gen = gen_avx512bw_vpermi2varv32hi3; + gen = gen_avx512bw_vpermt2varv32hi3; break; case E_V4SImode: if (TARGET_AVX512VL) - gen = gen_avx512vl_vpermi2varv4si3; + gen = gen_avx512vl_vpermt2varv4si3; break; case E_V8SImode: if (TARGET_AVX512VL) - gen = gen_avx512vl_vpermi2varv8si3; + gen = gen_avx512vl_vpermt2varv8si3; break; case E_V16SImode: if (TARGET_AVX512F) - gen = gen_avx512f_vpermi2varv16si3; + gen = gen_avx512f_vpermt2varv16si3; break; case E_V4SFmode: if (TARGET_AVX512VL) { - gen = gen_avx512vl_vpermi2varv4sf3; + gen = gen_avx512vl_vpermt2varv4sf3; maskmode = V4SImode; } break; case E_V8SFmode: if (TARGET_AVX512VL) { - gen = gen_avx512vl_vpermi2varv8sf3; + gen = gen_avx512vl_vpermt2varv8sf3; maskmode = V8SImode; } break; case E_V16SFmode: if (TARGET_AVX512F) { - gen = gen_avx512f_vpermi2varv16sf3; + gen = gen_avx512f_vpermt2varv16sf3; maskmode = V16SImode; } break; case E_V2DImode: if (TARGET_AVX512VL) - gen = gen_avx512vl_vpermi2varv2di3; + gen = gen_avx512vl_vpermt2varv2di3; break; case E_V4DImode: if (TARGET_AVX512VL) - gen = gen_avx512vl_vpermi2varv4di3; + gen = gen_avx512vl_vpermt2varv4di3; break; case E_V8DImode: if (TARGET_AVX512F) - gen = gen_avx512f_vpermi2varv8di3; + gen = gen_avx512f_vpermt2varv8di3; break; case E_V2DFmode: if (TARGET_AVX512VL) { - gen = gen_avx512vl_vpermi2varv2df3; + gen = gen_avx512vl_vpermt2varv2df3; maskmode = V2DImode; } break; case E_V4DFmode: if (TARGET_AVX512VL) { - 
gen = gen_avx512vl_vpermi2varv4df3; + gen = gen_avx512vl_vpermt2varv4df3; maskmode = V4DImode; } break; case E_V8DFmode: if (TARGET_AVX512F) { - gen = gen_avx512f_vpermi2varv8df3; + gen = gen_avx512f_vpermt2varv8df3; maskmode = V8DImode; } break; @@ -26005,7 +26005,7 @@ ix86_expand_vec_perm_vpermi2 (rtx target if (gen == NULL) return false; - /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const + /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const expander, so args are either in d, or in op0, op1 etc. */ if (d) { @@ -26018,7 +26018,7 @@ ix86_expand_vec_perm_vpermi2 (rtx target mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec)); } - emit_insn (gen (target, op0, force_reg (maskmode, mask), op1)); + emit_insn (gen (target, force_reg (maskmode, mask), op0, op1)); return true; } @@ -26069,7 +26069,7 @@ ix86_expand_vec_perm (rtx operands[]) } } - if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL)) + if (ix86_expand_vec_perm_vpermt2 (target, mask, op0, op1, NULL)) return; if (TARGET_AVX2) @@ -48009,8 +48009,8 @@ expand_vec_perm_1 (struct expand_vec_per if (ix86_expand_vec_one_operand_perm_avx512 (d)) return true; - /* Try the AVX512F vpermi2 instructions. */ - if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d)) + /* Try the AVX512F vpermt2/vpermi2 instructions. */ + if (ix86_expand_vec_perm_vpermt2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d)) return true; /* See if we can get the same permutation in different vector integer @@ -49669,9 +49669,9 @@ expand_vec_perm_broadcast (struct expand } /* Implement arbitrary permutations of two V64QImode operands - will 2 vpermi2w, 2 vpshufb and one vpor instruction. */ + with 2 vperm[it]2w, 2 vpshufb and one vpor instruction. 
*/ static bool -expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d) +expand_vec_perm_vpermt2_vpshub2 (struct expand_vec_perm_d *d) { if (!TARGET_AVX512BW || !(d->vmode == V64QImode)) return false; @@ -49916,7 +49916,7 @@ ix86_expand_vec_perm_const_1 (struct exp if (expand_vec_perm_vpshufb2_vpermq_even_odd (d)) return true; - if (expand_vec_perm_vpermi2_vpshub2 (d)) + if (expand_vec_perm_vpermt2_vpshub2 (d)) return true; /* ??? Look for narrow permutations whose element orderings would @@ -50064,17 +50064,17 @@ ix86_vectorize_vec_perm_const_ok (machin case E_V8DImode: case E_V8DFmode: if (TARGET_AVX512F) - /* All implementable with a single vpermi2 insn. */ + /* All implementable with a single vperm[it]2 insn. */ return true; break; case E_V32HImode: if (TARGET_AVX512BW) - /* All implementable with a single vpermi2 insn. */ + /* All implementable with a single vperm[it]2 insn. */ return true; break; case E_V64QImode: if (TARGET_AVX512BW) - /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */ + /* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn. */ return true; break; case E_V8SImode: @@ -50082,7 +50082,7 @@ ix86_vectorize_vec_perm_const_ok (machin case E_V4DFmode: case E_V4DImode: if (TARGET_AVX512VL) - /* All implementable with a single vpermi2 insn. */ + /* All implementable with a single vperm[it]2 insn. 
*/ return true; break; case E_V16HImode: --- gcc/testsuite/gcc.target/i386/pr82460-1.c.jj 2017-10-09 18:16:48.143341649 +0200 +++ gcc/testsuite/gcc.target/i386/pr82460-1.c 2017-10-09 18:19:14.121537440 +0200 @@ -0,0 +1,30 @@ +/* PR target/82460 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512vbmi" } */ +/* { dg-final { scan-assembler-not {\mvmovd} } } */ + +#include <x86intrin.h> + +__m512i +f1 (__m512i x, __m512i y, char *z) +{ + return _mm512_permutex2var_epi32 (y, x, _mm512_loadu_si512 (z)); +} + +__m512i +f2 (__m512i x, __m512i y, char *z) +{ + return _mm512_permutex2var_epi32 (x, y, _mm512_loadu_si512 (z)); +} + +__m512i +f3 (__m512i x, __m512i y, __m512i z) +{ + return _mm512_permutex2var_epi8 (y, x, z); +} + +__m512i +f4 (__m512i x, __m512i y, __m512i z) +{ + return _mm512_permutex2var_epi8 (x, y, z); +} --- gcc/testsuite/gcc.target/i386/pr82460-2.c.jj 2017-10-09 18:19:29.014353373 +0200 +++ gcc/testsuite/gcc.target/i386/pr82460-2.c 2017-10-09 18:22:56.806785174 +0200 @@ -0,0 +1,17 @@ +/* PR target/82460 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -mavx512vbmi -mno-prefer-avx256" } */ +/* We want to reuse the permutation mask in the loop, so use vpermt2b rather + than vpermi2b. 
*/ +/* { dg-final { scan-assembler-not {\mvpermi2b\M} } } */ +/* { dg-final { scan-assembler {\mvpermt2b\M} } } */ + +void +foo (unsigned char *__restrict__ x, const unsigned short *__restrict__ y, + unsigned long z) +{ + unsigned char *w = x + z; + do + *x++ = *y++ >> 8; + while (x < w); +} --- gcc/testsuite/gcc.target/i386/avx512f-vpermt2pd-1.c.jj 2016-05-22 12:20:19.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512f-vpermt2pd-1.c 2017-10-09 18:35:28.482494049 +0200 @@ -1,8 +1,8 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> --- gcc/testsuite/gcc.target/i386/avx512vl-vpermt2pd-1.c.jj 2016-05-22 12:20:26.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512vl-vpermt2pd-1.c 2017-10-09 18:49:48.444857047 +0200 @@ -1,11 +1,11 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { 
dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> --- gcc/testsuite/gcc.target/i386/avx512f-vpermt2d-1.c.jj 2016-05-22 12:20:26.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512f-vpermt2d-1.c 2017-10-09 18:35:46.693268798 +0200 @@ -1,8 +1,8 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> --- gcc/testsuite/gcc.target/i386/vect-pack-trunc-2.c.jj 2015-10-11 19:11:13.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/vect-pack-trunc-2.c 2017-10-09 18:46:06.067607664 +0200 @@ -25,4 +25,4 @@ avx512bw_test () abort (); } -/* { dg-final { scan-assembler-times "vpermi2w\[ \\t\]+\[^\n\]*%zmm" 1 } } */ +/* { dg-final { scan-assembler-times 
"vperm\[it]2w\[ \\t\]+\[^\n\]*%zmm" 1 } } */ --- gcc/testsuite/gcc.target/i386/avx512vl-vpermt2ps-1.c.jj 2016-05-22 12:20:09.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512vl-vpermt2ps-1.c 2017-10-09 18:49:30.598077797 +0200 @@ -1,11 +1,11 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> --- gcc/testsuite/gcc.target/i386/avx512vl-vpermt2q-1.c.jj 2016-05-22 12:20:26.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512vl-vpermt2q-1.c 2017-10-09 18:38:17.578402477 +0200 @@ -1,11 +1,11 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ 
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> --- gcc/testsuite/gcc.target/i386/avx512f-vpermt2ps-1.c.jj 2016-05-22 12:20:04.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512f-vpermt2ps-1.c 2017-10-09 18:35:13.240682578 +0200 @@ -1,8 +1,8 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times 
"vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> --- gcc/testsuite/gcc.target/i386/avx512vl-vpermt2d-1.c.jj 2016-05-22 12:20:27.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512vl-vpermt2d-1.c 2017-10-09 18:37:44.068816962 +0200 @@ -1,11 +1,11 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> --- gcc/testsuite/gcc.target/i386/avx512bw-vpermt2w-1.c.jj 2016-05-22 12:20:18.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512bw-vpermt2w-1.c 2017-10-09 
18:38:52.161974708 +0200 @@ -1,14 +1,14 @@ /* { dg-do compile } */ /* { dg-options "-mavx512bw -mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } * -/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } * +/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { 
dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> --- gcc/testsuite/gcc.target/i386/avx512vbmi-vpermt2b-1.c.jj 2016-05-22 12:20:17.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512vbmi-vpermt2b-1.c 2017-10-09 18:39:22.822595462 +0200 @@ -1,14 +1,14 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vbmi -mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+" 3 } } */ -/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+" 3 } } * -/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+" 3 } } */ -/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+" 3 } } * +/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times 
"vperm\[ti]2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ #include <immintrin.h> --- gcc/testsuite/gcc.target/i386/avx512f-vpermt2q-1.c.jj 2016-05-22 12:20:24.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512f-vpermt2q-1.c 2017-10-09 18:34:50.306966248 +0200 @@ -1,8 +1,8 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> Jakub