Hi, The current kunpck[hi|si|di] patterns emit their operands in the wrong order. This is compensated for by an equally wrong operand order in the vec_pack_trunc_[qi|hi|si] expanders, so we still get correct code for vectorized loops. Code using the kunpck* intrinsics directly would be wrong, though. This patch fixes the operand order and adds runtime tests for the _mm512_kunpack* intrinsics.
Bootstrapped and regtested on x86_64-pc-linux-gnu. OK for trunk? Thanks, Ilya -- gcc/ 2016-04-13 Ilya Enkovich <ilya.enkov...@intel.com> * config/i386/i386.md (kunpckhi): Swap operands. (kunpcksi): Likewise. (kunpckdi): Likewise. * config/i386/sse.md (vec_pack_trunc_qi): Likewise. (vec_pack_trunc_<mode>): Likewise. gcc/testsuite/ 2016-04-13 Ilya Enkovich <ilya.enkov...@intel.com> * gcc.target/i386/avx512bw-kunpckdq-2.c: New test. * gcc.target/i386/avx512bw-kunpckwd-2.c: New test. * gcc.target/i386/avx512f-kunpckbw-2.c: New test. diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 09da69e..56a3050 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -8907,7 +8907,7 @@ (const_int 8)) (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))] "TARGET_AVX512F" - "kunpckbw\t{%1, %2, %0|%0, %2, %1}" + "kunpckbw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "mode" "HI") (set_attr "type" "msklog") (set_attr "prefix" "vex")]) @@ -8920,7 +8920,7 @@ (const_int 16)) (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))] "TARGET_AVX512BW" - "kunpckwd\t{%1, %2, %0|%0, %2, %1}" + "kunpckwd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "mode" "SI")]) (define_insn "kunpckdi" @@ -8931,7 +8931,7 @@ (const_int 32)) (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))] "TARGET_AVX512BW" - "kunpckdq\t{%1, %2, %0|%0, %2, %1}" + "kunpckdq\t{%2, %1, %0|%0, %1, %2}" [(set_attr "mode" "DI")]) ;; See comment for addsi_1_zext why we do use nonimmediate_operand diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5132955..b64457e 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -11747,16 +11747,16 @@ (define_expand "vec_pack_trunc_qi" [(set (match_operand:HI 0 ("register_operand")) - (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 1 ("register_operand"))) + (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 ("register_operand"))) (const_int 8)) - (zero_extend:HI (match_operand:QI 2 ("register_operand")))))] + 
(zero_extend:HI (match_operand:QI 1 ("register_operand")))))] "TARGET_AVX512F") (define_expand "vec_pack_trunc_<mode>" [(set (match_operand:<DOUBLEMASKMODE> 0 ("register_operand")) - (ior:<DOUBLEMASKMODE> (ashift:<DOUBLEMASKMODE> (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 1 ("register_operand"))) + (ior:<DOUBLEMASKMODE> (ashift:<DOUBLEMASKMODE> (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 2 ("register_operand"))) (match_dup 3)) - (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 2 ("register_operand")))))] + (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 1 ("register_operand")))))] "TARGET_AVX512BW" { operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)); diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-2.c new file mode 100644 index 0000000..4fe503e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-2.c @@ -0,0 +1,24 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512bw" } */ +/* { dg-require-effective-target avx512bw } */ + +#define AVX512BW + +#include "avx512f-helper.h" + +static __mmask64 __attribute__((noinline,noclone)) +unpack (__mmask64 arg1, __mmask64 arg2) +{ + __mmask64 res; + + res = _mm512_kunpackd (arg1, arg2); + + return res; +} + +void +TEST (void) +{ + if (unpack (0x07UL, 0x70UL) != 0x0700000070UL) + __builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-2.c new file mode 100644 index 0000000..5d7f895 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-2.c @@ -0,0 +1,24 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512bw" } */ +/* { dg-require-effective-target avx512bw } */ + +#define AVX512BW + +#include "avx512f-helper.h" + +static __mmask32 __attribute__((noinline,noclone)) +unpack (__mmask32 arg1, __mmask32 arg2) +{ + __mmask32 res; + + res = _mm512_kunpackw (arg1, arg2); + + return res; +} + +void +TEST (void) +{ + if (unpack (0x07, 
0x70) != 0x070070) + __builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kunpckbw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-kunpckbw-2.c new file mode 100644 index 0000000..86580f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-kunpckbw-2.c @@ -0,0 +1,24 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-require-effective-target avx512f } */ + +#define AVX512F + +#include "avx512f-helper.h" + +static __mmask16 __attribute__((noinline,noclone)) +unpack (__mmask16 arg1, __mmask16 arg2) +{ + __mmask16 res; + + res = _mm512_kunpackb (arg1, arg2); + + return res; +} + +void +TEST (void) +{ + if (unpack (0x07, 0x70) != 0x0770) + __builtin_abort (); +}