Hello, The patch at the bottom fixes the memory operand modifiers for Intel syntax in the broadcast pattern.
Bootstrapped and regtested on 32,64b x86 target. I'll check it into main trunk and gcc-5 branch. gcc/ PR target/70662 * config/i386/sse.md: Use proper memory operand modifiers. gcc/testsuite/ PR target/70662 * gcc.target/i386/pr70662.c: New test. -- Thanks, K commit 4923dda50a901bf38d386818fcc5347e3882cd99 Author: Kirill Yukhin <kirill.yuk...@intel.com> Date: Fri Apr 15 09:37:48 2016 +0300 AVX-512. Fix PR target/70662 - use proper operand modifiers. diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b64457e..4d2927e 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -17262,9 +17262,12 @@ /* There is no DF broadcast (in AVX-512*) to 128b register. Mimic it with integer variant. */ if (<MODE>mode == V2DFmode) - return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; + return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"; + + if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 32) + return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"; else - return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; + return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"; } [(set_attr "type" "ssemov") (set_attr "prefix" "evex") diff --git a/gcc/testsuite/gcc.target/i386/pr70662.c b/gcc/testsuite/gcc.target/i386/pr70662.c new file mode 100755 index 0000000..546211d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr70662.c @@ -0,0 +1,18 @@ +/* { dg-do assemble { target { ! 
ia32 } } } */ +/* { dg-require-effective-target avx512vbmi } */ +/* { dg-options "-Og -fschedule-insns -fno-tree-fre -mavx512vbmi --param=max-sched-ready-insns=1 -masm=intel" } */ + +typedef char v64u8 __attribute__((vector_size(64))); +typedef int v64u32 __attribute__((vector_size(64))); +typedef long v64u64 __attribute__((vector_size(64))); +typedef __int128 v64u128 __attribute__((vector_size(64))); + +v64u128 +foo(int u8_0, unsigned u128_0, v64u32 v64u32_1, v64u32 v64u32_0, v64u64 v64u64_0, v64u128 v64u128_0) +{ + v64u8 v64u8_0 = v64u8_0; + v64u32_0 = v64u32_0 >> (v64u32){0, 0, 0, 1, 0, ((v64u64)v64u64_0)[u8_0], ((v64u32)v64u128_0)[15], 0, 0, 0, 0, 4, ((v64u64)v64u64_0)[v64u32_0[0]] - 1}; + v64u8_0 = v64u8_0 << ((v64u8)v64u32_1 & 1); + v64u64_0[0] >>= 0; + return u128_0 + (v64u128)v64u8_0 + (v64u128)v64u32_0 + (v64u128)v64u64_0; +}