r12-6103-g1a7ce8570997eb combines vpcmpuw + zero_extend into a single vpcmpuw by means of a pre_reload splitter, but the splitter transforms the zero_extend into a subreg, which makes reload think the upper part is garbage; that is not correct.
The patch changes the zero_extend define_insn_and_split patterns into plain define_insn patterns so that the zero_extend is kept. Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ready to push to trunk. gcc/ChangeLog: PR target/117159 * config/i386/sse.md (*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>): Change from define_insn_and_split to define_insn. (*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>): Ditto. (*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>): Ditto. (*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>): Ditto. (*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2): Split to the zero_extend pattern. (*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2): Ditto. (*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2): Ditto. (*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr117159.c: New test. * gcc.target/i386/avx512bw-pr103750-1.c: Remove xfail. * gcc.target/i386/avx512bw-pr103750-2.c: Remove xfail. 
--- gcc/config/i386/sse.md | 186 +++++++----------- .../gcc.target/i386/avx512bw-pr103750-1.c | 3 +- .../gcc.target/i386/avx512bw-pr103750-2.c | 3 +- gcc/testsuite/gcc.target/i386/pr117159.c | 42 ++++ 4 files changed, 113 insertions(+), 121 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr117159.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a45b50ad732..06c2c9d7a5e 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -4298,32 +4298,19 @@ (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>" ;; Since vpcmpd implicitly clear the upper bits of dest, transform ;; vpcmpd + zero_extend to vpcmpd since the instruction -(define_insn_and_split "*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>" - [(set (match_operand:SWI248x 0 "register_operand") +(define_insn "*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>" + [(set (match_operand:SWI248x 0 "register_operand" "=k") (zero_extend:SWI248x (unspec:<V48H_AVX512VL:avx512fmaskmode> - [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand") - (match_operand:V48H_AVX512VL 2 "nonimmediate_operand") - (match_operand:SI 3 "const_0_to_7_operand")] + [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand" "v") + (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "const_0_to_7_operand" "n")] UNSPEC_PCMP)))] "TARGET_AVX512F && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW) - && ix86_pre_reload_split () && (GET_MODE_NUNITS (<V48H_AVX512VL:MODE>mode) < GET_MODE_PRECISION (<SWI248x:MODE>mode))" - "#" - "&& 1" - [(set (match_dup 0) - (unspec:<V48H_AVX512VL:avx512fmaskmode> - [(match_dup 1) - (match_dup 2) - (match_dup 3)] - UNSPEC_PCMP))] -{ - operands[1] = force_reg (<V48H_AVX512VL:MODE>mode, operands[1]); - operands[0] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode, - operands[0], <SWI248x:MODE>mode); -} + "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 
[(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") @@ -4351,21 +4338,19 @@ (define_insn_and_split "*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:m "#" "&& 1" [(set (match_dup 0) - (unspec:<V48H_AVX512VL:avx512fmaskmode> - [(match_dup 1) - (match_dup 2) - (match_dup 3)] - UNSPEC_PCMP)) - (set (match_dup 4) (match_dup 0))] + (zero_extend:SWI248x + (unspec:<V48H_AVX512VL:avx512fmaskmode> + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_PCMP))) + (set (match_dup 4) (match_dup 5))] { - operands[1] = force_reg (<V48H_AVX512VL:MODE>mode, operands[1]); - operands[0] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode, + operands[5] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode, operands[0], <SWI248x:MODE>mode); -} - [(set_attr "type" "ssecmp") - (set_attr "length_immediate" "1") - (set_attr "prefix" "evex") - (set_attr "mode" "<V48H_AVX512VL:sseinsnmode>")]) + SUBREG_PROMOTED_VAR_P (operands[5]) = 1; + SUBREG_PROMOTED_SET (operands[5], 1); +}) (define_insn_and_split "*<avx512>_cmp<mode>3" [(set (match_operand:<avx512fmaskmode> 0 "register_operand") @@ -4400,31 +4385,18 @@ (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>" (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn_and_split "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>" - [(set (match_operand:SWI248x 0 "register_operand") +(define_insn "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>" + [(set (match_operand:SWI248x 0 "register_operand" "=k") (zero_extend:SWI248x (unspec:<VI12_AVX512VL:avx512fmaskmode> - [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand") - (match_operand:VI12_AVX512VL 2 "nonimmediate_operand") - (match_operand:SI 3 "const_0_to_7_operand")] + [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v") + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "const_0_to_7_operand" "n")] UNSPEC_PCMP)))] "TARGET_AVX512BW - && 
ix86_pre_reload_split () - && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) - < GET_MODE_PRECISION (<SWI248x:MODE>mode))" - "#" - "&& 1" - [(set (match_dup 0) - (unspec:<VI12_AVX512VL:avx512fmaskmode> - [(match_dup 1) - (match_dup 2) - (match_dup 3)] - UNSPEC_PCMP))] -{ - operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]); - operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode, - operands[0], <SWI248x:MODE>mode); -} + && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) + < GET_MODE_PRECISION (<SWI248x:MODE>mode))" + "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") @@ -4451,16 +4423,18 @@ (define_insn_and_split "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:m "#" "&& 1" [(set (match_dup 0) - (unspec:<VI12_AVX512VL:avx512fmaskmode> - [(match_dup 1) - (match_dup 2) - (match_dup 3)] - UNSPEC_PCMP)) - (set (match_dup 4) (match_dup 0))] + (zero_extend:SWI248x + (unspec:<VI12_AVX512VL:avx512fmaskmode> + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_PCMP))) + (set (match_dup 4) (match_dup 5))] { - operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]); - operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode, + operands[5] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode, operands[0], <SWI248x:MODE>mode); + SUBREG_PROMOTED_VAR_P (operands[5]) = 1; + SUBREG_PROMOTED_SET (operands[5], 1); } [(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") @@ -4518,31 +4492,18 @@ (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>" (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn_and_split "*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>" - [(set (match_operand:SWI248x 0 "register_operand") +(define_insn "*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>" + [(set (match_operand:SWI248x 0 "register_operand" "=k") (zero_extend:SWI248x 
(unspec:<VI12_AVX512VL:avx512fmaskmode> - [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand") - (match_operand:VI12_AVX512VL 2 "nonimmediate_operand") - (match_operand:SI 3 "const_0_to_7_operand")] + [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v") + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "const_0_to_7_operand" "n")] UNSPEC_UNSIGNED_PCMP)))] "TARGET_AVX512BW - && ix86_pre_reload_split () && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) < GET_MODE_PRECISION (<SWI248x:MODE>mode))" - "#" - "&& 1" - [(set (match_dup 0) - (unspec:<VI12_AVX512VL:avx512fmaskmode> - [(match_dup 1) - (match_dup 2) - (match_dup 3)] - UNSPEC_UNSIGNED_PCMP))] -{ - operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]); - operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode, - operands[0], <SWI248x:MODE>mode); -} + "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") @@ -4570,16 +4531,18 @@ (define_insn_and_split "*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x: "#" "&& 1" [(set (match_dup 0) - (unspec:<VI12_AVX512VL:avx512fmaskmode> - [(match_dup 1) - (match_dup 2) - (match_dup 3)] - UNSPEC_UNSIGNED_PCMP)) - (set (match_dup 4) (match_dup 0))] -{ - operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]); - operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode, + (zero_extend:SWI248x + (unspec:<VI12_AVX512VL:avx512fmaskmode> + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_UNSIGNED_PCMP))) + (set (match_dup 4) (match_dup 5))] +{ + operands[5] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode, operands[0], <SWI248x:MODE>mode); + SUBREG_PROMOTED_VAR_P (operands[5]) = 1; + SUBREG_PROMOTED_SET (operands[5], 1); } [(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") @@ -4615,32 +4578,19 @@ (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>" (set_attr "prefix" "evex") 
(set_attr "mode" "<sseinsnmode>")]) -(define_insn_and_split "*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>" - [(set (match_operand:SWI248x 0 "register_operand") +(define_insn "*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>" + [(set (match_operand:SWI248x 0 "register_operand" "=k") (zero_extend:SWI248x (unspec:<VI48_AVX512VL:avx512fmaskmode> - [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand") - (match_operand:VI48_AVX512VL 2 "nonimmediate_operand") - (match_operand:SI 3 "const_0_to_7_operand")] + [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "v") + (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "const_0_to_7_operand" "n")] UNSPEC_UNSIGNED_PCMP)))] "TARGET_AVX512F && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW) - && ix86_pre_reload_split () && (GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode) < GET_MODE_PRECISION (<SWI248x:MODE>mode))" - "#" - "&& 1" - [(set (match_dup 0) - (unspec:<VI48_AVX512VL:avx512fmaskmode> - [(match_dup 1) - (match_dup 2) - (match_dup 3)] - UNSPEC_UNSIGNED_PCMP))] -{ - operands[1] = force_reg (<VI48_AVX512VL:MODE>mode, operands[1]); - operands[0] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode, - operands[0], <SWI248x:MODE>mode); -} + "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") @@ -4668,16 +4618,18 @@ (define_insn_and_split "*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x: "#" "&& 1" [(set (match_dup 0) - (unspec:<VI48_AVX512VL:avx512fmaskmode> - [(match_dup 1) - (match_dup 2) - (match_dup 3)] - UNSPEC_UNSIGNED_PCMP)) - (set (match_dup 4) (match_dup 0))] -{ - operands[1] = force_reg (<VI48_AVX512VL:MODE>mode, operands[1]); - operands[0] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode, + (zero_extend:SWI248x + (unspec:<VI48_AVX512VL:avx512fmaskmode> + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_UNSIGNED_PCMP))) + 
(set (match_dup 4) (match_dup 5))] +{ + operands[5] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode, operands[0], <SWI248x:MODE>mode); + SUBREG_PROMOTED_VAR_P (operands[5]) = 1; + SUBREG_PROMOTED_SET (operands[5], 1); } [(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c index b1165f069bb..e7d6183232b 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c @@ -1,8 +1,7 @@ /* PR target/103750 */ /* { dg-do compile } */ /* { dg-options "-O2 -mavx512bw -mavx512vl" } */ -/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ -/* xfail need to be fixed. */ +/* { dg-final { scan-assembler-not "kmov" } } */ #include <immintrin.h> extern __m128i* pi128; diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c index 7303f5403ba..3392e193222 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c @@ -1,8 +1,7 @@ /* PR target/103750 */ /* { dg-do compile } */ /* { dg-options "-O2 -mavx512dq -mavx512bw -mavx512vl" } */ -/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ -/* xfail need to be fixed. 
*/ +/* { dg-final { scan-assembler-not "kmov" } } */ #include <immintrin.h> extern __m128i* pi128; diff --git a/gcc/testsuite/gcc.target/i386/pr117159.c b/gcc/testsuite/gcc.target/i386/pr117159.c new file mode 100644 index 00000000000..b67d682ecef --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117159.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-options "-Os -mavx512bw" } */ +/* { dg-require-effective-target avx512bw } */ + +typedef __attribute__((__vector_size__ (4))) unsigned char W; +typedef __attribute__((__vector_size__ (64))) int V; +typedef __attribute__((__vector_size__ (64))) long long Vq; + +W w; +V v; +Vq vq; + +static inline W +foo (short m) +{ + unsigned k = __builtin_ia32_pcmpgtq512_mask ((Vq) { }, vq, m); + W r = (W) k + w; + return r; +} + +static inline W +foo1 (short m) +{ + unsigned k = __builtin_ia32_pcmpgtd512_mask ((V) {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, v, m); + W r = (W) k + w; + return r; +} + +int +main () +{ + if (!__builtin_cpu_supports ("avx512bw")) + return 0; + W y = foo1 (65535); + if (!y[0] || !y[1] || y[2] || y[3]) + __builtin_abort(); + W x = foo (65535); + if (x[0] || x[1] || x[2] || x[3]) + __builtin_abort(); + + return 0; +} -- 2.31.1