Hi! This patch fixes various *andnot<mode>3* issues. Two things on the ISA side make andnot harder to handle: there are no VPANDNB and VPANDNW instructions, and while AVX/AVX2 has just a single VPANDN instruction, EVEX only has VPANDND and VPANDNQ. The patch makes the following changes: 1) simplifies the asserts; TARGET_AVX512VL implies both TARGET_AVX2 and TARGET_SSE2, so asserts like TARGET_AVX2 || TARGET_AVX512VL make no sense 2) for V32HImode/V64QImode it emits the vpandnq instruction rather than vpandn, which fails to assemble 3) the *andnot<mode>3 pattern clearly wasn't expecting subst, but because it used (copy-paste?) <mask_operand3_1> in the template, it actually was substituted, which is wrong - we can't implement V64QImode or V32HImode masking of andnot (well, not in a single instruction); I checked that this was the only case of <mask_operand3_1> used in a define_insn without <mask_name>; for V*[SD]Imode the *andnot<mode>3_mask pattern should DTRT (do the right thing) 4) the *andnot<mode>3_mask pattern using the VI12_AVX512VL iterator makes no sense, for similar reasons - VPANDNB and VPANDNW are not in the ISA, not even with AVX512BW 5) formatting fixes
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2016-04-04 Jakub Jelinek <ja...@redhat.com> PR target/70525 * config/i386/sse.md (*andnot<mode>3): Simplify assertions. Use vpandn<ssemodesuffix> for V16SI/V8DImode, vpandnq for V32HI/V64QImode, don't use <mask_operand3_1>, fix up formatting. (*andnot<mode>3_mask): Remove insn with VI12_AVX512VL iterator. * gcc.target/i386/pr70525.c: New test. --- gcc/config/i386/sse.md.jj 2016-04-01 17:21:31.000000000 +0200 +++ gcc/config/i386/sse.md 2016-04-04 14:42:06.296867515 +0200 @@ -11377,45 +11377,46 @@ (define_insn "*andnot<mode>3" case MODE_XI: gcc_assert (TARGET_AVX512F); case MODE_OI: - gcc_assert (TARGET_AVX2 || TARGET_AVX512VL); + gcc_assert (TARGET_AVX2); case MODE_TI: - gcc_assert (TARGET_SSE2 || TARGET_AVX512VL); + gcc_assert (TARGET_SSE2); switch (<MODE>mode) - { - case V16SImode: - case V8DImode: - if (TARGET_AVX512F) - { - tmp = "pandn<ssemodesuffix>"; - break; - } - case V8SImode: - case V4DImode: - case V4SImode: - case V2DImode: - if (TARGET_AVX512VL) - { - tmp = "pandn<ssemodesuffix>"; - break; - } - default: - tmp = TARGET_AVX512VL ? "pandnq" : "pandn"; - } + { + case V64QImode: + case V32HImode: + /* There is no vpandnb or vpandnw instruction, nor vpandn for + 512-bit vectors. Use vpandnq instead. */ + tmp = "pandnq"; + break; + case V16SImode: + case V8DImode: + tmp = "pandn<ssemodesuffix>"; + break; + case V8SImode: + case V4DImode: + case V4SImode: + case V2DImode: + tmp = TARGET_AVX512VL ? "pandn<ssemodesuffix>" : "pandn"; + break; + default: + tmp = TARGET_AVX512VL ? 
"pandnq" : "pandn"; + break; + } break; - case MODE_V16SF: + case MODE_V16SF: gcc_assert (TARGET_AVX512F); - case MODE_V8SF: + case MODE_V8SF: gcc_assert (TARGET_AVX); - case MODE_V4SF: + case MODE_V4SF: gcc_assert (TARGET_SSE); tmp = "andnps"; break; - default: + default: gcc_unreachable (); - } + } switch (which_alternative) { @@ -11423,7 +11424,7 @@ (define_insn "*andnot<mode>3" ops = "%s\t{%%2, %%0|%%0, %%2}"; break; case 1: - ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; + ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; break; default: gcc_unreachable (); @@ -11471,21 +11472,6 @@ (define_insn "*andnot<mode>3_mask" "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"; [(set_attr "type" "sselog") (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "*andnot<mode>3_mask" - [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") - (vec_merge:VI12_AVX512VL - (and:VI12_AVX512VL - (not:VI12_AVX512VL - (match_operand:VI12_AVX512VL 1 "register_operand" "v")) - (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")) - (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C") - (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] - "TARGET_AVX512BW" - "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"; - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) (define_expand "<code><mode>3" --- gcc/testsuite/gcc.target/i386/pr70525.c.jj 2016-04-04 15:13:23.417615588 +0200 +++ gcc/testsuite/gcc.target/i386/pr70525.c 2016-04-04 15:13:04.000000000 +0200 @@ -0,0 +1,32 @@ +/* PR target/70525 */ +/* { dg-do assemble { target avx512bw } } */ +/* { dg-options "-O2 -mavx512bw -mno-avx512vl" } */ + +typedef char v64qi __attribute__ ((vector_size (64))); +typedef short v32hi __attribute__ ((vector_size (64))); +typedef int v16si __attribute__ ((vector_size (64))); +typedef long long v8di __attribute__ ((vector_size (64))); + +v64qi +f1 
(v64qi x, v64qi y) +{ + return x & ~y; +} + +v32hi +f2 (v32hi x, v32hi y) +{ + return x & ~y; +} + +v16si +f3 (v16si x, v16si y) +{ + return x & ~y; +} + +v8di +f4 (v8di x, v8di y) +{ + return x & ~y; +} Jakub