Hi, On Tue, Sep 08, 2015 at 11:41:50AM +0300, Kirill Yukhin wrote: > Hi, > On 07 Sep 19:07, Alexander Fomin wrote: > > + tmp = TARGET_AVX512VL ? "p<logic><ssemodesuffix>" : "p<logic>"; > Suppose masking is applied and 1st alternative chosen... > > + ops = "%s\t{%%2, %%0|%%0, %%2}"; > We'll reach here having p<logic> %xmm17, %xmm18 w/o even mention of mask > register. > I think we need to check here if masking is needed and emit EVEX version (3 > args + mask). > > tmp = TARGET_AVX512VL ? "p<logic>q" : "p<logic>"; > Despite of alternative chosen, you force insn to be p<logic>q (when compiled > w/ -mavx512vl). > > ops = "%s\t{%%2, %%0|%%0, %%2}"; > So, here you'll emit, e.g. "pandq %xmm16, %xmm17" > If think it'll be better to attach AVX-512VL related suffix while > discriminating > alternatives. > > -- > Thanks, K I've fixed the problems related to masks/alternatives for both patterns. Is updated version OK for trunk?
Regards, Alexander --- gcc/ PR target/67480 * config/i386/sse.md (define_mode_iterator VI48_AVX_AVX512F): New. (define_mode_iterator VI12_AVX_AVX512F): New. (define_insn "<mask_codefor><code><mode>3<mask_name>"): Change all iterators to VI48_AVX_AVX512F. Extract remaining modes ... (define_insn "*<code><mode>3"): ... Into new pattern using VI12_AVX_AVX512F iterators without masking. gcc/testsuite PR target/67480 * gcc.target/i386/pr67480.c: New test. --- gcc/config/i386/sse.md | 129 +++++++++++++++++++++++++++++--- gcc/testsuite/gcc.target/i386/pr67480.c | 10 +++ 2 files changed, 127 insertions(+), 12 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 4535570..0a57db0 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -416,6 +416,14 @@ [(V16SI "TARGET_AVX512F") V8SI V4SI (V8DI "TARGET_AVX512F") V4DI V2DI]) +(define_mode_iterator VI48_AVX_AVX512F + [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI]) + +(define_mode_iterator VI12_AVX_AVX512F + [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI]) + (define_mode_iterator V48_AVX2 [V4SF V2DF V8SF V4DF @@ -11077,10 +11085,10 @@ }) (define_insn "<mask_codefor><code><mode>3<mask_name>" - [(set (match_operand:VI 0 "register_operand" "=x,v") - (any_logic:VI - (match_operand:VI 1 "nonimmediate_operand" "%0,v") - (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))] + [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,v") + (any_logic:VI48_AVX_AVX512F + (match_operand:VI48_AVX_AVX512F 1 "nonimmediate_operand" "%0,v") + (match_operand:VI48_AVX_AVX512F 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE && <mask_mode512bit_condition> && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" { @@ -11109,24 +11117,120 @@ case V4DImode: case V4SImode: case V2DImode: - if (TARGET_AVX512VL) + tmp = TARGET_AVX512VL ? "p<logic><ssemodesuffix>" : "p<logic>"; + break; + default: + gcc_unreachable (); + } + break; + + case MODE_V8SF: + gcc_assert (TARGET_AVX); + case MODE_V4SF: + gcc_assert (TARGET_SSE); + gcc_assert (!<mask_applied>); + tmp = "<logic>ps"; + break; + + default: + gcc_unreachable (); + } + + switch (which_alternative) + { + case 0: + if (<mask_applied>) + ops = "v%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}"; + else + ops = "%s\t{%%2, %%0|%%0, %%2}"; + break; + case 1: + ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; + break; + default: + gcc_unreachable (); + } + + snprintf (buf, sizeof (buf), ops, tmp); + return buf; +} + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog") + (set (attr "prefix_data16") + (if_then_else + (and (eq_attr "alternative" "0") + (eq_attr "mode" "TI")) + (const_string "1") + (const_string "*"))) + (set_attr "prefix" "<mask_prefix3>") + (set (attr "mode") + (cond [(and (match_test "<MODE_SIZE> == 16") + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) + (const_string "<ssePSmode>") + (match_test "TARGET_AVX2") + (const_string "<sseinsnmode>") + (match_test "TARGET_AVX") + (if_then_else + (match_test "<MODE_SIZE> > 16") + (const_string "V8SF") + (const_string "<sseinsnmode>")) + (ior (not (match_test "TARGET_SSE2")) + (match_test "optimize_function_for_size_p (cfun)")) + (const_string "V4SF") + ] + (const_string "<sseinsnmode>")))]) + +(define_insn "*<code><mode>3" + [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,v") + (any_logic: VI12_AVX_AVX512F + (match_operand:VI12_AVX_AVX512F 1 "nonimmediate_operand" "%0,v") + (match_operand:VI12_AVX_AVX512F 2 "nonimmediate_operand" "xm,vm")))] + "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" +{ + static char buf[64]; + const char *ops; + const char *tmp; + const char *ssesuffix; + + switch (get_attr_mode (insn)) + { + case MODE_XI: + gcc_assert (TARGET_AVX512F); + case MODE_OI: + gcc_assert (TARGET_AVX2 || TARGET_AVX512VL); + case MODE_TI: + gcc_assert (TARGET_SSE2 || TARGET_AVX512VL); + switch (<MODE>mode) + { + case V64QImode: + case V32HImode: + if (TARGET_AVX512F) { - tmp = "p<logic><ssemodesuffix>"; + tmp = "p<logic>"; + ssesuffix = "q"; + break; + } + case V32QImode: + case V16HImode: + case V16QImode: + case V8HImode: + if (TARGET_AVX512VL || TARGET_AVX2 || TARGET_SSE2) + { + tmp = "p<logic>"; + ssesuffix = TARGET_AVX512VL ? "q" : ""; break; } default: - tmp = TARGET_AVX512VL ? "p<logic>q" : "p<logic>"; + gcc_unreachable (); } break; - case MODE_V16SF: - gcc_assert (TARGET_AVX512F); case MODE_V8SF: gcc_assert (TARGET_AVX); case MODE_V4SF: gcc_assert (TARGET_SSE); - tmp = "<logic>ps"; + ssesuffix = ""; break; default: @@ -11137,15 +11241,16 @@ { case 0: ops = "%s\t{%%2, %%0|%%0, %%2}"; + snprintf (buf, sizeof (buf), ops, tmp); break; case 1: - ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; + ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; + snprintf (buf, sizeof (buf), ops, tmp, ssesuffix); break; default: gcc_unreachable (); } - snprintf (buf, sizeof (buf), ops, tmp); return buf; } [(set_attr "isa" "noavx,avx") diff --git a/gcc/testsuite/gcc.target/i386/pr67480.c b/gcc/testsuite/gcc.target/i386/pr67480.c new file mode 100644 index 0000000..90e6a6e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr67480.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -O2 -ftree-vectorize" { target i?86-*-* x86_64-*-* } } */ + +void +foo(const char *in, char *out, unsigned n) +{ + unsigned i; + for (i = 0; i < n; i++) + out[i] &= in[i]; +} -- 1.8.3.1