On 14 Aug 13:45, Uros Bizjak wrote:
> Please update the above entry.

Whoops. Updated ChangeLog:

gcc/
	* config/i386/i386.c (ix86_expand_special_args_builtin): Handle
	avx512vl_storev8sf_mask, avx512vl_storev8si_mask,
	avx512vl_storev4df_mask, avx512vl_storev4di_mask,
	avx512vl_storev4sf_mask, avx512vl_storev4si_mask,
	avx512vl_storev2df_mask, avx512vl_storev2di_mask,
	avx512vl_loadv8sf_mask, avx512vl_loadv8si_mask,
	avx512vl_loadv4df_mask, avx512vl_loadv4di_mask,
	avx512vl_loadv4sf_mask, avx512vl_loadv4si_mask,
	avx512vl_loadv2df_mask, avx512vl_loadv2di_mask,
	avx512bw_loadv64qi_mask, avx512vl_loadv32qi_mask,
	avx512vl_loadv16qi_mask, avx512bw_loadv32hi_mask,
	avx512vl_loadv16hi_mask, avx512vl_loadv8hi_mask.
	* config/i386/i386.md (define_mode_attr ssemodesuffix): Allow
	V32HI mode.
	* config/i386/sse.md (define_mode_iterator VMOVE): Allow V4TI mode.
	(define_mode_iterator V_AVX512VL): New.
	(define_mode_iterator V): New handling for AVX512VL.
	(define_insn "avx512f_load<mode>_mask"): Delete.
	(define_insn "<avx512>_load<mode>_mask"): New.
	(define_insn "avx512f_store<mode>_mask"): Delete.
	(define_insn "<avx512>_store<mode>_mask"): New.
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -146,10 +146,21 @@
> >     (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
> >     (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
> >     (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
> > -   (V2TI "TARGET_AVX") V1TI
> > +   (V4TI "TARGET_AVX") (V2TI "TARGET_AVX") V1TI
>
> Are you sure TARGET_AVX is the correct condition for V4TI?

Right! This should be TARGET_AVX512BW (because the corresponding
shifts belong to AVX-512BW).

> > +;; All AVX512VL vector modes
> > +(define_mode_iterator V_AVX512VL
> > +  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
> > +   (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
> > +   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
>
> Is the above && OK? So, you have to pass -m...vl and -m...bw to the
> compiler to enable these modes?

Yeah. This looks strange, but it should be so. The simplest example is
vpaddb (with regno > 15): this insn is enabled only when the AVX512VL
*and* AVX512BW bits are both on. (Small compile sketches for both
points follow the patch.)

Updated patch at the bottom. Bootstrapped. Is it ok?

--
Thanks, K

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 183b7be..da01ac6 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -34722,6 +34722,14 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
 	case CODE_FOR_avx512f_storev16si_mask:
 	case CODE_FOR_avx512f_storev8df_mask:
 	case CODE_FOR_avx512f_storev8di_mask:
+	case CODE_FOR_avx512vl_storev8sf_mask:
+	case CODE_FOR_avx512vl_storev8si_mask:
+	case CODE_FOR_avx512vl_storev4df_mask:
+	case CODE_FOR_avx512vl_storev4di_mask:
+	case CODE_FOR_avx512vl_storev4sf_mask:
+	case CODE_FOR_avx512vl_storev4si_mask:
+	case CODE_FOR_avx512vl_storev2df_mask:
+	case CODE_FOR_avx512vl_storev2di_mask:
 	  aligned_mem = true;
 	  break;
 	default:
@@ -34765,6 +34773,20 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
 	case CODE_FOR_avx512f_loadv16si_mask:
 	case CODE_FOR_avx512f_loadv8df_mask:
 	case CODE_FOR_avx512f_loadv8di_mask:
+	case CODE_FOR_avx512vl_loadv8sf_mask:
+	case CODE_FOR_avx512vl_loadv8si_mask:
+	case CODE_FOR_avx512vl_loadv4df_mask:
+	case CODE_FOR_avx512vl_loadv4di_mask:
+	case CODE_FOR_avx512vl_loadv4sf_mask:
+	case CODE_FOR_avx512vl_loadv4si_mask:
+	case CODE_FOR_avx512vl_loadv2df_mask:
+	case CODE_FOR_avx512vl_loadv2di_mask:
+	case CODE_FOR_avx512bw_loadv64qi_mask:
+	case CODE_FOR_avx512vl_loadv32qi_mask:
+	case CODE_FOR_avx512vl_loadv16qi_mask:
+	case CODE_FOR_avx512bw_loadv32hi_mask:
+	case CODE_FOR_avx512vl_loadv16hi_mask:
+	case CODE_FOR_avx512vl_loadv8hi_mask:
 	  aligned_mem = true;
 	  break;
 	default:
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a72c206..b8ce6c0 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1054,7 +1054,7 @@
    (V4SF "ps") (V2DF "pd")
    (V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")
    (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")
-   (V64QI "b") (V16SI "d") (V8DI "q")])
+   (V64QI "b") (V32HI "w") (V16SI "d") (V8DI "q")])
 
 ;; SSE vector suffix for floating point modes
 (define_mode_attr ssevecmodesuffix [(SF "ps") (DF "pd")])
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 89a1842..ea56bcb 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -146,10 +146,21 @@
    (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
    (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
    (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
-   (V2TI "TARGET_AVX") V1TI
+   (V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX") V1TI
    (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
    (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
 
+;; All AVX512VL vector modes
+(define_mode_iterator V_AVX512VL
+  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
+   (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
+   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
+   (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
+   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
+   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+
 ;; All vector modes
 (define_mode_iterator V
   [(V32QI "TARGET_AVX") V16QI
@@ -708,12 +719,10 @@
     case 2:
       /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
	 in avx512f, so we need to use workarounds, to access sse registers
-	 16-31, which are evex-only.  */
-      if (TARGET_AVX512F && <MODE_SIZE> < 64
-	  && ((REG_P (operands[0])
-	       && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
-	      || (REG_P (operands[1])
-		  && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
+	 16-31, which are evex-only.  In avx512vl we don't need workarounds.  */
+      if (TARGET_AVX512F && GET_MODE_SIZE (<MODE>mode) < 64 && !TARGET_AVX512VL
+	  && ((REG_P (operands[0]) && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
+	      || (REG_P (operands[1]) && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
 	{
 	  if (memory_operand (operands[0], <MODE>mode))
 	    {
@@ -777,9 +786,11 @@
       if (TARGET_AVX
	  && (misaligned_operand (operands[0], <MODE>mode)
	      || misaligned_operand (operands[1], <MODE>mode)))
-	return "vmovdqu\t{%1, %0|%0, %1}";
+	return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}"
+			       : "vmovdqu\t{%1, %0|%0, %1}";
       else
-	return "%vmovdqa\t{%1, %0|%0, %1}";
+	return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
+			       : "%vmovdqa\t{%1, %0|%0, %1}";
     case MODE_XI:
       if (misaligned_operand (operands[0], <MODE>mode)
	  || misaligned_operand (operands[1], <MODE>mode))
@@ -813,25 +824,37 @@
 	]
 	(const_string "<sseinsnmode>")))])
 
-(define_insn "avx512f_load<mode>_mask"
-  [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
-	(vec_merge:VI48F_512
-	  (match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
-	  (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
+(define_insn "<avx512>_load<mode>_mask"
+  [(set (match_operand:V_AVX512VL 0 "register_operand" "=v,v")
+	(vec_merge:V_AVX512VL
+	  (match_operand:V_AVX512VL 1 "nonimmediate_operand" "v,m")
+	  (match_operand:V_AVX512VL 2 "vector_move_operand" "0C,0C")
 	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
   "TARGET_AVX512F"
 {
   switch (MODE_<sseinsnmode>)
     {
     case MODE_V8DF:
+    case MODE_V4DF:
+    case MODE_V2DF:
     case MODE_V16SF:
+    case MODE_V8SF:
+    case MODE_V4SF:
       if (misaligned_operand (operands[1], <MODE>mode))
	return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
       return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
     default:
-      if (misaligned_operand (operands[1], <MODE>mode))
+      /* There is no vmovdqa8/16, use vmovdqu8/16 instead.  */
+      if (<MODE>mode == V64QImode
+	  || <MODE>mode == V32QImode
+	  || <MODE>mode == V16QImode
+	  || <MODE>mode == V32HImode
+	  || <MODE>mode == V16HImode
+	  || <MODE>mode == V8HImode
+	  || misaligned_operand (operands[1], <MODE>mode))
 	return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
-      return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
+      else
+	return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
     }
 }
   [(set_attr "type" "ssemov")
@@ -851,10 +874,10 @@
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "avx512f_store<mode>_mask"
-  [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
-	(vec_merge:VI48F_512
-	  (match_operand:VI48F_512 1 "register_operand" "v")
+(define_insn "<avx512>_store<mode>_mask"
+  [(set (match_operand:V_AVX512VL 0 "memory_operand" "=m")
+	(vec_merge:V_AVX512VL
+	  (match_operand:V_AVX512VL 1 "register_operand" "v")
 	  (match_dup 0)
 	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
   "TARGET_AVX512F"
@@ -862,10 +885,23 @@
   switch (MODE_<sseinsnmode>)
     {
     case MODE_V8DF:
+    case MODE_V4DF:
+    case MODE_V2DF:
     case MODE_V16SF:
+    case MODE_V8SF:
+    case MODE_V4SF:
       return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
     default:
-      return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+      /* There is no vmovdqa8/16, use vmovdqu8/16 instead.  */
+      if (<MODE>mode == V64QImode
+	  || <MODE>mode == V32QImode
+	  || <MODE>mode == V16QImode
+	  || <MODE>mode == V32HImode
+	  || <MODE>mode == V16HImode
+	  || <MODE>mode == V8HImode)
+	return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+      else
+	return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
     }
 }
   [(set_attr "type" "ssemov")
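To make the V4TI point concrete: the whole-vector byte shifts that use
V4TI mode are AVX-512BW instructions, so TARGET_AVX512BW is the right
gate. A minimal compile sketch (only the intrinsic is real; the wrapper
name is invented for the example):

/* Needs -mavx512bw: the 512-bit vpslldq operates on four 128-bit
   lanes, i.e. V4TI-shaped data.  */
#include <immintrin.h>

__m512i
shift_lanes_left (__m512i x)
{
  return _mm512_bslli_epi128 (x, 3);	/* vpslldq $3, %zmmN  */
}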
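And for the "TARGET_AVX512VL && TARGET_AVX512BW" conditions, a masked
byte add on a 256-bit vector is the easy way to see it from C; this
only compiles when both -mavx512vl and -mavx512bw are given (wrapper
name invented again):

/* vpaddb under a mask (or touching ymm16+) is EVEX-encoded and
   needs AVX512VL *and* AVX512BW.  */
#include <immintrin.h>

__m256i
masked_padb (__m256i src, __mmask32 k, __m256i a, __m256i b)
{
  return _mm256_mask_add_epi8 (src, k, a, b);	/* vpaddb %ymm{%k}  */
}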
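The new CODE_FOR_avx512vl_* entries in ix86_expand_special_args_builtin
mark the masked load/store builtins whose memory operand is aligned.
Roughly, they sit behind intrinsics like this one (the intrinsic is
real; the wrapper is invented):

/* Needs -mavx512vl; mem must be 32-byte aligned for the aligned form,
   which expands through avx512vl_loadv8si_mask.  */
#include <immintrin.h>

__m256i
masked_load_d (__m256i src, __mmask8 k, const void *mem)
{
  return _mm256_mask_load_epi32 (src, k, mem);	/* vmovdqa32 {%k}  */
}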
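The vmovdqa8/16 note in the new patterns can also be seen from the
intrinsics side: byte/word masked moves exist only in unaligned form,
so even well-aligned accesses emit vmovdqu8/vmovdqu16 (wrapper name
invented):

/* Needs -mavx512vl -mavx512bw; there is no vmovdqa8, so this emits
   vmovdqu8 whatever the alignment.  */
#include <immintrin.h>

__m256i
masked_load_b (__m256i src, __mmask32 k, const void *mem)
{
  return _mm256_mask_loadu_epi8 (src, k, mem);	/* vmovdqu8 {%k}  */
}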