Hello, > This patch is still far too large. > > I think you should split it up based on every single mode iterator that > you need to add or change.
Here's the 11th subpatch. It introduces AVX-512 FMA instructions. Is it OK? Testing: 1. Bootstrap passes. 2. make check shows no regressions. 3. Spec 2000 & 2006 builds show no regressions both with and without the -mavx512f option. 4. Spec 2000 & 2006 runs show no stability regressions without the -mavx512f option. -- Thanks, K PS. If it is OK, I am going to strip the ChangeLog lines out of the big patch. --- gcc/config/i386/i386.c | 2 +- gcc/config/i386/sse.md | 60 ++++++++++++++++++++++++++++++++------------------ 2 files changed, 39 insertions(+), 23 deletions(-) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index f10113f..5908383 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -34785,7 +34785,7 @@ ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total, rtx sub; gcc_assert (FLOAT_MODE_P (mode)); - gcc_assert (TARGET_FMA || TARGET_FMA4); + gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F); /* ??? SSE scalar/vector cost should be used here. */ /* ??? Bald assumption that fma has the same cost as fmul. */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a380690..6adcdd3 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2254,9 +2254,18 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; The standard names for scalar FMA are only available with SSE math enabled. -(define_mode_iterator FMAMODEM [(SF "TARGET_SSE_MATH") - (DF "TARGET_SSE_MATH") - V4SF V2DF V8SF V4DF]) +;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't +;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA +;; and TARGET_FMA4 are both false. 
+(define_mode_iterator FMAMODEM + [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)") + (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)") + (V4SF "TARGET_FMA || TARGET_FMA4") + (V2DF "TARGET_FMA || TARGET_FMA4") + (V8SF "TARGET_FMA || TARGET_FMA4") + (V4DF "TARGET_FMA || TARGET_FMA4") + (V16SF "TARGET_AVX512F") + (V8DF "TARGET_AVX512F")]) (define_expand "fma<mode>4" [(set (match_operand:FMAMODEM 0 "register_operand") @@ -2264,7 +2273,7 @@ (match_operand:FMAMODEM 1 "nonimmediate_operand") (match_operand:FMAMODEM 2 "nonimmediate_operand") (match_operand:FMAMODEM 3 "nonimmediate_operand")))] - "TARGET_FMA || TARGET_FMA4") + "") (define_expand "fms<mode>4" [(set (match_operand:FMAMODEM 0 "register_operand") @@ -2272,7 +2281,7 @@ (match_operand:FMAMODEM 1 "nonimmediate_operand") (match_operand:FMAMODEM 2 "nonimmediate_operand") (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))] - "TARGET_FMA || TARGET_FMA4") + "") (define_expand "fnma<mode>4" [(set (match_operand:FMAMODEM 0 "register_operand") @@ -2280,7 +2289,7 @@ (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand")) (match_operand:FMAMODEM 2 "nonimmediate_operand") (match_operand:FMAMODEM 3 "nonimmediate_operand")))] - "TARGET_FMA || TARGET_FMA4") + "") (define_expand "fnms<mode>4" [(set (match_operand:FMAMODEM 0 "register_operand") @@ -2288,10 +2297,17 @@ (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand")) (match_operand:FMAMODEM 2 "nonimmediate_operand") (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))] - "TARGET_FMA || TARGET_FMA4") + "") ;; The builtins for intrinsics are not constrained by SSE math enabled. 
-(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF]) +(define_mode_iterator FMAMODE [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") + (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") + (V4SF "TARGET_FMA || TARGET_FMA4") + (V2DF "TARGET_FMA || TARGET_FMA4") + (V8SF "TARGET_FMA || TARGET_FMA4") + (V4DF "TARGET_FMA || TARGET_FMA4") + (V16SF "TARGET_AVX512F") + (V8DF "TARGET_AVX512F")]) (define_expand "fma4i_fmadd_<mode>" [(set (match_operand:FMAMODE 0 "register_operand") @@ -2299,7 +2315,7 @@ (match_operand:FMAMODE 1 "nonimmediate_operand") (match_operand:FMAMODE 2 "nonimmediate_operand") (match_operand:FMAMODE 3 "nonimmediate_operand")))] - "TARGET_FMA || TARGET_FMA4") + "") (define_insn "*fma_fmadd_<mode>" [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") @@ -2307,7 +2323,7 @@ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x") (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))] - "TARGET_FMA || TARGET_FMA4" + "" "@ vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} @@ -2318,14 +2334,14 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*fma_fmsub_<mode>" +(define_insn "fma_fmsub_<mode>" [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x") (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))] - "TARGET_FMA || TARGET_FMA4" + "" "@ vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} @@ -2336,14 +2352,14 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*fma_fnmadd_<mode>" +(define_insn "fma_fnmadd_<mode>" [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE (neg:FMAMODE (match_operand:FMAMODE 1 
"nonimmediate_operand" "%0, 0, v, x,x")) (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))] - "TARGET_FMA || TARGET_FMA4" + "" "@ vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} @@ -2362,7 +2378,7 @@ (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))] - "TARGET_FMA || TARGET_FMA4" + "" "@ vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} @@ -2391,7 +2407,7 @@ (match_operand:VF 2 "nonimmediate_operand") (match_operand:VF 3 "nonimmediate_operand")] UNSPEC_FMADDSUB))] - "TARGET_FMA || TARGET_FMA4") + "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") (define_insn "*fma_fmaddsub_<mode>" [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x") @@ -2400,7 +2416,7 @@ (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m") (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x")] UNSPEC_FMADDSUB))] - "TARGET_FMA || TARGET_FMA4" + "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)" "@ vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} @@ -2419,7 +2435,7 @@ (neg:VF (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x"))] UNSPEC_FMADDSUB))] - "TARGET_FMA || TARGET_FMA4" + "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)" "@ vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} @@ -2453,7 +2469,7 @@ (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")) (match_dup 1) (const_int 1)))] - "TARGET_FMA" + "TARGET_FMA || TARGET_AVX512F" "@ vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2} vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}" @@ -2470,7 +2486,7 @@ (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))) (match_dup 1) (const_int 1)))] - 
"TARGET_FMA" + "TARGET_FMA || TARGET_AVX512F" "@ vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2} vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}" @@ -2487,7 +2503,7 @@ (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")) (match_dup 1) (const_int 1)))] - "TARGET_FMA" + "TARGET_FMA || TARGET_AVX512F" "@ vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2} vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}" @@ -2505,7 +2521,7 @@ (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))) (match_dup 1) (const_int 1)))] - "TARGET_FMA" + "TARGET_FMA || TARGET_AVX512F" "@ vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2} vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}" -- 1.7.11.7