Hello, The attached patch adds support for the fmasf4 pattern on SH. Tested against rev 188280 with
make info dvi pdf
make -k check RUNTESTFLAGS="--target_board=sh-sim \{-m2/-ml,-m2/-mb,-m2a/-mb,-m2a-single/-mb,-m4/-ml,-m4/-mb,-m4-single/-ml,-m4-single/-mb,-m4a-single/-ml,-m4a-single/-mb}"
and no new failures, except for the recently added PR 51340 test cases. I'd like to remove those in another patch because they don't make much sense anymore after this patch.
Cheers, Oleg
ChangeLog: PR target/53511 * config/sh/sh.md (fmasf4): New expander. (*macsf3): Rename to fmasf4_i. Adapt to fma pattern. (mac_media): Rename to fmasf4_media. Adapt to fma pattern. * config/sh/sh.opt (mfused-madd): Remove. * config/sh/sh.c (sh_option_override): Remove mfused-madd handling. (builtin_description bdesc): Remove __builtin_sh_media_FMAC_S. * config.gcc (sh[123456789lbe]*-*-* | sh-*-*): Add fused-madd.opt as extra options. * doc/invoke.texi (SH Options): Update mfused-madd and mno-fused-madd descriptions.
testsuite/ChangeLog: PR target/53511 * gcc.target/sh/pr53511-1.c: New.
Index: gcc/config/sh/sh.md =================================================================== --- gcc/config/sh/sh.md (revision 188280) +++ gcc/config/sh/sh.md (working copy) @@ -10329,6 +10329,9 @@ "fmul.s %1, %2, %0" [(set_attr "type" "fparith_media")]) +;; FIXME: These fmac combine pass assisting specifics are obsolete since +;; we now use the FMA patterns, which do not depend on the combine +;; pass anymore. ;; Unfortunately, the combiner is unable to cope with the USE of the FPSCR ;; register in feeding fp instructions. Thus, in order to generate fmac, ;; we start out with a mulsf pattern that does not depend on fpscr. @@ -10359,26 +10362,42 @@ [(set_attr "type" "fp") (set_attr "fp_mode" "single")]) -(define_insn "mac_media" - [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") - (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f") - (match_operand:SF 2 "fp_arith_reg_operand" "f")) - (match_operand:SF 3 "fp_arith_reg_operand" "0")))] - "TARGET_SHMEDIA_FPU && TARGET_FMAC" - "fmac.s %1, %2, %0" - [(set_attr "type" "fparith_media")]) +;; FMA (fused multiply-add) patterns +(define_expand "fmasf4" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "") + (fma:SF (match_operand:SF 1 "fp_arith_reg_operand" "") + (match_operand:SF 2 "fp_arith_reg_operand" "") + (match_operand:SF 3 "fp_arith_reg_operand" "")))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH2E) + { + emit_sf_insn (gen_fmasf4_i (operands[0], operands[1], operands[2], + operands[3], get_fpscr_rtx ())); + DONE; + } +}) -(define_insn "*macsf3" +(define_insn "fmasf4_i" [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") - (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%w") - (match_operand:SF 2 "fp_arith_reg_operand" "f")) - (match_operand:SF 3 "arith_reg_operand" "0"))) + (fma:SF (match_operand:SF 1 "fp_arith_reg_operand" "w") + (match_operand:SF 2 "fp_arith_reg_operand" "f") + (match_operand:SF 3 "fp_arith_reg_operand" "0"))) (use (match_operand:PSI 4 
"fpscr_operand" "c"))] - "TARGET_SH2E && TARGET_FMAC" - "fmac fr0,%2,%0" + "TARGET_SH2E" + "fmac %1,%2,%0" [(set_attr "type" "fp") (set_attr "fp_mode" "single")]) +(define_insn "fmasf4_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (fma:SF (match_operand:SF 1 "fp_arith_reg_operand" "f") + (match_operand:SF 2 "fp_arith_reg_operand" "f") + (match_operand:SF 3 "fp_arith_reg_operand" "0")))] + "TARGET_SHMEDIA_FPU" + "fmac.s %1, %2, %0" + [(set_attr "type" "fparith_media")]) + (define_expand "divsf3" [(set (match_operand:SF 0 "arith_reg_operand" "") (div:SF (match_operand:SF 1 "arith_reg_operand" "") Index: gcc/config/sh/sh.opt =================================================================== --- gcc/config/sh/sh.opt (revision 188280) +++ gcc/config/sh/sh.opt (working copy) @@ -257,10 +257,6 @@ Target RejectNegative Joined Var(sh_fixed_range_str) Specify range of registers to make fixed -mfused-madd -Target Var(TARGET_FMAC) -Enable the use of the fused floating point multiply-accumulate operation - mgettrcost= Target RejectNegative Joined UInteger Var(sh_gettrcost) Init(-1) Cost to assume for gettr insn Index: gcc/config/sh/sh.c =================================================================== --- gcc/config/sh/sh.c (revision 188280) +++ gcc/config/sh/sh.c (working copy) @@ -878,13 +878,6 @@ if (flag_unsafe_math_optimizations) { - /* Enable fmac insn for "a * b + c" SFmode calculations when -ffast-math - is enabled and -mno-fused-madd is not specified by the user. - The fmac insn can't be enabled by default due to the implied - FMA semantics. See also PR target/29100. */ - if (global_options_set.x_TARGET_FMAC == 0) - TARGET_FMAC = 1; - /* Enable fsca insn for SH4A if not otherwise specified by the user. 
*/ if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP) TARGET_FSCA = 1; @@ -11231,7 +11224,6 @@ { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 }, { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 }, { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 }, - { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 }, { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 }, { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 }, { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 }, Index: gcc/doc/invoke.texi =================================================================== --- gcc/doc/invoke.texi (revision 188280) +++ gcc/doc/invoke.texi (working copy) @@ -18307,14 +18307,12 @@ @itemx -mno-fused-madd @opindex mfused-madd @opindex mno-fused-madd -If the processor type supports it, setting @code{-mfused-madd} will allow the -usage of the @code{fmac} instruction (floating-point multiply-accumulate) for -regular calculations. Enabling this option might generate faster code but also -produce different numeric floating-point results compared to strict IEEE 754 -arithmetic. @code{-mfused-madd} is enabled by default by option -@option{-funsafe-math-optimizations}. Setting @code{-mno-fused-madd} will -disallow the usage of the @code{fmac} instruction for regular calculations -even if @option{-funsafe-math-optimizations} is in effect. +Generate code that uses (does not use) the floating-point multiply and +accumulate instructions. These instructions are generated by default +if hardware floating point is used. The machine-dependent +@option{-mfused-madd} option is now mapped to the machine-independent +@option{-ffp-contract=fast} option, and @option{-mno-fused-madd} is +mapped to @option{-ffp-contract=off}. 
@item -mfsca @itemx -mno-fsca Index: gcc/testsuite/gcc.target/sh/pr53511-1.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr53511-1.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr53511-1.c (revision 0) @@ -0,0 +1,14 @@ +/* Verify that the fmac insn is used for the standard fmaf function. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m1" "-m2*" "-m4al" "*nofpu" "-m4-340*" "-m4-400*" "-m4-500*" "-m5*" } { "" } } */ +/* { dg-final { scan-assembler "fmac" } } */ + +#include <math.h> + +float +test_func_00 (float a, float b, float c) +{ + return fmaf (a, b, c); +} + Index: gcc/config.gcc =================================================================== --- gcc/config.gcc (revision 188280) +++ gcc/config.gcc (working copy) @@ -449,6 +449,7 @@ sh[123456789lbe]*-*-* | sh-*-*) cpu_type=sh need_64bit_hwint=yes + extra_options="${extra_options} fused-madd.opt" ;; v850*-*-*) cpu_type=v850