On Fri, 2012-06-01 at 22:15 +0900, Kaz Kojima wrote: > I see a new failure > > FAIL: gcc.target/sh/pr53512-1.c scan-assembler fsca > > on sh4-unknown-linux-gnu with the patch. Looks that the test > fails due to TARGET_HAS_SINCOS which is defined on linux targets. > It seems that we need a sincossf3 expander for this test on > TARGET_HAS_SINCOS targets. >
Thanks for catching this! The attached patch should fix the issue, by folding the existing sinsf2 and cossf2 expanders into a single sincossf3. I ran the test cases again with TARGET_HAS_SINCOS set to '1' and to '0' and they pass. Cheers, Oleg ChangeLog: PR target/53512 * sh.opt (mfsca, mfsrra): New options. * sh.md (rsqrtsf2): Use TARGET_FPU_ANY and TARGET_FSRRA condition. (fsca): Use TARGET_FPU_ANY and TARGET_FSCA condition. (sinsf2, cossf2): Fold expanders to... (sincossf3): ...this new expander. Use TARGET_FPU_ANY and TARGET_FSCA condition. * sh.c (sh_option_override): Handle TARGET_FSRRA and TARGET_FSCA. * doc/invoke.texi (SH Options): Add descriptions for -mfsca, -mno-fsca, -mfsrra, -mno-fsrra. testsuite/ChangeLog: PR target/53512 * gcc.target/sh/pr53512-1.c: New. * gcc.target/sh/pr53512-2.c: New. * gcc.target/sh/pr53512-3.c: New. * gcc.target/sh/pr53512-4.c: New.
Index: gcc/config/sh/sh.c =================================================================== --- gcc/config/sh/sh.c (revision 188026) +++ gcc/config/sh/sh.c (working copy) @@ -876,13 +876,30 @@ align_functions = min_align; } - /* Enable fmac insn for "a * b + c" SFmode calculations when -ffast-math - is enabled and -mno-fused-madd is not specified by the user. - The fmac insn can't be enabled by default due to the implied - FMA semantics. See also PR target/29100. */ - if (global_options_set.x_TARGET_FMAC == 0 && flag_unsafe_math_optimizations) - TARGET_FMAC = 1; + if (flag_unsafe_math_optimizations) + { + /* Enable fmac insn for "a * b + c" SFmode calculations when -ffast-math + is enabled and -mno-fused-madd is not specified by the user. + The fmac insn can't be enabled by default due to the implied + FMA semantics. See also PR target/29100. */ + if (global_options_set.x_TARGET_FMAC == 0) + TARGET_FMAC = 1; + /* Enable fsca insn for SH4A if not otherwise specified by the user. */ + if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP) + TARGET_FSCA = 1; + + /* Enable fsrra insn for SH4A if not otherwise specified by the user. */ + if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP) + TARGET_FSRRA = 1; + } + + /* Allow fsrra insn only if -funsafe-math-optimizations and + -ffinite-math-only is enabled. */ + TARGET_FSRRA = TARGET_FSRRA + && flag_unsafe_math_optimizations + && flag_finite_math_only; + if (sh_fixed_range_str) sh_fix_range (sh_fixed_range_str); @@ -896,7 +913,6 @@ error ("-msoft-atomic and -mhard-atomic cannot be used at the same time"); if (TARGET_HARD_ATOMIC && ! TARGET_SH4A_ARCH) error ("-mhard-atomic is only available for SH4A targets"); - } /* Print the operand address in x to the stream. 
*/ Index: gcc/config/sh/sh.md =================================================================== --- gcc/config/sh/sh.md (revision 188026) +++ gcc/config/sh/sh.md (working copy) @@ -10689,7 +10689,7 @@ (div:SF (match_operand:SF 1 "immediate_operand" "i") (sqrt:SF (match_operand:SF 2 "register_operand" "0")))) (use (match_operand:PSI 3 "fpscr_operand" "c"))] - "TARGET_SH4A_FP && flag_unsafe_math_optimizations + "TARGET_FPU_ANY && TARGET_FSRRA && operands[1] == CONST1_RTX (SFmode)" "fsrra %0" [(set_attr "type" "fsrra") @@ -10705,50 +10705,38 @@ (unspec:SF [(mult:SF (float:SF (match_dup 1)) (match_dup 2)) ] UNSPEC_FCOSA))) (use (match_operand:PSI 3 "fpscr_operand" "c"))] - "TARGET_SH4A_FP && flag_unsafe_math_optimizations + "TARGET_FPU_ANY && TARGET_FSCA && operands[2] == sh_fsca_int2sf ()" "fsca fpul,%d0" [(set_attr "type" "fsca") (set_attr "fp_mode" "single")]) -(define_expand "sinsf2" +;; When the sincos pattern is defined, the builtin functions sin and cos +;; will be expanded to the sincos pattern and one of the output values will +;; remain unused. 
+(define_expand "sincossf3" [(set (match_operand:SF 0 "nonimmediate_operand" "") - (unspec:SF [(match_operand:SF 1 "fp_arith_reg_operand" "")] - UNSPEC_FSINA))] - "TARGET_SH4A_FP && flag_unsafe_math_optimizations" + (unspec:SF [(match_operand:SF 2 "fp_arith_reg_operand" "")] + UNSPEC_FSINA)) + (set (match_operand:SF 1 "nonimmediate_operand" "") + (unspec:SF [(match_dup 2)] UNSPEC_FCOSA))] + "TARGET_FPU_ANY && TARGET_FSCA" { rtx scaled = gen_reg_rtx (SFmode); rtx truncated = gen_reg_rtx (SImode); rtx fsca = gen_reg_rtx (V2SFmode); rtx scale_reg = force_reg (SFmode, sh_fsca_sf2int ()); - emit_sf_insn (gen_mulsf3 (scaled, operands[1], scale_reg)); + emit_sf_insn (gen_mulsf3 (scaled, operands[2], scale_reg)); emit_sf_insn (gen_fix_truncsfsi2 (truncated, scaled)); emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (), get_fpscr_rtx ())); + emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, fsca, 0)); + emit_move_insn (operands[1], gen_rtx_SUBREG (SFmode, fsca, 4)); DONE; }) -(define_expand "cossf2" - [(set (match_operand:SF 0 "nonimmediate_operand" "") - (unspec:SF [(match_operand:SF 1 "fp_arith_reg_operand" "")] - UNSPEC_FCOSA))] - "TARGET_SH4A_FP && flag_unsafe_math_optimizations" -{ - rtx scaled = gen_reg_rtx (SFmode); - rtx truncated = gen_reg_rtx (SImode); - rtx fsca = gen_reg_rtx (V2SFmode); - rtx scale_reg = force_reg (SFmode, sh_fsca_sf2int ()); - - emit_sf_insn (gen_mulsf3 (scaled, operands[1], scale_reg)); - emit_sf_insn (gen_fix_truncsfsi2 (truncated, scaled)); - emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (), - get_fpscr_rtx ())); - emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, fsca, 4)); - DONE; -}) - (define_expand "abssf2" [(set (match_operand:SF 0 "fp_arith_reg_operand" "") (abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))] Index: gcc/config/sh/sh.opt =================================================================== --- gcc/config/sh/sh.opt (revision 188026) +++ gcc/config/sh/sh.opt (working copy) @@ -348,3 +348,12 @@ 
mpretend-cmove Target Var(TARGET_PRETEND_CMOVE) Pretend a branch-around-a-move is a conditional move. + +mfsca +Target Var(TARGET_FSCA) +Enable the use of the fsca instruction + +mfsrra +Target Var(TARGET_FSRRA) +Enable the use of the fsrra instruction + Index: gcc/testsuite/gcc.target/sh/pr53512-1.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr53512-1.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr53512-1.c (revision 0) @@ -0,0 +1,26 @@ +/* Verify that the fsca insn is used when specifying -mfsca and + -funsafe-math-optimizations. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1 -mfsca -funsafe-math-optimizations" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m1" "-m2*" "-m4al" "*nofpu" "-m4-340*" "-m4-400*" "-m4-500*" "-m5*" } { "" } } */ +/* { dg-final { scan-assembler-times "fsca" 3 } } */ + +#include <math.h> + +float +test_func_00 (float x) +{ + return sinf (x) + cosf (x); +} + +float +test_func_01 (float x) +{ + return sinf (x); +} + +float +test_func_02 (float x) +{ + return cosf (x); +} Index: gcc/testsuite/gcc.target/sh/pr53512-2.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr53512-2.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr53512-2.c (revision 0) @@ -0,0 +1,26 @@ +/* Verify that the fsca insn is not used when specifying -mno-fsca and + -funsafe-math-optimizations. 
*/ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1 -mno-fsca -funsafe-math-optimizations" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m1" "-m2*" "-m4al" "*nofpu" "-m4-340*" "-m4-400*" "-m4-500*" "-m5*" } { "" } } */ +/* { dg-final { scan-assembler-not "fsca" } } */ + +#include <math.h> + +float +test_func_00 (float x) +{ + return sinf (x) + cosf (x); +} + +float +test_func_01 (float x) +{ + return sinf (x); +} + +float +test_func_02 (float x) +{ + return cosf (x); +} Index: gcc/testsuite/gcc.target/sh/pr53512-3.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr53512-3.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr53512-3.c (revision 0) @@ -0,0 +1,15 @@ +/* Verify that the fsrra insn is used when specifying -mfsrra and + -funsafe-math-optimizations and -ffinite-math-only. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1 -mfsrra -funsafe-math-optimizations -ffinite-math-only" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m1" "-m2*" "-m4al" "*nofpu" "-m4-340*" "-m4-400*" "-m4-500*" "-m5*" } { "" } } */ +/* { dg-final { scan-assembler "fsrra" } } */ + +#include <math.h> + +float +test_func_00 (float x) +{ + return 1 / sqrtf (x); +} + Index: gcc/testsuite/gcc.target/sh/pr53512-4.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr53512-4.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr53512-4.c (revision 0) @@ -0,0 +1,15 @@ +/* Verify that the fsrra insn is not used when specifying -mno-fsrra and + -funsafe-math-optimizations and -ffinite-math-only. 
*/ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1 -mno-fsrra -funsafe-math-optimizations -ffinite-math-only" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m1" "-m2*" "-m4al" "*nofpu" "-m4-340*" "-m4-400*" "-m4-500*" "-m5*" } { "" } } */ +/* { dg-final { scan-assembler-not "fsrra" } } */ + +#include <math.h> + +float +test_func_00 (float x) +{ + return 1 / sqrtf (x); +} + Index: gcc/doc/invoke.texi =================================================================== --- gcc/doc/invoke.texi (revision 188026) +++ gcc/doc/invoke.texi (working copy) @@ -886,7 +886,7 @@ -mindexed-addressing -mgettrcost=@var{number} -mpt-fixed @gol -maccumulate-outgoing-args -minvalid-symbols -msoft-atomic -mhard-atomic @gol -mbranch-cost=@var{num} -mcbranchdi -mcmpeqdi -mfused-madd -mno-fused-madd @gol --mpretend-cmove -menable-tas} +-mfsca -mno-fsca -mfsrra -mno-fsrra -mpretend-cmove -menable-tas} @emph{Solaris 2 Options} @gccoptlist{-mimpure-text -mno-impure-text @gol @@ -18262,6 +18262,28 @@ disallow the usage of the @code{fmac} instruction for regular calculations even if @option{-funsafe-math-optimizations} is in effect. +@item -mfsca +@itemx -mno-fsca +@opindex mfsca +@opindex mno-fsca +Allow or disallow the compiler to emit the @code{fsca} instruction for sine +and cosine approximations. The option @option{-mfsca} must be used in +combination with @option{-funsafe-math-optimizations}. It is enabled by default +when generating code for SH4A. Using @option{-mno-fsca} disables sine and cosine +approximations even if @option{-funsafe-math-optimizations} is in effect. + +@item -mfsrra +@itemx -mno-fsrra +@opindex mfsrra +@opindex mno-fsrra +Allow or disallow the compiler to emit the @code{fsrra} instruction for +reciprocal square root approximations. The option @option{-mfsrra} must be used +in combination with @option{-funsafe-math-optimizations} and +@option{-ffinite-math-only}. It is enabled by default when generating code for +SH4A. 
Using @option{-mno-fsrra} disables reciprocal square root approximations +even if @option{-funsafe-math-optimizations} and @option{-ffinite-math-only} are +in effect. + @item -mpretend-cmove @opindex mpretend-cmove Prefer zero-displacement conditional branches for conditional move instruction