On Mon, Dec 17, 2018 at 9:26 AM Richard Biener <rguent...@suse.de> wrote: > > On Mon, 17 Dec 2018, Uros Bizjak wrote: > > > ... and the patch. > > middle-end parts are OK. > > > On Mon, Dec 17, 2018 at 8:58 AM Uros Bizjak <ubiz...@gmail.com> wrote: > > > > > > Attached patch inlines calls to asinh{,f}, acosh{,f,l} and atanh{,f,l} > > > using x87 XFmode arithmetic. In the patch, I left out asinhl due to > > > its reduced input argument range, but perhaps it could be added back, > > > since we are expanding under flag_unsafe_math_optimizations. The > > > expanders are modelled after the removed inlines in glibc [1] (which > > > also include asinhl, with a comment mentioning its reduced input > > > argument range).
Thinking a bit more about the reduced input range of asinhl - we have a similar situation with other trigonometric functions, where the argument range is reduced to +-2^63. So, I have committed version 2 of the patch, which also expands asinhl. 2018-12-17 Uros Bizjak <ubiz...@gmail.com> PR target/88502 * internal-fn.def (ACOSH): New. (ASINH): Ditto. (ATANH): Ditto. * optabs.def (acosh_optab): New. (asinh_optab): Ditto. (atanh_optab): Ditto. * config/i386/i386-protos.h (ix86_emit_i387_asinh): New prototype. (ix86_emit_i387_acosh): Ditto. (ix86_emit_i387_atanh): Ditto. * config/i386/i386.c (ix86_emit_i387_asinh): New function. (ix86_emit_i387_acosh): Ditto. (ix86_emit_i387_atanh): Ditto. * config/i386/i386.md (asinhxf2): New expander. (asinh<mode>2): Ditto. (acoshxf2): Ditto. (acosh<mode>2): Ditto. (atanhxf2): Ditto. (atanh<mode>2): Ditto. Uros.
Index: config/i386/i386-protos.h =================================================================== --- config/i386/i386-protos.h (revision 267203) +++ config/i386/i386-protos.h (working copy) @@ -170,6 +170,9 @@ extern void x86_emit_floatuns (rtx [2]); extern void ix86_emit_fp_unordered_jump (rtx); +extern void ix86_emit_i387_asinh (rtx, rtx); +extern void ix86_emit_i387_acosh (rtx, rtx); +extern void ix86_emit_i387_atanh (rtx, rtx); extern void ix86_emit_i387_log1p (rtx, rtx); extern void ix86_emit_i387_round (rtx, rtx); extern void ix86_emit_swdivsf (rtx, rtx, rtx, machine_mode); Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 267203) +++ config/i386/i386.c (working copy) @@ -44054,6 +44054,135 @@ JUMP_LABEL (insn) = label; } +/* Output code to compute op0 = asinh (op1) using x87 XFmode arithmetic. */ + +void ix86_emit_i387_asinh (rtx op0, rtx op1) +{ + rtx e1 = gen_reg_rtx (XFmode); + rtx e2 = gen_reg_rtx (XFmode); + rtx scratch = gen_reg_rtx (HImode); + rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); + rtx cst1, tmp; + rtx_code_label *jump_label = gen_label_rtx (); + rtx_insn *insn; + + /* e1 = op1^2, e2 = sqrt (op1^2 + 1.0) + 1.0 */ + emit_insn (gen_mulxf3 (e1, op1, op1)); + cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); + emit_insn (gen_addxf3 (e2, e1, cst1)); + emit_insn (gen_sqrtxf2 (e2, e2)); + emit_insn (gen_addxf3 (e2, e2, cst1)); + + /* e1 = op1^2 / e2 */ + emit_insn (gen_divxf3 (e1, e1, e2)); + + /* scratch = fxam (op1) */ + emit_insn (gen_fxamxf2_i387 (scratch, op1)); + + /* e1 = e1 + |op1| (e2 is reused to hold |op1|) */ + emit_insn (gen_absxf2 (e2, op1)); + emit_insn (gen_addxf3 (e1, e1, e2)); + + /* e2 = log1p (e1) */ + ix86_emit_i387_log1p (e2, e1); + + /* flags = signbit (op1) */ + emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); + + /* if (flags) then e2 = -e2; the jump skips the negation otherwise */ + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, + gen_rtx_EQ (VOIDmode, flags, const0_rtx), + gen_rtx_LABEL_REF (VOIDmode, jump_label), + pc_rtx); + insn = 
emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); + predict_jump (REG_BR_PROB_BASE * 50 / 100); + JUMP_LABEL (insn) = jump_label; + + emit_insn (gen_negxf2 (e2, e2)); + + emit_label (jump_label); + LABEL_NUSES (jump_label) = 1; + + emit_move_insn (op0, e2); +} + +/* Output code to compute op0 = acosh (op1) using x87 XFmode arithmetic. */ + +void ix86_emit_i387_acosh (rtx op0, rtx op1) +{ + rtx e1 = gen_reg_rtx (XFmode); + rtx e2 = gen_reg_rtx (XFmode); + rtx cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); + + /* e2 = sqrt (op1 + 1.0) */ + emit_insn (gen_addxf3 (e2, op1, cst1)); + emit_insn (gen_sqrtxf2 (e2, e2)); + + /* e1 = sqrt (op1 - 1.0) */ + emit_insn (gen_subxf3 (e1, op1, cst1)); + emit_insn (gen_sqrtxf2 (e1, e1)); + + /* e1 = sqrt (op1 - 1.0) * sqrt (op1 + 1.0) */ + emit_insn (gen_mulxf3 (e1, e1, e2)); + + /* e1 = op1 + sqrt (op1 - 1.0) * sqrt (op1 + 1.0) */ + emit_insn (gen_addxf3 (e1, e1, op1)); + + /* op0 = log (e1) */ + emit_insn (gen_logxf2 (op0, e1)); +} + +/* Output code to compute op0 = atanh (op1) using x87 XFmode arithmetic. */ + +void ix86_emit_i387_atanh (rtx op0, rtx op1) +{ + rtx e1 = gen_reg_rtx (XFmode); + rtx e2 = gen_reg_rtx (XFmode); + rtx scratch = gen_reg_rtx (HImode); + rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); + rtx half = const_double_from_real_value (dconsthalf, XFmode); + rtx cst1, tmp; + rtx_code_label *jump_label = gen_label_rtx (); + rtx_insn *insn; + + /* scratch = fxam (op1) */ + emit_insn (gen_fxamxf2_i387 (scratch, op1)); + + /* e2 = |op1| */ + emit_insn (gen_absxf2 (e2, op1)); + + /* e1 = -(e2 + e2) / (e2 + 1.0), where e2 = |op1| on entry */ + cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); + emit_insn (gen_addxf3 (e1, e2, cst1)); + emit_insn (gen_addxf3 (e2, e2, e2)); + emit_insn (gen_negxf2 (e2, e2)); + emit_insn (gen_divxf3 (e1, e2, e1)); + + /* e2 = log1p (e1) */ + ix86_emit_i387_log1p (e2, e1); + + /* flags = signbit (op1) */ + emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); + + /* if (!flags) then e2 = -e2; the jump skips the negation otherwise */ + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, + gen_rtx_NE (VOIDmode, flags, const0_rtx), + gen_rtx_LABEL_REF (VOIDmode, 
jump_label), + pc_rtx); + insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); + predict_jump (REG_BR_PROB_BASE * 50 / 100); + JUMP_LABEL (insn) = jump_label; + + emit_insn (gen_negxf2 (e2, e2)); + + emit_label (jump_label); + LABEL_NUSES (jump_label) = 1; + + /* op0 = 0.5 * e2 */ + half = force_reg (XFmode, half); + emit_insn (gen_mulxf3 (op0, e2, half)); +} + /* Output code to perform a log1p XFmode calculation. */ void ix86_emit_i387_log1p (rtx op0, rtx op1) Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 267203) +++ config/i386/i386.md (working copy) @@ -15483,6 +15483,89 @@ DONE; }) +(define_expand "asinhxf2" + [(use (match_operand:XF 0 "register_operand")) + (use (match_operand:XF 1 "register_operand"))] + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only + && flag_unsafe_math_optimizations" +{ + ix86_emit_i387_asinh (operands[0], operands[1]); + DONE; +}) + +(define_expand "asinh<mode>2" + [(use (match_operand:MODEF 0 "register_operand")) + (use (match_operand:MODEF 1 "general_operand"))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_finite_math_only + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_asinhxf2 (op0, op1)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); + DONE; +}) + +(define_expand "acoshxf2" + [(use (match_operand:XF 0 "register_operand")) + (use (match_operand:XF 1 "register_operand"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + ix86_emit_i387_acosh (operands[0], operands[1]); + DONE; +}) + +(define_expand "acosh<mode>2" + [(use (match_operand:MODEF 0 "register_operand")) + (use (match_operand:MODEF 1 "general_operand"))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + || 
TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_acoshxf2 (op0, op1)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); + DONE; +}) + +(define_expand "atanhxf2" + [(use (match_operand:XF 0 "register_operand")) + (use (match_operand:XF 1 "register_operand"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + ix86_emit_i387_atanh (operands[0], operands[1]); + DONE; +}) + +(define_expand "atanh<mode>2" + [(use (match_operand:MODEF 0 "register_operand")) + (use (match_operand:MODEF 1 "general_operand"))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_atanhxf2 (op0, op1)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); + DONE; +}) + (define_insn "fyl2xxf3_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0") Index: internal-fn.def =================================================================== --- internal-fn.def (revision 267203) +++ internal-fn.def (working copy) @@ -201,8 +201,11 @@ /* Unary math functions. 
*/ DEF_INTERNAL_FLT_FN (ACOS, ECF_CONST, acos, unary) +DEF_INTERNAL_FLT_FN (ACOSH, ECF_CONST, acosh, unary) DEF_INTERNAL_FLT_FN (ASIN, ECF_CONST, asin, unary) +DEF_INTERNAL_FLT_FN (ASINH, ECF_CONST, asinh, unary) DEF_INTERNAL_FLT_FN (ATAN, ECF_CONST, atan, unary) +DEF_INTERNAL_FLT_FN (ATANH, ECF_CONST, atanh, unary) DEF_INTERNAL_FLT_FN (COS, ECF_CONST, cos, unary) DEF_INTERNAL_FLT_FN (EXP, ECF_CONST, exp, unary) DEF_INTERNAL_FLT_FN (EXP10, ECF_CONST, exp10, unary) Index: optabs.def =================================================================== --- optabs.def (revision 267203) +++ optabs.def (working copy) @@ -273,9 +273,12 @@ OPTAB_D (nearbyint_optab, "nearbyint$a2") OPTAB_D (acos_optab, "acos$a2") +OPTAB_D (acosh_optab, "acosh$a2") OPTAB_D (asin_optab, "asin$a2") +OPTAB_D (asinh_optab, "asinh$a2") OPTAB_D (atan2_optab, "atan2$a3") OPTAB_D (atan_optab, "atan$a2") +OPTAB_D (atanh_optab, "atanh$a2") OPTAB_D (copysign_optab, "copysign$F$a3") OPTAB_D (xorsign_optab, "xorsign$F$a3") OPTAB_D (cos_optab, "cos$a2")