Hi. This is a patch for roundeven expansion that Uros suggested here: <https://gcc.gnu.org/ml/gcc-patches/2019-07/msg01038.html>. Thanks for the heads up. I have rerun the testsuite with the patch applied; it bootstraps and survives the regression tests on x86_64-linux-gnu. Note: this patch is to be applied on top of <https://gcc.gnu.org/ml/gcc-patches/2019-06/msg01828.html>
Thanks, Tejas gcc/ChangeLog: 2019-07-24 Tejas Joshi <tejasjoshi9...@gmail.com> * builtins.c (mathfn_built_in_2): Change CASE_MATHFN to CASE_MATHFN_FLOATN for roundeven. * config/i386/i386.c (ix86_i387_mode_needed): Add case I387_ROUNDEVEN. (ix86_mode_needed): Likewise. (ix86_mode_after): Likewise. (ix86_mode_entry): Likewise. (ix86_mode_exit): Likewise. (ix86_emit_mode_set): Likewise. (emit_i387_cw_initialization): Add case I387_CW_ROUNDEVEN. * config/i386/i386.h (ix86_stack_slot): Add SLOT_CW_ROUNDEVEN. (ix86_entity): Add I387_ROUNDEVEN. (NUM_MODES_FOR_MODE_SWITCHING): Add I387_CW_ANY. * config/i386/i386.md: Define UNSPEC_FRNDINT_ROUNDEVEN. (define_int_iterator): Likewise. (define_int_attr): Likewise for rounding_insn, rounding and ROUNDING. (define_constants): Define ROUND_ROUNDEVEN mode. (define_attr): Add roundeven mode for i387_cw. (<rounding_insn><mode>2): Add condition for ROUND_ROUNDEVEN. * internal-fn.def (ROUNDEVEN): New builtin function. * optabs.def (roundeven_optab): New optab. * reg-stack.c (subst_stack_regs_pat): Add case UNSPEC_FRNDINT_ROUNDEVEN. gcc/testsuite/ChangeLog: 2019-07-24 Tejas Joshi <tejasjoshi9...@gmail.com> * gcc.target/i386/avx-vround-roundeven-1.c: New test. * gcc.target/i386/avx-vround-roundeven-2.c: New test.
diff --git a/gcc/builtins.c b/gcc/builtins.c index 8ceb077b0bf..f61f10422fd 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -2056,7 +2056,7 @@ mathfn_built_in_2 (tree type, combined_fn fn) CASE_MATHFN (REMQUO) CASE_MATHFN_FLOATN (RINT) CASE_MATHFN_FLOATN (ROUND) - CASE_MATHFN (ROUNDEVEN) + CASE_MATHFN_FLOATN (ROUNDEVEN) CASE_MATHFN (SCALB) CASE_MATHFN (SCALBLN) CASE_MATHFN (SCALBN) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 49f49c5f8d0..aa32a61f416 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -13593,6 +13593,11 @@ ix86_i387_mode_needed (int entity, rtx_insn *insn) switch (entity) { + case I387_ROUNDEVEN: + if (mode == I387_CW_ROUNDEVEN) + return mode; + break; + case I387_TRUNC: if (mode == I387_CW_TRUNC) return mode; @@ -13627,6 +13632,7 @@ ix86_mode_needed (int entity, rtx_insn *insn) return ix86_dirflag_mode_needed (insn); case AVX_U128: return ix86_avx_u128_mode_needed (insn); + case I387_ROUNDEVEN: case I387_TRUNC: case I387_FLOOR: case I387_CEIL: @@ -13687,6 +13693,7 @@ ix86_mode_after (int entity, int mode, rtx_insn *insn) return mode; case AVX_U128: return ix86_avx_u128_mode_after (mode, insn); + case I387_ROUNDEVEN: case I387_TRUNC: case I387_FLOOR: case I387_CEIL: @@ -13739,6 +13746,7 @@ ix86_mode_entry (int entity) return ix86_dirflag_mode_entry (); case AVX_U128: return ix86_avx_u128_mode_entry (); + case I387_ROUNDEVEN: case I387_TRUNC: case I387_FLOOR: case I387_CEIL: @@ -13776,6 +13784,7 @@ ix86_mode_exit (int entity) return X86_DIRFLAG_ANY; case AVX_U128: return ix86_avx_u128_mode_exit (); + case I387_ROUNDEVEN: case I387_TRUNC: case I387_FLOOR: case I387_CEIL: @@ -13810,6 +13819,12 @@ emit_i387_cw_initialization (int mode) switch (mode) { + case I387_CW_ROUNDEVEN: + /* round to nearest */ + emit_insn (gen_andhi3 (reg, reg, GEN_INT (0x0c00))); + slot = SLOT_CW_ROUNDEVEN; + break; + case I387_CW_TRUNC: /* round toward zero (truncate) */ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00))); @@ -13856,6 
+13871,7 @@ ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED, if (mode == AVX_U128_CLEAN) emit_insn (gen_avx_vzeroupper ()); break; + case I387_ROUNDEVEN: case I387_TRUNC: case I387_FLOOR: case I387_CEIL: diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 1f70844fc45..a8e1cd048a8 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2470,6 +2470,7 @@ enum ix86_stack_slot { SLOT_TEMP = 0, SLOT_CW_STORED, + SLOT_CW_ROUNDEVEN, SLOT_CW_TRUNC, SLOT_CW_FLOOR, SLOT_CW_CEIL, @@ -2481,6 +2482,7 @@ enum ix86_entity { X86_DIRFLAG = 0, AVX_U128, + I387_ROUNDEVEN, I387_TRUNC, I387_FLOOR, I387_CEIL, @@ -2516,7 +2518,7 @@ enum avx_u128_state #define NUM_MODES_FOR_MODE_SWITCHING \ { X86_DIRFLAG_ANY, AVX_U128_ANY, \ - I387_CW_ANY, I387_CW_ANY, I387_CW_ANY } + I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY } /* Avoid renaming of stack registers, as doing so in combination with diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index db5fa9ae3ca..21532494f6d 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -141,6 +141,7 @@ UNSPEC_FXAM ;; x87 Rounding + UNSPEC_FRNDINT_ROUNDEVEN UNSPEC_FRNDINT_FLOOR UNSPEC_FRNDINT_CEIL UNSPEC_FRNDINT_TRUNC @@ -303,7 +304,8 @@ ;; Constants to represent rounding modes in the ROUND instruction (define_constants - [(ROUND_FLOOR 0x1) + [(ROUND_ROUNDEVEN 0x0) + (ROUND_FLOOR 0x1) (ROUND_CEIL 0x2) (ROUND_TRUNC 0x3) (ROUND_MXCSR 0x4) @@ -779,7 +781,7 @@ ;; Defines rounding mode of an FP operation. -(define_attr "i387_cw" "trunc,floor,ceil,uninitialized,any" +(define_attr "i387_cw" "roundeven,floor,ceil,trunc,uninitialized,any" (const_string "any")) ;; Define attribute to indicate AVX insns with partial XMM register update. 
@@ -16251,7 +16253,8 @@ }) (define_int_iterator FRNDINT_ROUNDING - [UNSPEC_FRNDINT_FLOOR + [UNSPEC_FRNDINT_ROUNDEVEN + UNSPEC_FRNDINT_FLOOR UNSPEC_FRNDINT_CEIL UNSPEC_FRNDINT_TRUNC]) @@ -16261,21 +16264,24 @@ ;; Base name for define_insn (define_int_attr rounding_insn - [(UNSPEC_FRNDINT_FLOOR "floor") + [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven") + (UNSPEC_FRNDINT_FLOOR "floor") (UNSPEC_FRNDINT_CEIL "ceil") (UNSPEC_FRNDINT_TRUNC "btrunc") (UNSPEC_FIST_FLOOR "floor") (UNSPEC_FIST_CEIL "ceil")]) (define_int_attr rounding - [(UNSPEC_FRNDINT_FLOOR "floor") + [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven") + (UNSPEC_FRNDINT_FLOOR "floor") (UNSPEC_FRNDINT_CEIL "ceil") (UNSPEC_FRNDINT_TRUNC "trunc") (UNSPEC_FIST_FLOOR "floor") (UNSPEC_FIST_CEIL "ceil")]) (define_int_attr ROUNDING - [(UNSPEC_FRNDINT_FLOOR "FLOOR") + [(UNSPEC_FRNDINT_ROUNDEVEN "ROUNDEVEN") + (UNSPEC_FRNDINT_FLOOR "FLOOR") (UNSPEC_FRNDINT_CEIL "CEIL") (UNSPEC_FRNDINT_TRUNC "TRUNC") (UNSPEC_FIST_FLOOR "FLOOR") @@ -16338,8 +16344,9 @@ || TARGET_MIX_SSE_I387) && (flag_fp_int_builtin_inexact || !flag_trapping_math)) || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH - && (TARGET_SSE4_1 || flag_fp_int_builtin_inexact - || !flag_trapping_math))" + && (TARGET_SSE4_1 + || (ROUND_<ROUNDING> != ROUND_ROUNDEVEN + && (flag_fp_int_builtin_inexact || !flag_trapping_math))))" { if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && (TARGET_SSE4_1 || flag_fp_int_builtin_inexact || !flag_trapping_math)) diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 906d74b1d08..15f019b9b49 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -234,6 +234,7 @@ DEF_INTERNAL_FLT_FLOATN_FN (FLOOR, ECF_CONST, floor, unary) DEF_INTERNAL_FLT_FLOATN_FN (NEARBYINT, ECF_CONST, nearbyint, unary) DEF_INTERNAL_FLT_FLOATN_FN (RINT, ECF_CONST, rint, unary) DEF_INTERNAL_FLT_FLOATN_FN (ROUND, ECF_CONST, round, unary) +DEF_INTERNAL_FLT_FLOATN_FN (ROUNDEVEN, ECF_CONST, roundeven, unary) DEF_INTERNAL_FLT_FLOATN_FN (TRUNC, ECF_CONST, btrunc, unary) 
/* Binary math functions. */ diff --git a/gcc/optabs.def b/gcc/optabs.def index 4ffd0f35a40..065e3f64dda 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -268,6 +268,7 @@ OPTAB_D (fnms_optab, "fnms$a4") OPTAB_D (rint_optab, "rint$a2") OPTAB_D (round_optab, "round$a2") +OPTAB_D (roundeven_optab, "roundeven$a2") OPTAB_D (floor_optab, "floor$a2") OPTAB_D (ceil_optab, "ceil$a2") OPTAB_D (btrunc_optab, "btrunc$a2") diff --git a/gcc/reg-stack.c b/gcc/reg-stack.c index 710f14a9544..0f0089acdea 100644 --- a/gcc/reg-stack.c +++ b/gcc/reg-stack.c @@ -1817,6 +1817,7 @@ subst_stack_regs_pat (rtx_insn *insn, stack_ptr regstack, rtx pat) case UNSPEC_FRNDINT: case UNSPEC_F2XM1: + case UNSPEC_FRNDINT_ROUNDEVEN: case UNSPEC_FRNDINT_FLOOR: case UNSPEC_FRNDINT_CEIL: case UNSPEC_FRNDINT_TRUNC: diff --git a/gcc/testsuite/gcc.target/i386/avx-vround-roundeven-1.c b/gcc/testsuite/gcc.target/i386/avx-vround-roundeven-1.c new file mode 100644 index 00000000000..072d0f0e73a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vround-roundeven-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx" } */ + +__attribute__((noinline, noclone)) double +f1 (double x) +{ + return __builtin_roundeven (x); +} + +__attribute__((noinline, noclone)) float +f2 (float x) +{ + return __builtin_roundevenf (x); +} + +/* { dg-final { scan-assembler-times "vroundsd\[^\n\r\]*xmm" 1 } } */ +/* { dg-final { scan-assembler-times "vroundss\[^\n\r\]*xmm" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-vround-roundeven-2.c b/gcc/testsuite/gcc.target/i386/avx-vround-roundeven-2.c new file mode 100644 index 00000000000..211758d026a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vround-roundeven-2.c @@ -0,0 +1,21 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-mavx" } */ + +#ifndef CHECK_H +#define CHECK_H "avx-check.h" +#define TEST avx_test +#define SRC "avx-vround-roundeven-1.c" +#endif + +#include CHECK_H +#include SRC + +static void +TEST 
(void) +{ + if (f1 (0.5) != 0.0 || f1 (1.5) != 2.0 || f1 (-0.5) != 0.0 || f1 (-1.5) != -2.0) + abort (); + if (f2 (0.5f) != 0.0f || f2 (1.5f) != 2.0f || f2 (-0.5f) != 0.0f || f2 (-1.5f) != -2.0f) + abort (); +}