Hi: Related discussion in [1] and PR. Bootstrapped and regtest on x86_64-linux-gnu{-m32,}. Ok for trunk?
[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574330.html gcc/ChangeLog: PR target/102464 * config/i386/i386.c (ix86_optab_supported_p): Return true for HFmode. * match.pd: Simplify (_Float16) ceil ((double) x) to __builtin_ceilf16 (a) when a is _Float16 type and direct_internal_fn_supported_p. gcc/testsuite/ChangeLog: * gcc.target/i386/pr102464.c: New test. --- gcc/config/i386/i386.c | 20 +++++++----- gcc/match.pd | 28 +++++++++++++++++ gcc/testsuite/gcc.target/i386/pr102464.c | 39 ++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr102464.c diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ba89e111d28..3767fe9806d 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -23582,20 +23582,24 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode, return opt_type == OPTIMIZE_FOR_SPEED; case rint_optab: - if (SSE_FLOAT_MODE_P (mode1) - && TARGET_SSE_MATH - && !flag_trapping_math - && !TARGET_SSE4_1) + if (mode1 == HFmode) + return true; + else if (SSE_FLOAT_MODE_P (mode1) + && TARGET_SSE_MATH + && !flag_trapping_math + && !TARGET_SSE4_1) return opt_type == OPTIMIZE_FOR_SPEED; return true; case floor_optab: case ceil_optab: case btrunc_optab: - if (SSE_FLOAT_MODE_P (mode1) - && TARGET_SSE_MATH - && !flag_trapping_math - && TARGET_SSE4_1) + if (mode1 == HFmode) + return true; + else if (SSE_FLOAT_MODE_P (mode1) + && TARGET_SSE_MATH + && !flag_trapping_math + && TARGET_SSE4_1) return true; return opt_type == OPTIMIZE_FOR_SPEED; diff --git a/gcc/match.pd b/gcc/match.pd index a9791ceb74a..9ccec8b6ce3 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -6191,6 +6191,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (froms (convert float_value_p@0)) (convert (tos @0))))) +#if GIMPLE +(match float16_value_p + @0 + (if (TYPE_MAIN_VARIANT (TREE_TYPE (@0)) == float16_type_node))) +(for froms (BUILT_IN_TRUNCL BUILT_IN_TRUNC BUILT_IN_TRUNCF + BUILT_IN_FLOORL BUILT_IN_FLOOR BUILT_IN_FLOORF + BUILT_IN_CEILL BUILT_IN_CEIL BUILT_IN_CEILF + BUILT_IN_ROUNDEVENL BUILT_IN_ROUNDEVEN BUILT_IN_ROUNDEVENF + BUILT_IN_ROUNDL BUILT_IN_ROUND BUILT_IN_ROUNDF + BUILT_IN_NEARBYINTL BUILT_IN_NEARBYINT BUILT_IN_NEARBYINTF + BUILT_IN_RINTL BUILT_IN_RINT BUILT_IN_RINTF) + tos (IFN_TRUNC IFN_TRUNC IFN_TRUNC + IFN_FLOOR IFN_FLOOR IFN_FLOOR + IFN_CEIL IFN_CEIL IFN_CEIL + IFN_ROUNDEVEN IFN_ROUNDEVEN IFN_ROUNDEVEN + IFN_ROUND IFN_ROUND IFN_ROUND + IFN_NEARBYINT IFN_NEARBYINT IFN_NEARBYINT + IFN_RINT IFN_RINT IFN_RINT) + /* (_Float16) round ((doube) x) -> __built_in_roundf16 (x), etc., + if x is a _Float16. */ + (simplify + (convert (froms (convert float16_value_p@0))) + (if (types_match (type, TREE_TYPE (@0)) + && direct_internal_fn_supported_p (as_internal_fn (tos), + type, OPTIMIZE_FOR_BOTH)) + (tos @0)))) +#endif + (for froms (XFLOORL XCEILL XROUNDL XRINTL) tos (XFLOOR XCEIL XROUND XRINT) /* llfloorl(extend(x)) -> llfloor(x), etc., if x is a double. */ diff --git a/gcc/testsuite/gcc.target/i386/pr102464.c b/gcc/testsuite/gcc.target/i386/pr102464.c new file mode 100644 index 00000000000..e3e060ee80b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr102464.c @@ -0,0 +1,39 @@ +/* PR target/102464. */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16" } */ + +#define FOO(FUNC,SUFFIX) \ + _Float16 \ + foo_##FUNC##_##SUFFIX (_Float16 a) \ + { \ + return __builtin_##FUNC##SUFFIX (a); \ + } + +FOO (roundeven, f16); +FOO (roundeven, f); +FOO (roundeven, ); +FOO (roundeven, l); +FOO (trunc, f16); +FOO (trunc, f); +FOO (trunc, ); +FOO (trunc, l); +FOO (ceil, f16); +FOO (ceil, f); +FOO (ceil, ); +FOO (ceil, l); +FOO (floor, f16); +FOO (floor, f); +FOO (floor, ); +FOO (floor, l); +FOO (nearbyint, f16); +FOO (nearbyint, f); +FOO (nearbyint, ); +FOO (nearbyint, l); +FOO (rint, f16); +FOO (rint, f); +FOO (rint, ); +FOO (rint, l); + +/* { dg-final { scan-assembler-not "vcvtsh2s\[sd\]" } } */ +/* { dg-final { scan-assembler-not "extendhfxf" } } */ +/* { dg-final { scan-assembler-times "vrndscalesh\[^\n\r\]*xmm\[0-9\]" 24 } } */ -- 2.27.0