On Tue, Sep 28, 2021 at 4:01 AM Hongtao Liu <crazy...@gmail.com> wrote:
>
> On Mon, Sep 27, 2021 at 8:53 PM Richard Biener
> <richard.guent...@gmail.com> wrote:
> >
> > On Fri, Sep 24, 2021 at 1:26 PM liuhongt <hongtao....@intel.com> wrote:
> > >
> > > Hi:
> > >   Related discussion in [1] and PR.
> > >
> > >   Bootstrapped and regtest on x86_64-linux-gnu{-m32,}.
> > >   Ok for trunk?
> > >
> > > [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574330.html
> > >
> > > gcc/ChangeLog:
> > >
> > >         PR target/102464
> > >         * config/i386/i386.c (ix86_optab_supported_p):
> > >         Return true for HFmode.
> > >         * match.pd: Simplify (_Float16) ceil ((double) x) to
> > >         __builtin_ceilf16 (a) when a is _Float16 type and
> > >         direct_internal_fn_supported_p.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >         * gcc.target/i386/pr102464.c: New test.
> > > ---
> > >  gcc/config/i386/i386.c                   | 20 +++++++-----
> > >  gcc/match.pd                             | 28 +++++++++++++++++
> > >  gcc/testsuite/gcc.target/i386/pr102464.c | 39 ++++++++++++++++++++++++
> > >  3 files changed, 79 insertions(+), 8 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102464.c
> > >
> > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > > index ba89e111d28..3767fe9806d 100644
> > > --- a/gcc/config/i386/i386.c
> > > +++ b/gcc/config/i386/i386.c
> > > @@ -23582,20 +23582,24 @@ ix86_optab_supported_p (int op, machine_mode 
> > > mode1, machine_mode,
> > >        return opt_type == OPTIMIZE_FOR_SPEED;
> > >
> > >      case rint_optab:
> > > -      if (SSE_FLOAT_MODE_P (mode1)
> > > -         && TARGET_SSE_MATH
> > > -         && !flag_trapping_math
> > > -         && !TARGET_SSE4_1)
> > > +      if (mode1 == HFmode)
> > > +       return true;
> > > +      else if (SSE_FLOAT_MODE_P (mode1)
> > > +              && TARGET_SSE_MATH
> > > +              && !flag_trapping_math
> > > +              && !TARGET_SSE4_1)
> > >         return opt_type == OPTIMIZE_FOR_SPEED;
> > >        return true;
> > >
> > >      case floor_optab:
> > >      case ceil_optab:
> > >      case btrunc_optab:
> > > -      if (SSE_FLOAT_MODE_P (mode1)
> > > -         && TARGET_SSE_MATH
> > > -         && !flag_trapping_math
> > > -         && TARGET_SSE4_1)
> > > +      if (mode1 == HFmode)
> > > +       return true;
> > > +      else if (SSE_FLOAT_MODE_P (mode1)
> > > +              && TARGET_SSE_MATH
> > > +              && !flag_trapping_math
> > > +              && TARGET_SSE4_1)
> > >         return true;
> > >        return opt_type == OPTIMIZE_FOR_SPEED;
> > >
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index a9791ceb74a..9ccec8b6ce3 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -6191,6 +6191,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >     (froms (convert float_value_p@0))
> > >     (convert (tos @0)))))
> > >
> > > +#if GIMPLE
> > > +(match float16_value_p
> > > + @0
> > > + (if (TYPE_MAIN_VARIANT (TREE_TYPE (@0)) == float16_type_node)))
> > > +(for froms (BUILT_IN_TRUNCL BUILT_IN_TRUNC BUILT_IN_TRUNCF
> > > +           BUILT_IN_FLOORL BUILT_IN_FLOOR BUILT_IN_FLOORF
> > > +           BUILT_IN_CEILL BUILT_IN_CEIL BUILT_IN_CEILF
> > > +           BUILT_IN_ROUNDEVENL BUILT_IN_ROUNDEVEN BUILT_IN_ROUNDEVENF
> > > +           BUILT_IN_ROUNDL BUILT_IN_ROUND BUILT_IN_ROUNDF
> > > +           BUILT_IN_NEARBYINTL BUILT_IN_NEARBYINT BUILT_IN_NEARBYINTF
> > > +           BUILT_IN_RINTL BUILT_IN_RINT BUILT_IN_RINTF)
> >
> > we do have patterns that convert (truncl (convert floatval)) to
> > (float)truncf (val),
> > your's does (_Float16)trunc ((double) float16) -> truncF16 (float16), 
> > doesn't it
> > make sense to have trunc ((double) float16) -> (double)trunfF16
> > (float16) as well?
> >
> > Why do you conditionalize on GIMPLE here?
> To avoid
> error: ‘direct_internal_fn_supported_p’ was not declared in this scope

You probably have to amend generic-match-head.c by adding the relevant include.
But maybe we don't want to feed too many internal function calls into the
early GENERIC ... so OK to leave as-is then.

> >
> > That said, I wonder whether we can somehow address pattern explosion here,
> > eliding the outer (convert ...) from the match would help a bit already.
> >
> > The related patterns use optimize && canonicalize_math_p as well btw., not
> > sure whether either is appropriate here since there are no _Float16 math
> > functions available.
> Yes, that's why I didn't follow the existing pattern, i think we can
> add optimize back to the condition, but not canonicalize_math_p ()
> since there's no math function for _Float16.
> Also w/o the outer (convert ..), it looks like a canonicalization to
> transform ceil ((double) a) to (double) __builtin_ceilf16 (a) but not
> an optimization.

Yes, that's likely why we have canonicalize_math_p () on the others.

So I think the original patch is OK.

Thanks,
Richard.

> >
> > > +     tos (IFN_TRUNC IFN_TRUNC IFN_TRUNC
> > > +         IFN_FLOOR IFN_FLOOR IFN_FLOOR
> > > +         IFN_CEIL IFN_CEIL IFN_CEIL
> > > +         IFN_ROUNDEVEN IFN_ROUNDEVEN IFN_ROUNDEVEN
> > > +         IFN_ROUND IFN_ROUND IFN_ROUND
> > > +         IFN_NEARBYINT IFN_NEARBYINT IFN_NEARBYINT
> > > +         IFN_RINT IFN_RINT IFN_RINT)
> > > + /* (_Float16) round ((doube) x) -> __built_in_roundf16 (x), etc.,
> > > +    if x is a _Float16.  */
> > > + (simplify
> > > +   (convert (froms (convert float16_value_p@0)))
> > > +     (if (types_match (type, TREE_TYPE (@0))
> > > +         && direct_internal_fn_supported_p (as_internal_fn (tos),
> > > +                                            type, OPTIMIZE_FOR_BOTH))
> > > +       (tos @0))))
> > > +#endif
> > > +
> > >  (for froms (XFLOORL XCEILL XROUNDL XRINTL)
> > >       tos (XFLOOR XCEIL XROUND XRINT)
> > >   /* llfloorl(extend(x)) -> llfloor(x), etc., if x is a double.  */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102464.c 
> > > b/gcc/testsuite/gcc.target/i386/pr102464.c
> > > new file mode 100644
> > > index 00000000000..e3e060ee80b
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102464.c
> > > @@ -0,0 +1,39 @@
> > > +/* PR target/102464.  */
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2 -mavx512fp16" } */
> > > +
> > > +#define FOO(FUNC,SUFFIX)                       \
> > > +  _Float16                                     \
> > > +  foo_##FUNC##_##SUFFIX (_Float16 a)           \
> > > +  {                                            \
> > > +    return __builtin_##FUNC##SUFFIX (a);       \
> > > +  }
> > > +
> > > +FOO (roundeven, f16);
> > > +FOO (roundeven, f);
> > > +FOO (roundeven, );
> > > +FOO (roundeven, l);
> > > +FOO (trunc, f16);
> > > +FOO (trunc, f);
> > > +FOO (trunc, );
> > > +FOO (trunc, l);
> > > +FOO (ceil, f16);
> > > +FOO (ceil, f);
> > > +FOO (ceil, );
> > > +FOO (ceil, l);
> > > +FOO (floor, f16);
> > > +FOO (floor, f);
> > > +FOO (floor, );
> > > +FOO (floor, l);
> > > +FOO (nearbyint, f16);
> > > +FOO (nearbyint, f);
> > > +FOO (nearbyint, );
> > > +FOO (nearbyint, l);
> > > +FOO (rint, f16);
> > > +FOO (rint, f);
> > > +FOO (rint, );
> > > +FOO (rint, l);
> > > +
> > > +/* { dg-final { scan-assembler-not "vcvtsh2s\[sd\]" } } */
> > > +/* { dg-final { scan-assembler-not "extendhfxf" } } */
> > > +/* { dg-final { scan-assembler-times "vrndscalesh\[^\n\r\]*xmm\[0-9\]" 
> > > 24 } } */
> > > --
> > > 2.27.0
> > >
>
>
>
> --
> BR,
> Hongtao

Reply via email to