Re: [PATCH] rs6000: Add builtins for IEEE 128-bit floating point values

Kewen.Lin via Gcc-patches Mon, 05 Jun 2023 01:45:28 -0700

Hi Carl,

on 2023/5/2 23:52, Carl Love via Gcc-patches wrote:
> GCC maintainers:
> 
> The following patch adds three buitins for inserting and extracting the
> exponent and significand for an IEEE 128-bit floating point values. 
> The builtins are valid for Power 9 and Power 10.


We already have:

unsigned long long int scalar_extract_exp (__ieee128 source);
unsigned __int128 scalar_extract_sig (__ieee128 source);
ieee_128 scalar_insert_exp (unsigned __int128 significand,
                            unsigned long long int exponent);
ieee_128 scalar_insert_exp (ieee_128 significand, unsigned long long int 
exponent);

you need to say something about the requirements or the justification for
adding more, for this patch itself, some comments are inline below, thanks!

> 
> The patch has been tested on both Power 9 and Power 10.
> 
> Please let me know if this patch is acceptable for mainline.  Thanks.
> 
>             Carl 
> 
> 
> ----------------------
> From a20cc81f98cce1140fc95775a7c25b55d1ca7cba Mon Sep 17 00:00:00 2001
> From: Carl Love <c...@us.ibm.com>
> Date: Wed, 12 Apr 2023 17:46:37 -0400
> Subject: [PATCH] rs6000: Add builtins for IEEE 128-bit floating point values
> 
> Add support for the following builtins:
> 
>  __vector unsigned long long int __builtin_extractf128_exp (__ieee128);

Could you make the name similar to the existing one?  The existing one
  
  unsigned long long int scalar_extract_exp (__ieee128 source);

has nothing like f128 on its name, this variant is just to change the
return type to vector type, how about scalar_extract_exp_to_vec?

>  __vector unsigned __int128 __builtin_extractf128_sig (__ieee128);

Ditto.

>  __ieee128 __builtin_insertf128_exp (__vector unsigned __int128,
>                                    __vector unsigned long long);

This one can just overload the existing scalar_insert_exp?

> 
> gcc/
>       * config/rs6000/rs6000-buildin.def (__builtin_extractf128_exp,
>        __builtin_extractf128_sig, __builtin_insertf128_exp): Add new
>       builtin definitions.
>       * config/rs6000.md (extractf128_exp_<mode>, insertf128_exp_<mode>,
>       extractf128_sig_<mode>): Add define_expand for new builtins.
>       (xsxexpqp_f128_<mode>, xsxsigqp_f128_<mode>, siexpqpf_f128_<mode>):
>       Add define_insn for new builtins.
>       * doc/extend.texi (__builtin_extractf128_exp, __builtin_extractf128_sig,
>       __builtin_insertf128_exp): Add documentation for new builtins.
> 
> gcc/testsuite/
>       * gcc.target/powerpc/bfp/extract-exp-ieee128.c: New test case.
>       * gcc.target/powerpc/bfp/extract-sig-ieee128.c: New test case.
>       * gcc.target/powerpc/bfp/insert-exp-ieee128.c: New test case.
> ---
>  gcc/config/rs6000/rs6000-builtins.def         |  9 +++
>  gcc/config/rs6000/vsx.md                      | 66 ++++++++++++++++++-
>  gcc/doc/extend.texi                           | 28 ++++++++
>  .../powerpc/bfp/extract-exp-ieee128.c         | 49 ++++++++++++++
>  .../powerpc/bfp/extract-sig-ieee128.c         | 56 ++++++++++++++++
>  .../powerpc/bfp/insert-exp-ieee128.c          | 58 ++++++++++++++++
>  6 files changed, 265 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
> 
> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
> b/gcc/config/rs6000/rs6000-builtins.def
> index 638d0bc72ca..3247a7f7673 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -2876,6 +2876,15 @@
>    pure vsc __builtin_vsx_xl_len_r (void *, signed long);
>      XL_LEN_R xl_len_r {}
>  
> +  vull __builtin_extractf128_exp (_Float128);
> +    EEXPKF extractf128_exp_kf {}
> +
> +  vuq __builtin_extractf128_sig (_Float128);
> +    ESIGKF extractf128_sig_kf {}
> +
> +  _Float128 __builtin_insertf128_exp (vuq, vull);
> +    IEXPKF_VULL insertf128_exp_kf {}
> +

Put them to be near its related ones like __builtin_vsx_scalar_extract_expq
etc.

>  
>  ; Builtins requiring hardware support for IEEE-128 floating-point.
>  [ieee128-hw]
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 7d845df5c2d..2a9f875ba57 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -369,7 +369,10 @@
>     UNSPEC_XXSPLTI32DX
>     UNSPEC_XXBLEND
>     UNSPEC_XXPERMX
> -  ])
> +   UNSPEC_EXTRACTEXPIEEE
> +   UNSPEC_EXTRACTSIGIEEE
> +   UNSPEC_INSERTEXPIEEE

These are not necessary, just use the existing UNSPEC_VSX_SXEXPDP etc.

> +])
>  
>  (define_int_iterator XVCVBF16        [UNSPEC_VSX_XVCVSPBF16
>                                UNSPEC_VSX_XVCVBF16SPN])
> @@ -4155,6 +4158,38 @@
>   "vins<wd>rx %0,%1,%2"
>   [(set_attr "type" "vecsimple")])
>  
> +(define_expand "extractf128_exp_<mode>"
> +  [(set (match_operand:V2DI 0 "altivec_register_operand")
> +  (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand")]
> +               UNSPEC_EXTRACTEXPIEEE))]
> +"TARGET_P9_VECTOR"
> +{
> +  emit_insn (gen_xsxexpqp_f128_<mode> (operands[0], operands[1]));
> +  DONE;
> +})
> +
> +(define_expand "insertf128_exp_<mode>"
> +  [(set (match_operand:IEEE128 0 "altivec_register_operand")
> +  (unspec:IEEE128 [(match_operand:V1TI 1 "altivec_register_operand")
> +                (match_operand:V2DI 2 "altivec_register_operand")]
> +               UNSPEC_INSERTEXPIEEE))]
> +"TARGET_P9_VECTOR"
> +{
> +  emit_insn (gen_xsiexpqpf_f128_<mode> (operands[0], operands[1],
> +                                     operands[2]));
> +  DONE;
> +})
> +
> +(define_expand "extractf128_sig_<mode>"
> +  [(set (match_operand:V2DI 0 "altivec_register_operand")
> +  (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand")]
> +               UNSPEC_EXTRACTSIGIEEE))]
> +"TARGET_P9_VECTOR"
> +{
> +  emit_insn (gen_xsxsigqp_f128_<mode> (operands[0], operands[1]));
> +  DONE;
> +})

These define_expands are not necessary, the called gen functions from the
define_insns can be directly used as bif pattern in rs6000-builtins.def.

> +
>  (define_expand "vreplace_elt_<mode>"
>    [(set (match_operand:REPLACE_ELT 0 "register_operand")
>    (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
> @@ -5016,6 +5051,15 @@
>    "xsxexpqp %0,%1"
>    [(set_attr "type" "vecmove")])
>  
> +;; VSX Scalar to Vector Extract Exponent IEEE 128-bit floating point format
> +(define_insn "xsxexpqp_f128_<mode>"
> +  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
> +     (unspec:V2DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
> +      UNSPEC_VSX_SXEXPDP))]
> +  "TARGET_P9_VECTOR"
> +  "xsxexpqp %0,%1"
> +  [(set_attr "type" "vecmove")])

I think you can just merge this into the existing xsxexpqp_<mode> by extending
with one mode like xsxexpqp_<IEEE128:mode>_<xxx:mode> for unspec:xxx.

xxx can be one mode iterator for DI and V2DI.

> +
>  ;; VSX Scalar Extract Exponent Double-Precision
>  (define_insn "xsxexpdp"
>    [(set (match_operand:DI 0 "register_operand" "=r")
> @@ -5034,6 +5078,15 @@
>    "xsxsigqp %0,%1"
>    [(set_attr "type" "vecmove")])
>  
> +;; VSX Scalar to Vector Extract Significand IEEE 128-bit floating point 
> format
> +(define_insn "xsxsigqp_f128_<mode>"
> +  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
> +     (unspec:V2DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
> +      UNSPEC_VSX_SXSIG))]
> +  "TARGET_P9_VECTOR"
> +  "xsxsigqp %0,%1"
> +  [(set_attr "type" "vecmove")])

Ditto, the mode V2DI looks unexpected, V1TI instead?

> +
>  ;; VSX Scalar Extract Significand Double-Precision
>  (define_insn "xsxsigdp"
>    [(set (match_operand:DI 0 "register_operand" "=r")
> @@ -5054,6 +5107,17 @@
>    "xsiexpqp %0,%1,%2"
>    [(set_attr "type" "vecmove")])
>  
> +;; VSX Insert Exponent IEEE 128-bit Floating point format
> +(define_insn "xsiexpqpf_f128_<mode>"
> +  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
> +     (unspec:IEEE128
> +      [(match_operand:V1TI 1 "altivec_register_operand" "v")
> +       (match_operand:V2DI 2 "altivec_register_operand" "v")]
> +      UNSPEC_VSX_SIEXPQP))]
> +  "TARGET_P9_VECTOR"
> +  "xsiexpqp %0,%1,%2"
> +  [(set_attr "type" "vecmove")]> +
>  ;; VSX Scalar Insert Exponent Quad-Precision
>  (define_insn "xsiexpqp_<mode>"
>    [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index e426a2eb7d8..456e5a03d69 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -18511,6 +18511,34 @@ the FPSCR.  The instruction is a lower latency 
> version of the @code{mffs}
>  instruction.  If the @code{mffsl} instruction is not available, then the
>  builtin uses the older @code{mffs} instruction to read the FPSCR.
>  
> +@smallexample
> +@exdent vector unsigned long long int
> +@exdent __builtin_extractf128_exp (__ieee128);
> +@end smallexample
> +
> +The exponent from the IEEE 128-bit floating point value is returned in the
> +lower bits of vector element 1 on Little Endian systems.
> +
> +@smallexample
> +@exdent vector unsigned __int128
> +@exdent __builtin_extractf128_sig (__ieee128);
> +@end smallexample
> +
> +The significand from the IEEE 128-bit floating point value is returned in the
> +vector element 0 bits [111:0]. Bit 112 is set to 0 if the input value was 
> zero
> +or Denormal, 1 otherwise.  Bits [127:113] are set to zero.
> +
> +@smallexample
> +@exdent _ieee128
> +@exdent __builtin_insertf128_exp (vector unsigned __int128,
> +vector unsigned long long int);
> +@end smallexample
> +
> +The first argument contains the significand in bits [111:0] for the IEEE
> +128-bi floating point tresult and the sign bit [127] for the result.  The
> +second argument contains the exponenet bits for the result in bits [14:0] of
> +vector element 1.

Just put the new prototypes after the existing ones and extend the current 
related
documentation if needed.

> +
>  @node Basic PowerPC Built-in Functions Available on ISA 3.1
>  @subsubsection Basic PowerPC Built-in Functions Available on ISA 3.1
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c 
> b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
> new file mode 100644
> index 00000000000..47e2c43962f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
> @@ -0,0 +1,49 @@
> +/* { dg-do run { target { powerpc*-*-* } } } */> +/* { 
> dg-require-effective-target power10_ok } */

It needs _hw for run and it doesn't require power10, so p9vector_hw.

> +/* { dg-options "-mdejagnu-cpu=power9" } */

You can refer to the conditions for the existing test cases on ieee128
extract_{exp,sig}, insert_..., the underlying insns of these newly
introduced are the same, the condition should be the same.

BR,
Kewen

> +
> +#include <altivec.h>
> +#include <stdlib.h>
> +
> +#if DEBUG
> +#include <stdio.h>
> +#endif
> +
> +vector unsigned long long int
> +get_exponents (__ieee128 *p)
> +{
> +  __ieee128 source = *p;
> +
> +  return __builtin_extractf128_exp (source);
> +}
> +
> +int
> +main ()
> +{
> +  vector unsigned long long int result, exp_result;
> +  union conv128_t
> +   {
> +     __ieee128 val_ieee128;
> +     __int128  val_int128;
> +  } source;
> +  
> +  exp_result[0] = 0x0ULL;
> +  exp_result[1] = 0x1234ULL;
> +  source.val_int128 = 0x923456789ABCDEF0ULL;
> +  source.val_int128 = (source.val_int128 << 64) | 0x123456789ABCDEFULL;
> +
> +  result = get_exponents (&source.val_ieee128);
> +
> +  if ((result[0] != exp_result[0]) || (result[1] != exp_result[1]))
> +#if DEBUG
> +    {
> +      printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n",
> +          result[0], exp_result[0]);
> +      printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n",
> +          result[1], exp_result[1]);
> +    }
> +#else
> +    abort();
> +#endif
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c 
> b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
> new file mode 100644
> index 00000000000..4bd50ca2281
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
> @@ -0,0 +1,56 @@
> +/* { dg-do run { target { powerpc*-*-* } } } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power9" } */
> +
> +#include <altivec.h>
> +#include <stdlib.h>
> +
> +#if DEBUG
> +#include <stdio.h>
> +#endif
> +
> +vector unsigned __int128
> +get_significand (__ieee128 *p)
> +{
> +  __ieee128 source = *p;
> +
> +  return __builtin_extractf128_sig (source);
> +}
> +
> +int
> +main ()
> +{
> +  #define NOT_ZERO_OR_DENORMAL  0x1000000000000
> +
> +  union conv128_t
> +   {
> +     __ieee128 val_ieee128;
> +     unsigned long long int val_ull[2];
> +     unsigned __int128  val_uint128;
> +     __vector unsigned __int128  val_vuint128;
> +  } source, result, exp_result;
> +  
> +  /* Result is not zero or denormal.  */
> +  exp_result.val_ull[1] = 0x00056789ABCDEF0ULL | NOT_ZERO_OR_DENORMAL;
> +  exp_result.val_ull[0] = 0x123456789ABCDEFULL;
> +  source.val_uint128 = 0x923456789ABCDEF0ULL;
> +  source.val_uint128 = (source.val_uint128 << 64) | 0x123456789ABCDEFULL;
> +
> +  /* Note, bits[0:14] are set to 0, bit[15] is 0 if the input was zero or
> +     Denormal, 1 otherwise.  */
> +  result.val_vuint128 = get_significand (&source.val_ieee128);
> +
> +  if ((result.val_ull[0] != exp_result.val_ull[0])
> +      || (result.val_ull[1] != exp_result.val_ull[1]))
> +#if DEBUG
> +    {
> +      printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n",
> +          result.val_ull[0], exp_result.val_ull[0]);
> +      printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n",
> +          result.val_ull[1], exp_result.val_ull[1]);
> +    }
> +#else
> +    abort();
> +#endif
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c 
> b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
> new file mode 100644
> index 00000000000..5bd33ed5b7d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
> @@ -0,0 +1,58 @@
> +/* { dg-do run { target { powerpc*-*-* } } } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power9" } */
> +
> +#include <altivec.h>
> +#include <stdlib.h>
> +
> +#ifdef DEBUG
> +#include <stdio.h>
> +#endif
> +
> +__ieee128
> +insert_exponent (__vector unsigned __int128 *significand_p,
> +              __vector unsigned long long int *exponent_p)
> +{
> +  __vector unsigned __int128 significand = *significand_p;
> +  __vector unsigned long long int exponent = *exponent_p;
> +
> +  return __builtin_insertf128_exp (significand, exponent);
> +}
> +
> +
> +int
> +main ()
> +{
> +  union conv128_t
> +   {
> +     __ieee128 val_ieee128;
> +     __vector unsigned __int128 val_vint128;
> +     __vector unsigned long long int  val_vull;
> +  } result, exp_result, significand;
> +
> +  __vector unsigned long long int exponent;
> +
> +  significand.val_vull[0] = 0xFEDCBA9876543210ULL;
> +  significand.val_vull[1] = 0x7FFF12345678ABCDULL;  /* positive value */
> +
> +  exponent[0] = 0x5678;
> +  exponent[1] = 0x1234;
> +
> +  exp_result.val_vull[0] = 0xFEDCBA9876543210ULL;
> +  exp_result.val_vull[1] = 0x123412345678ABCDULL;
> +
> +  result.val_ieee128 = insert_exponent(&significand.val_vint128, &exponent);
> +  
> +  if (result.val_ieee128 != exp_result.val_ieee128)
> +#ifdef DEBUG
> +    {
> +      printf("result.val_vull[0] = 0x%llx, exp_result.val_vull[0] = 
> 0x%llx\n",
> +          result.val_vull[0], exp_result.val_vull[0]);
> +      printf("result.val_vull[1] = 0x%llx, exp_result.val_vull[1] = 
> 0x%llx\n",
> +          result.val_vull[1], exp_result.val_vull[1]);
> +    }
> +#else
> +    abort ();
> +#endif
> +
> +}

Re: [PATCH] rs6000: Add builtins for IEEE 128-bit floating point values

Reply via email to