On Tue, 2023-06-13 at 11:10 +0800, Kewen.Lin wrote: > Hi Carl, > > on 2023/6/8 23:21, Carl Love wrote: > > Kewen, GCC maintainers: > > > > Version 3, was able to get the overloaded version of > > scalar_insert_exp > > to work and the change to xsxexpqp_f128_<mode> define instruction > > to > > work with the suggestions from Kewen. > > > > Version 2, I have addressed the various comments from Kewen. I had > > issues with adding an additional overloaded version of > > scalar_insert_exp with vector arguments. The overload > > infrastructure > > didn't work with a mix of scalar and vector arguments. I did > > rename > > the __builtin_insertf128_exp to __builtin_vsx_scalar_insert_exp_qp > > make > > it similar to the existing builtin. I also wasn't able to get the > > suggested merge of xsxexpqp_f128_<mode> with xsxexpqp_<mode> to > > work so > > I left the two simpler definitiions. > > > > The patch add three new builtins to extract the significand and > > exponent of an IEEE float 128-bit value where the builtin argument > > is a > > vector. Additionally, a builtin to insert the exponent into an > > IEEE > > float 128-bit vector argument is added. These builtins were > > requested > > since there is no clean and optimal way to transfer between a > > vector > > and a scalar IEEE 128 bit value. > > > > The patch has been tested on Power 10 with no regressions. Please > > let > > me know if the patch is acceptable or not. Thanks. > > > > Carl > > > > --------------------------------------- > > rs6000: Add builtins for IEEE 128-bit floating point values > > > > Add support for the following builtins: > > > > __vector unsigned long long int > > __builtin_scalar_extract_exp_to_vec (__ieee128); > > __vector unsigned __int128 > > __builtin_scalar_extract_sig_to_vec (__ieee128); > > __ieee128 scalar_insert_exp (__vector unsigned __int128, > > __vector unsigned long long);
Fixed commit log, removed __builtin_ from the names per comments from Kewen below. > > > > These builtins were requesed since there is no clean and performant > > way to > > transfer a value from a vector type and scalar type, despite the > > fact > > that they both reside in vector registers. > > > > gcc/ > > * config/rs6000/rs6000-builtin.cc (rs6000_expand_builtin): > > Rename CCDE_FOR_xsxexpqp_tf to CODE_FOR_xsxexpqp_tf_di. > > Rename CODE_FOR_xsxexpqp_kf to CODE_FOR_xsxexpqp_kf_di. > > * config/rs6000/rs6000-buildin.def (__builtin_extractf128_exp, > > __builtin_extractf128_sig, __builtin_insertf128_exp): Add new > > builtin definitions. > > Rename xsxexpqp_kf to xsxexpqp_kf_di. > > * config/rs6000/rs6000-c.cc > > (altivec_resolve_overloaded_builtin): > > Add else if for MODE_VECTOR_INT. Update comments. > > * config/rs6000/rs6000-overload.def > > (__builtin_vec_scalar_insert_exp): Add new overload definition > > with > > vector arguments. > > * config/vsx.md (VSEEQP_DI): New mode iterator. > > Rename define_insn xsxexpqp_<mode> to > > sxexpqp_<IEEE128:mode>_<VSEEQP_DI:mode>. > > (xsxsigqp_f128_<mode>, xsiexpqpf_f128_<mode>): Add define_insn > > for > > new builtins. > > * doc/extend.texi (__builtin_extractf128_exp, > > __builtin_extractf128_sig): Add documentation for new builtins. > > (scalar_insert_exp): Add new overloaded builtin definition. > > > > gcc/testsuite/ > > * gcc.target/powerpc/bfp/extract-exp-ieee128.c: New test case. > > * gcc.target/powerpc/bfp/extract-sig-ieee128.c: New test case. > > * gcc.target/powerpc/bfp/insert-exp-ieee128.c: New test case. > > --- > > gcc/config/rs6000/rs6000-builtin.cc | 4 +- > > gcc/config/rs6000/rs6000-builtins.def | 11 ++- > > gcc/config/rs6000/rs6000-c.cc | 10 +- > > gcc/config/rs6000/rs6000-overload.def | 2 + > > gcc/config/rs6000/vsx.md | 28 +++++- > > gcc/doc/extend.texi | 9 ++ > > .../powerpc/bfp/extract-exp-ieee128.c | 50 ++++++++++ > > .../powerpc/bfp/extract-sig-ieee128.c | 57 ++++++++++++ > > .../powerpc/bfp/insert-exp-ieee128.c | 91 > > +++++++++++++++++++ > > 9 files changed, 253 insertions(+), 9 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract- > > exp-ieee128.c > > create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract- > > sig-ieee128.c > > create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/insert- > > exp-ieee128.c > > > > diff --git a/gcc/config/rs6000/rs6000-builtin.cc > > b/gcc/config/rs6000/rs6000-builtin.cc > > index 534698e7d3e..d99f0ae5dda 100644 > > --- a/gcc/config/rs6000/rs6000-builtin.cc > > +++ b/gcc/config/rs6000/rs6000-builtin.cc > > @@ -3326,8 +3326,8 @@ rs6000_expand_builtin (tree exp, rtx target, > > rtx /* subtarget */, > > case CODE_FOR_fmakf4_odd: > > icode = CODE_FOR_fmatf4_odd; > > break; > > - case CODE_FOR_xsxexpqp_kf: > > - icode = CODE_FOR_xsxexpqp_tf; > > + case CODE_FOR_xsxexpqp_kf_di: > > + icode = CODE_FOR_xsxexpqp_tf_di; > > The others newly added such as CODE_FOR_xsxexpqp_kf_v2di should be > handled here as well. OK, added all of the new cases. > > > break; > > case CODE_FOR_xsxsigqp_kf: > > icode = CODE_FOR_xsxsigqp_tf; > > diff --git a/gcc/config/rs6000/rs6000-builtins.def > > b/gcc/config/rs6000/rs6000-builtins.def > > index 638d0bc72ca..dcd4a393906 100644 > > --- a/gcc/config/rs6000/rs6000-builtins.def > > +++ b/gcc/config/rs6000/rs6000-builtins.def > > @@ -2901,8 +2901,14 @@ > > fpmath double __builtin_truncf128_round_to_odd (_Float128); > > TRUNCF128_ODD trunckfdf2_odd {} > > > > + vull __builtin_scalar_extract_exp_to_vec (_Float128); > > + EEXPKF xsxexpqp_kf_v2di {} > > + > > + vuq __builtin_scalar_extract_sig_to_vec (_Float128); > > + ESIGKF xsxsigqp_f128_kf {} > > + > > const signed long long __builtin_vsx_scalar_extract_expq > > (_Float128); > > - VSEEQP xsxexpqp_kf {} > > + VSEEQP xsxexpqp_kf_di {} > > > > const signed __int128 __builtin_vsx_scalar_extract_sigq > > (_Float128); > > VSESQP xsxsigqp_kf {} > > @@ -2915,6 +2921,9 @@ > > unsigned > > long long); > > VSIEQPF xsiexpqpf_kf {} > > > > + const _Float128 __builtin_vsx_scalar_insert_exp_vqp (vuq, vull); > > + VSIEQPV xsiexpqpf_f128_kf {} > > + > > const signed int __builtin_vsx_scalar_test_data_class_qp > > (_Float128, \ > > const > > int<7>); > > VSTDCQP xststdcqp_kf {} > > diff --git a/gcc/config/rs6000/rs6000-c.cc > > b/gcc/config/rs6000/rs6000-c.cc > > index 8555174d36e..11060f697db 100644 > > --- a/gcc/config/rs6000/rs6000-c.cc > > +++ b/gcc/config/rs6000/rs6000-c.cc > > @@ -1929,11 +1929,15 @@ altivec_resolve_overloaded_builtin > > (location_t loc, tree fndecl, > > 128-bit variant of built-in function. */ > > if (GET_MODE_PRECISION (arg1_mode) > 64) > > { > > - /* If first argument is of float variety, choose variant > > - that expects __ieee128 argument. Otherwise, expect > > - __int128 argument. */ > > + /* If first argument is of float variety, choose the > > variant that > > + expects __ieee128 argument. If the first argument is > > vector > > + int, choose the variant that expects vector unsigned > > + __int128 argument. Otherwise, expect scalar __int128 > > argument. > > + */ > > if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT) > > instance_code = RS6000_BIF_VSIEQPF; > > + else if (GET_MODE_CLASS (arg1_mode) == MODE_VECTOR_INT) > > + instance_code = RS6000_BIF_VSIEQPV; > > else > > instance_code = RS6000_BIF_VSIEQP; > > } > > diff --git a/gcc/config/rs6000/rs6000-overload.def > > b/gcc/config/rs6000/rs6000-overload.def > > index c582490c084..84743114c8e 100644 > > --- a/gcc/config/rs6000/rs6000-overload.def > > +++ b/gcc/config/rs6000/rs6000-overload.def > > @@ -4515,6 +4515,8 @@ > > VSIEQP > > _Float128 __builtin_vec_scalar_insert_exp (_Float128, unsigned > > long long); > > VSIEQPF > > + _Float128 __builtin_vec_scalar_insert_exp (vuq, vull); > > + VSIEQPV > > > > [VEC_VSTDC, scalar_test_data_class, > > __builtin_vec_scalar_test_data_class] > > unsigned int __builtin_vec_scalar_test_data_class (float, const > > int); > > diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md > > index 7d845df5c2d..92a34dec1be 100644 > > --- a/gcc/config/rs6000/vsx.md > > +++ b/gcc/config/rs6000/vsx.md > > @@ -396,6 +396,7 @@ > > V4SF > > V2DF > > V2DI]) > > +(define_mode_iterator VSEEQP_DI [V2DI DI]) > > > > (define_mode_attr VM3_char [(V2DI "d") > > (V4SI "w") > > @@ -5008,9 +5009,10 @@ > > ;; ISA 3.0 Binary Floating-Point Support > > > > ;; VSX Scalar Extract Exponent Quad-Precision > > -(define_insn "xsxexpqp_<mode>" > > - [(set (match_operand:DI 0 "altivec_register_operand" "=v") > > - (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" > > "v")] > > +(define_insn "xsxexpqp_<IEEE128:mode>_<VSEEQP_DI:mode>" > > + [(set (match_operand:VSEEQP_DI 0 "altivec_register_operand" > > "=v") > > + (unspec:VSEEQP_DI > > + [(match_operand:IEEE128 1 "altivec_register_operand" "v")] > > UNSPEC_VSX_SXEXPDP))] > > "TARGET_P9_VECTOR" > > "xsxexpqp %0,%1" > > @@ -5034,6 +5036,15 @@ > > "xsxsigqp %0,%1" > > [(set_attr "type" "vecmove")]) > > > > +;; VSX Scalar to Vector Extract Significand IEEE 128-bit floating > > point format > > +(define_insn "xsxsigqp_f128_<mode>" > > + [(set (match_operand:V1TI 0 "altivec_register_operand" "=v") > > + (unspec:V1TI [(match_operand:IEEE128 1 > > "altivec_register_operand" "v")] > > + UNSPEC_VSX_SXSIG))] > > + "TARGET_P9_VECTOR" > > + "xsxsigqp %0,%1" > > + [(set_attr "type" "vecmove")]) > > Like above xsxexpqp_<mode>, you can also simplify this with existing > xsxsigqp_<mode>, > the bif pattern should be updated accordingly. Fixed > > > + > > ;; VSX Scalar Extract Significand Double-Precision > > (define_insn "xsxsigdp" > > [(set (match_operand:DI 0 "register_operand" "=r") > > @@ -5054,6 +5065,17 @@ > > "xsiexpqp %0,%1,%2" > > [(set_attr "type" "vecmove")]) > > > > +;; VSX Insert Exponent IEEE 128-bit Floating point format > > +(define_insn "xsiexpqpf_f128_<mode>" > > + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") > > + (unspec:IEEE128 > > + [(match_operand:V1TI 1 "altivec_register_operand" "v") > > + (match_operand:V2DI 2 "altivec_register_operand" "v")] > > + UNSPEC_VSX_SIEXPQP))] > > + "TARGET_P9_VECTOR" > > + "xsiexpqp %0,%1,%2" > > + [(set_attr "type" "vecmove")]) > > Ditto, with existing xsiexpqp_<mode>. Fixed. > > > + > > ;; VSX Scalar Insert Exponent Quad-Precision > > (define_insn "xsiexpqp_<mode>" > > [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") > > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi > > index e426a2eb7d8..625df24b62f 100644 > > --- a/gcc/doc/extend.texi > > +++ b/gcc/doc/extend.texi > > @@ -19724,6 +19724,10 @@ double scalar_insert_exp (double > > significand, unsigned long long int exponent); > > ieee_128 scalar_insert_exp (unsigned __int128 significand, > > unsigned long long int exponent); > > ieee_128 scalar_insert_exp (ieee_128 significand, unsigned long > > long int exponent); > > +vector ieee_128 scalar_insert_exp (vector unsigned __int128, > > + vector unsigned long long); > > +vector unsigned long long __builtin_scalar_extract_exp_to_vec > > (ieee_128); > > +vector unsigned __int128 __builtin_scalar_extract_sig_to_vec > > (ieee_128); > > s/__builtin_// to align with the others. Renamed the builtins as requested. Fixed the names thru out the patch. > > > > > int scalar_cmp_exp_gt (double arg1, double arg2); > > int scalar_cmp_exp_lt (double arg1, double arg2); > > @@ -19777,6 +19781,11 @@ The significand and exponent components of > > the result are composed of > > the least significant 15 bits of the @code{exponent} argument and > > the > > least significant 112 bits of the @code{significand} argument > > respectively. > > > > +The @code{__builtin_scalar_extract_exp_to_vec}, > > +and @code{__builtin_scalar_extract_sig_to_vec} are similar to > > +@code{scalar_extract_exp}, @code{scalar_extract_sig} except they > > operate > > +on vector arguments. > > s/__builtin_// too, the description doesn't match the implementation, > the given > arguments are actually the same, but "they return vector type of > value"? Fixed up the description. > > > + > > The @code{scalar_cmp_exp_gt}, @code{scalar_cmp_exp_lt}, > > @code{scalar_cmp_exp_eq}, and @code{scalar_cmp_exp_unordered} > > built-in > > functions return a non-zero value if @code{arg1} is greater than, > > less > > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp- > > ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp- > > ieee128.c > > new file mode 100644 > > index 00000000000..39981f0a274 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c > > In this same testsuite, we have some other test cases also testing > ieee128, > but they just were named with number, so could you also name this > with number > instead of "-ieee128"? Renamed the three tests replacing -ieee128 with -1. > > > @@ -0,0 +1,50 @@ > > +/* { dg-do run { target { powerpc*-*-* } } } */ > > +/* { dg-require-effective-target lp64 } */ > > +/* { dg-require-effective-target p9vector_hw } */ > > +/* { dg-options "-mdejagnu-cpu=power9" } */ > > There are some other test cases checking the expected assembly insn, > you can add an extra option "-save-temps" and use dg-final > scan-assembler-times to check just with this test case. > Added check for the generated instruction, did this in all three new test files. > > + > > +#include <altivec.h> > > +#include <stdlib.h> > > + > > +#if DEBUG > > +#include <stdio.h> > > +#endif > > + > > +vector unsigned long long int > > +get_exponents (__ieee128 *p) > > +{ > > + __ieee128 source = *p; > > + > > + return __builtin_scalar_extract_exp_to_vec (source); > > s/__builtin_// Fixed > > > +} > > + > > +int > > +main () > > +{ > > + vector unsigned long long int result, exp_result; > > + union conv128_t > > + { > > + __ieee128 val_ieee128; > > + __int128 val_int128; > > + } source; > > + > > + exp_result[0] = 0x0ULL; > > + exp_result[1] = 0x1234ULL; > > + source.val_int128 = 0x923456789ABCDEF0ULL; > > + source.val_int128 = (source.val_int128 << 64) | > > 0x123456789ABCDEFULL; > > + > > + result = get_exponents (&source.val_ieee128); > > + > > + if ((result[0] != exp_result[0]) || (result[1] != > > exp_result[1])) > > +#if DEBUG > > + { > > + printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n", > > + result[0], exp_result[0]); > > + printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n", > > + result[1], exp_result[1]); > > + } > > +#else > > + abort(); > > +#endif > > + return 0; > > +} > > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig- > > ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig- > > ieee128.c > > new file mode 100644 > > index 00000000000..f7b3aedb832 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c > > @@ -0,0 +1,57 @@ > > +/* { dg-do run { target { powerpc*-*-* } } } */ > > +/* { dg-require-effective-target lp64 } */ > > +/* { dg-require-effective-target p9vector_hw } */ > > +/* { dg-options "-mdejagnu-cpu=power9" } */ > > + > > +#include <altivec.h> > > +#include <stdlib.h> > > + > > +#if DEBUG > > +#include <stdio.h> > > +#endif > > + > > +vector unsigned __int128 > > +get_significand (__ieee128 *p) > > +{ > > + __ieee128 source = *p; > > + > > + return __builtin_scalar_extract_sig_to_vec(source); > > +} > > Ditto. Fixed. > > > + > > +int > > +main () > > +{ > > + #define NOT_ZERO_OR_DENORMAL 0x1000000000000 > > + > > + union conv128_t > > + { > > + __ieee128 val_ieee128; > > + unsigned long long int val_ull[2]; > > + unsigned __int128 val_uint128; > > + __vector unsigned __int128 val_vuint128; > > Using vector instead of __vector would better match what users > normally adopt. > Changed __vector to vector in all new test files. > Same for some other places. > > > + } source, result, exp_result; > > + > > + /* Result is not zero or denormal. */ > > + exp_result.val_ull[1] = 0x00056789ABCDEF0ULL | > > NOT_ZERO_OR_DENORMAL; > > + exp_result.val_ull[0] = 0x123456789ABCDEFULL; > > + source.val_uint128 = 0x923456789ABCDEF0ULL; > > + source.val_uint128 = (source.val_uint128 << 64) | > > 0x123456789ABCDEFULL; > > + > > + /* Note, bits[0:14] are set to 0, bit[15] is 0 if the input was > > zero or > > + Denormal, 1 otherwise. */ > > + result.val_vuint128 = get_significand (&source.val_ieee128); > > + > > + if ((result.val_ull[0] != exp_result.val_ull[0]) > > + || (result.val_ull[1] != exp_result.val_ull[1])) > > +#if DEBUG > > + { > > + printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n", > > + result.val_ull[0], exp_result.val_ull[0]); > > + printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n", > > + result.val_ull[1], exp_result.val_ull[1]); > > + } > > +#else > > + abort(); > > +#endif > > + return 0; > > +} > > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp- > > ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp- > > ieee128.c > > new file mode 100644 > > index 00000000000..402dc48ad9d > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c > > Ditto for this case. Fixed. > > BR, > Kewen > > > @@ -0,0 +1,91 @@ > > +/* { dg-do run { target { powerpc*-*-* } } } */ > > +/* { dg-require-effective-target lp64 } */ > > +/* { dg-require-effective-target p9vector_hw } */ > > +/* { dg-options "-mdejagnu-cpu=power9" } */ > > + > > +#include <altivec.h> > > +#include <stdlib.h> > > + > > +#ifdef DEBUG > > +#include <stdio.h> > > +#endif > > + > > +__ieee128 > > +insert_exponent (__vector unsigned __int128 *significand_p, > > + __vector unsigned long long int *exponent_p) > > +{ > > + __vector unsigned __int128 significand = *significand_p; > > + __vector unsigned long long int exponent = *exponent_p; > > + > > + return scalar_insert_exp (significand, exponent); > > +} > > + > > +__ieee128 > > +insert_exponent2 (unsigned __int128 significand, > > + unsigned long long int exponent) > > +{ > > + return scalar_insert_exp (significand, exponent); > > +} > > + > > +int > > +main () > > +{ > > + __ieee128 val_ieee128, result_ieee128, exp_result_ieee128; > > + unsigned __int128 val_int128; > > + unsigned long long int val_ull; > > + union conv128_t > > + { > > + __ieee128 val_ieee128; > > + __vector unsigned __int128 val_vint128; > > + __vector unsigned long long int val_vull; > > + } result, exp_result, significand; > > + > > + __vector unsigned long long int exponent; > > + > > + /* Scalar argument test */ > > + val_ieee128 = 0xFEDCBA9876543210ULL; > > + val_ull = 0x5678; > > + exp_result.val_vull[1] = 0x5678000000000000ULL; > > + exp_result.val_vull[0] = 0xfedcba9876543210; > > + result_ieee128 = insert_exponent2 (val_ieee128, val_ull); > > + > > + if (result_ieee128 != exp_result.val_ieee128) > > +#ifdef DEBUG > > + { > > + result.val_ieee128 = result_ieee128; > > + printf("Scalar argument ERROR:\n"); > > + printf(" val_ieee128 = 0x%llx %llx\n", > > + result.val_vull[1], result.val_vull[0]); > > + printf(" val_ieee128 = 0x%llx %llx\n", > > + exp_result.val_vull[1], exp_result.val_vull[0]); > > + } > > +#else > > + abort (); > > +#endif > > + > > + /* Vector argument test */ > > + significand.val_vull[0] = 0xFEDCBA9876543210ULL; > > + significand.val_vull[1] = 0x7FFF12345678ABCDULL; /* positive > > value */ > > + > > + exponent[0] = 0x5678; > > + exponent[1] = 0x1234; > > + > > + exp_result.val_vull[0] = 0xFEDCBA9876543210ULL; > > + exp_result.val_vull[1] = 0x123412345678ABCDULL; > > + > > + result.val_ieee128 = insert_exponent(&significand.val_vint128, > > &exponent); > > + > > + if (result.val_ieee128 != exp_result.val_ieee128) > > +#ifdef DEBUG > > + { > > + printf("Vector argument ERROR:\n"); > > + printf(" result = 0x%llx %llx\n", > > + result.val_vull[1], result.val_vull[0]); > > + printf(" exp_result = 0x%llx %llx\n", > > + exp_result.val_vull[1], exp_result.val_vull[0]); > > + } > > +#else > > + abort (); > > +#endif > > + > > +}