Kewen:
On 5/13/24 19:54, Kewen.Lin wrote:
> Hi,
>
> on 2024/4/20 05:17, Carl Love wrote:
>> rs6000, add overloaded vec_sel with int128 arguments
>>
>> Extend the vec_sel built-in to take three signed/unsigned int128 arguments
>> and return a signed/unsigned int128 result.
>>
>> Extending the vec_sel built-in makes the existing buit-ins
>> __builtin_vsx_xxsel_1ti and __builtin_vsx_xxsel_1ti_uns obsolete. The
>> patch removes these built-ins.
>>
>> The patch adds documentation and test cases for the new overloaded vec_sel
>> built-ins.
>>
>> gcc/ChangeLog:
>> * config/rs6000/rs6000-builtins.def (__builtin_vsx_xxsel_1ti,
>> __builtin_vsx_xxsel_1ti_uns): Remove built-in definitions.
>> * config/rs6000/rs6000-overload.def (vec_sel): Add new overloaded
>> definitions.
>> * doc/extend.texi: Add documentation for new vec_sel arguments.
>>
>> gcc/testsuite/ChangeLog:
>> * gcc.target/powerpc/vec_sel_runnable-int128.c: New test file.
>> ---
>> gcc/config/rs6000/rs6000-builtins.def | 6 --
>> gcc/config/rs6000/rs6000-overload.def | 4 +
>> gcc/doc/extend.texi | 14 ++++
>> .../powerpc/vec-sel-runnable-i128.c | 84 +++++++++++++++++++
>> 4 files changed, 102 insertions(+), 6 deletions(-)
>> create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c
>>
>> diff --git a/gcc/config/rs6000/rs6000-builtins.def
>> b/gcc/config/rs6000/rs6000-builtins.def
>> index d09e21a9151..46d2ae7b7cb 100644
>> --- a/gcc/config/rs6000/rs6000-builtins.def
>> +++ b/gcc/config/rs6000/rs6000-builtins.def
>> @@ -1931,12 +1931,6 @@
>> const vuc __builtin_vsx_xxsel_16qi_uns (vuc, vuc, vuc);
>> XXSEL_16QI_UNS vector_select_v16qi_uns {}
>>
>> - const vsq __builtin_vsx_xxsel_1ti (vsq, vsq, vsq);
>> - XXSEL_1TI vector_select_v1ti {}
>> -
>> - const vsq __builtin_vsx_xxsel_1ti_uns (vsq, vsq, vsq);
>> - XXSEL_1TI_UNS vector_select_v1ti_uns {}
>> -
>> const vd __builtin_vsx_xxsel_2df (vd, vd, vd);
>> XXSEL_2DF vector_select_v2df {}
>>
>> diff --git a/gcc/config/rs6000/rs6000-overload.def
>> b/gcc/config/rs6000/rs6000-overload.def
>> index 68501c05289..5912c9452f4 100644
>> --- a/gcc/config/rs6000/rs6000-overload.def
>> +++ b/gcc/config/rs6000/rs6000-overload.def
>> @@ -3274,6 +3274,10 @@
>> VSEL_2DF VSEL_2DF_B
>> vd __builtin_vec_sel (vd, vd, vull);
>> VSEL_2DF VSEL_2DF_U
>> + vsq __builtin_vec_sel (vsq, vsq, vsq);
>> + VSEL_1TI VSEL_1TI_S
>> + vuq __builtin_vec_sel (vuq, vuq, vuq);
>> + VSEL_1TI_UNS VSEL_1TI_U
>> ; The following variants are deprecated.
>> vsll __builtin_vec_sel (vsll, vsll, vsll);
>> VSEL_2DI_B VSEL_2DI_S
>> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
>> index 64a43b55e2d..86b8e536dbe 100644
>> --- a/gcc/doc/extend.texi
>> +++ b/gcc/doc/extend.texi
>> @@ -23358,6 +23358,20 @@ The programmer is responsible for understanding the
>> endianness issues involved
>> with the first argument and the result.
>> @findex vec_replace_unaligned
>>
>> +Vector select
>> +
>> +@smallexample
>> +vector signed __int128 vec_sel (vector signed __int128,
>> + vector signed __int128, vector signed __int128);
>> +vector unsigned __int128 vec_sel (vector unsigned __int128,
>> + vector unsigned __int128, vector unsigned __int128);
>> +@end smallexample
>> +
>> +The overloaded built-in @code{vec_sel} with vector signed/unsigned __int128
>> +arguments and returns a vector selecting bits from the two source vectors
>> based
>> +on the values of the third input vector. This built-in is an extension of
>> the
>> +@code{vec_sel} built-in documented in the PVIPR.
>> +
>
> Why did you place this in a section for ISA 3.1 (Power10)? It doesn't really
> require this support. The used instance VSEL_1TI and VSEL_1TI_UNS are placed
> in altivec stanza, so it looks that we should put it under the section
> "PowerPC AltiVec Built-in Functions on ISA 2.05". And since it's an extension
> of @code{vec_sel} documented in the PVIPR, I prefer to just mention it's "an
> extension of the @code{vec_sel} built-in documented in the PVIPR" and omitting
> the description to avoid possible slightly different wording.
Honestly, at this point in time I don't remember why I put it there. It has
been too long since I created the patch. That said, the test case requires
Power 10 due to the comparison check using built-in vec_all_eq but that is
another issue. The built-in generates the xxsel instruction that is an ISA
2.06 instruction. So, I would say it should go into the ISA 2.06 section. I
moved it to the ISA 2.06 section.
For consistency with the previous patches/feedback, the descriptions are being
dropped and replaced with the instance being a new extension of the built-in
that is documented in the PVIPR.
>
>> Vector Shift Left Double Bit Immediate
>> @smallexample
>> @exdent vector signed char vec_sldb (vector signed char, vector signed char,
>> diff --git a/gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c
>> b/gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c
>> new file mode 100644
>> index 00000000000..58eb383e8c3
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c
>> @@ -0,0 +1,84 @@
>> +/* { dg-do run { target power10_hw }} */
>> +/* { dg-require-effective-target int128 } */
>> +/* { dg-require-effective-target power10_hw } */
>
> As mentioned above, this doesn't require power10, you can specify vmx_hw.
> (btw removing { target power10_hw } on dg-do run line).
>
As mentioned, the testcase uses the vec_all_eq which requires Power 10. So, I
rewrote the test case
to check the result value and expected result value byte by byte so the test
will run on Power 7 (ISA 2.06). The new version of the test case compiles and
runs with the command:
gcc -g -mcpu=power7 vec-sel-runnable-i128.c -o vec-sel-runnable-i128
>> +/* { dg-options "-mdejagnu-cpu=power10 -save-temps" } */
>
> s/-mdejagnu-cpu=power10/-maltivec/
> s/-save-temps//
>
Removed the Power 10, using vmx_hw instead.
>> +
>> +
>> +#include <altivec.h>
>> +
>> +
>> +#define DEBUG 0
>> +
>> +#if DEBUG
>> +#include <stdio.h>
>> +void print_i128 (unsigned __int128 val)
>> +{
>> + printf(" 0x%016llx%016llx",
>> + (unsigned long long)(val >> 64),
>> + (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF));
>> +}
>> +#endif
>
> Did you really test this debugging work as expected?
> With my experience when making r14-10011-g6e62ede7aaccc6,
> this debugging doesn't work and the way to initialize
> a vector int128 variable can easily suffer from endianness
> issue, so please double check this and test it on BE as well.
Yes, it seemed to work with this version of gcc. I retested the patch per your
request. I set DEBUG to 1, changed the expected result and ran it on LE:
./vec-sel-runnable-i128
ERROR, vec_sel (src_va_s128, src_vb_s128, src_vc_s128) result does not match
expected output.
Result: 0x000000000000000032147658ba9cfed0
Expected result: 0x000000000000000032147658ba9cfed1
I compiled the patch series on BE and ran the test there:
./vec-sel-runnable-i128
ERROR, vec_sel (src_va_s128, src_vb_s128, src_vc_s128) result does not match
expected output.
Result: 0x000000000000000032147658ba9cfed0
Expected result: 0x000000000000000032147658ba9cfed1
I am not sure exactly what issue you are concerned about with the print
statement. But, we could implement the print statement as follows if you
prefer:
/* Print VAL to stdout as " 0x" followed by 32 lowercase hex digits.

   The value is viewed through a union as 16 individual bytes and each
   byte is printed as two hex digits.  The bytes are walked from index 15
   down to 0 when __LITTLE_ENDIAN__ is set and from index 0 up to 15
   otherwise, so the most significant byte is intended to come first on
   either endianness.  No trailing newline is written.  */
void print_i128 (unsigned __int128 val)
{
  int i;
  union convert_u {
    unsigned __int128 val;
    /* unsigned char so bytes >= 0x80 are not sign-extended when they are
       promoted for printf.  */
    unsigned char bytes[16];
  } convert;

  convert.val = val;

  /* The prefix is written exactly once, ahead of the byte loop.  */
  printf (" 0x");

#if __LITTLE_ENDIAN__
  for (i = 15; i >= 0; i--)
#else
  for (i = 0; i < 16; i++)
#endif
    {
      /* Braces keep the per-byte print inside whichever loop header the
	 preprocessor selected.  */
      printf ("%02x", (unsigned int) convert.bytes[i]);
    }
}
which gives the same result on LE:
./vec-sel-runnable-i128
ERROR, vec_sel (src_va_s128, src_vb_s128, src_vc_s128) result does not match
expected output.
Result: 0x000000000000000032147658ba9cfed0
Expected result: 0x000000000000000032147658ba9cfed1
and on BE:
./vec-sel-runnable-i128
ERROR, vec_sel (src_va_s128, src_vb_s128, src_vc_s128) result does not match
expected output.
Result: 0x000000000000000032147658ba9cfed0
Expected result: 0x000000000000000032147658ba9cfed1
Sounds like there was some issue that you noticed on r14-10011-g6e62ede7aaccc6.
The new version of
print_i128 should be functionally equivalent but perhaps is "safer"?
Let me know if you would prefer I use the new version of the print_i128
function or if the original is OK?
Thanks.
Carl
>
> BR,
> Kewen
>
>> +
>> +extern void abort (void);
>> +
>> +int
>> +main (int argc, char *argv [])
>> +{
>> + vector signed __int128 src_va_s128;
>> + vector signed __int128 src_vb_s128;
>> + vector signed __int128 src_vc_s128;
>> + vector signed __int128 vresult_s128;
>> + vector signed __int128 expected_vresult_s128;
>> +
>> + vector unsigned __int128 src_va_u128;
>> + vector unsigned __int128 src_vb_u128;
>> + vector unsigned __int128 src_vc_u128;
>> + vector unsigned __int128 vresult_u128;
>> + vector unsigned __int128 expected_vresult_u128;
>> +
>> + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0};
>> + src_vb_s128 = (vector signed __int128) {0xFEDCBA9876543210};
>> + src_vc_s128 = (vector signed __int128) {0x3333333333333333};
>> + expected_vresult_s128 = (vector signed __int128) {0x32147658ba9cfed0};
>> +
>> + /* Signed arguments. */
>> + vresult_s128 = vec_sel (src_va_s128, src_vb_s128, src_vc_s128);
>> +
>> + if (!vec_all_eq (vresult_s128, expected_vresult_s128))
>> +#if DEBUG
>> + {
>> + printf ("ERROR, vec_sel (src_va_s128, src_vb_s128, src_vc_s128)
>> result does not match expected output.\n");
>> + printf (" Result: ");
>> + print_i128 ((unsigned __int128) vresult_s128);
>> + printf ("\n Expected result: ");
>> + print_i128 ((unsigned __int128) expected_vresult_s128);
>> + printf ("\n");
>> + }
>> +#else
>> + abort ();
>> +#endif
>> +
>> + src_va_u128 = (vector unsigned __int128) {0x13579ACE02468BDF};
>> + src_vb_u128 = (vector unsigned __int128) {0xA987654FEDCB3210};
>> + src_vc_u128 = (vector unsigned __int128) {0x5555555555555555};
>> + expected_vresult_u128 = (vector unsigned __int128) {0x32147658ba9cfed0};
>> +
>> + /* Unigned arguments. */
>> + vresult_u128 = vec_sel (src_va_u128, src_vb_u128, src_vc_u128);
>> +
>> + if (!vec_all_eq (vresult_u128, expected_vresult_u128))
>> +#if DEBUG
>> + {
>> + printf ("ERROR, vec_sel (src_va_u128, src_vb_u128, src_vc_u128)
>> result does not match expected output.\n");
>> + printf (" Result: ");
>> + print_i128 ((unsigned __int128) vresult_u128);
>> + printf ("\n Expected result: ");
>> + print_i128 ((unsigned __int128) expected_vresult_u128);
>> + printf ("\n");
>> + }
>> +#else
>> + abort ();
>> +#endif
>> +
>> + return 0;
>> +}
>