Will, Segher: This patch fixes the order of the arguments in the vec_rlmi and vec_rlnm builtins. The patch also adds a new test case to verify the fix.
The patch has been tested on powerpc64-linux instead (Power 8 BE) powerpc64-linux instead (Power 9 LE) powerpc64-linux instead (Power 10 LE) Please let me know if the patch is acceptable for mainline. Carl Love ---------------------------------------------------------------------- gcc/ChangeLog 2021-01-12 Carl Love <c...@us.ibm.com> gcc/ * config/rs6000/altivec.md (altivec_vrl<VI_char>mi): Fix bug in argument generation. gcc/testsuite/ gcc.target/powerpc/check-builtin-vec_rlnm-runnable.c: New runnable test case. gcc.target/powerpc/vec-rlmi-rlnm.c: Update scan assembler times for xxlor instruction. --- gcc/config/rs6000/altivec.md | 6 +- .../powerpc/check-builtin-vec_rlnm-runnable.c | 233 ++++++++++++++++++ .../gcc.target/powerpc/vec-rlmi-rlnm.c | 2 +- 3 files changed, 237 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/check-builtin-vec_rlnm-runnable.c diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index fc19a8fc807..4d08cca2228 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1982,12 +1982,12 @@ (define_insn "altivec_vrl<VI_char>mi" [(set (match_operand:VIlong 0 "register_operand" "=v") - (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "0") - (match_operand:VIlong 2 "register_operand" "v") + (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "v") + (match_operand:VIlong 2 "register_operand" "0") (match_operand:VIlong 3 "register_operand" "v")] UNSPEC_VRLMI))] "TARGET_P9_VECTOR" - "vrl<VI_char>mi %0,%2,%3" + "vrl<VI_char>mi %0,%1,%3" [(set_attr "type" "veclogical")]) (define_insn "altivec_vrl<VI_char>nm" diff --git a/gcc/testsuite/gcc.target/powerpc/check-builtin-vec_rlnm-runnable.c b/gcc/testsuite/gcc.target/powerpc/check-builtin-vec_rlnm-runnable.c new file mode 100644 index 00000000000..b97bc519c87 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/check-builtin-vec_rlnm-runnable.c @@ -0,0 +1,233 @@ +/* { dg-do run } */ +/* { 
dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-O2 -mdejagnu-cpu=power9 -save-temps" } */ + +/* Verify the vec_rlm and vec_rlmi builtins works correctly. */ +/* { dg-final { scan-assembler-times {\mvrldmi\M} 1 } } */ + +#include <altivec.h> + +#define DEBUG 1 + +#if DEBUG +#include <stdio.h> +#include <stdlib.h> +#endif + +void abort (void); + +int main () +{ + int i; + + vector unsigned int vec_arg1_int, vec_arg2_int, vec_arg3_int; + vector unsigned int vec_result_int, vec_expected_result_int; + + vector unsigned long long int vec_arg1_di, vec_arg2_di, vec_arg3_di; + vector unsigned long long int vec_result_di, vec_expected_result_di; + + unsigned int mask_begin, mask_end, shift; + unsigned long long int mask; + +/* Check vec int version of vec_rlmi builtin */ + mask = 0; + mask_begin = 0; + mask_end = 4; + shift = 16; + + for (i = 0; i < 31; i++) + if ((i >= mask_begin) && (i <= mask_end)) + mask |= 0x80000000ULL >> i; + + for (i = 0; i < 4; i++) { + vec_arg1_int[i] = 0x12345678 + i*0x11111111; + vec_arg2_int[i] = 0xA1B1CDEF; + vec_arg3_int[i] = mask_begin << 16 | mask_end << 8 | shift; + + /* do rotate */ + vec_expected_result_int[i] = ( vec_arg2_int[i] & ~mask) + | ((vec_arg1_int[i] << shift) | (vec_arg1_int[i] >> (32-shift))) & mask; + + } + + /* vec_rlmi(arg1, arg2, arg3) + result - rotate each element of arg1 left and inserting it into arg2 + element of arg2 based on the mask specified in arg3. The shift, mask + start and end is specified in arg3. 
*/ + vec_result_int = vec_rlmi (vec_arg1_int, vec_arg2_int, vec_arg3_int); + + for (i = 0; i < 4; i++) { + if (vec_result_int[i] != vec_expected_result_int[i]) +#if DEBUG + printf("ERROR: i = %d, vec_rlmi int result 0x%x, does not match " + "expected result 0x%x\n", i, vec_result_int[i], + vec_expected_result_int[i]); +#else + abort(); +#endif + } + +/* Check vec long long int version of vec_rlmi builtin */ + mask = 0; + mask_begin = 0; + mask_end = 4; + shift = 16; + + for (i = 0; i < 31; i++) + if ((i >= mask_begin) && (i <= mask_end)) + mask |= 0x80000000ULL >> i; + + for (i = 0; i < 2; i++) { + vec_arg1_di[i] = 0x1234567800000000 + i*0x11111111; + vec_arg2_di[i] = 0xA1B1C1D1E1F12345; + vec_arg3_di[i] = mask_begin << 16 | mask_end << 8 | shift; + + /* do rotate */ + vec_expected_result_di[i] = ( vec_arg2_di[i] & ~mask) + | ((vec_arg1_di[i] << shift) | (vec_arg1_di[i] >> (64-shift))) & mask; + } + + /* vec_rlmi(arg1, arg2, arg3) + result - rotate each element of arg1 left and inserting it into arg2 + element of arg2 based on the mask specified in arg3. The shift, mask + start and end is specified in arg3. 
*/ + vec_result_di = vec_rlmi (vec_arg1_di, vec_arg2_di, vec_arg3_di); + + for (i = 0; i < 2; i++) { + if (vec_result_di[i] != vec_expected_result_di[i]) +#if DEBUG + printf("ERROR: i = %d, vec_rlmi int result 0x%x, does not match " + "expected result 0x%x\n", i, vec_result_di[i], + vec_expected_result_di[i]); +#else + abort(); +#endif + } + + /* Check vec int version of vec_rlnm builtin */ + mask = 0; + mask_begin = 0; + mask_end = 4; + shift = 16; + + for (i = 0; i < 31; i++) + if ((i >= mask_begin) && (i <= mask_end)) + mask |= 0x80000000ULL >> i; + + for (i = 0; i < 4; i++) { + vec_arg1_int[i] = 0x12345678 + i*0x11111111; + vec_arg2_int[i] = shift; + vec_arg3_int[i] = mask_begin << 8 | mask_end; + vec_expected_result_int[i] = (vec_arg1_int[i] << shift) & mask; + } + + /* vec_rlnm(arg1, arg2, arg3) + result - rotate each element of arg1 left by shift in element of arg2. + Then AND with mask whose start/stop bits are specified in element of + arg3. */ + vec_result_int = vec_rlnm (vec_arg1_int, vec_arg2_int, vec_arg3_int); + for (i = 0; i < 4; i++) { + if (vec_result_int[i] != vec_expected_result_int[i]) +#if DEBUG + printf("ERROR: vec_rlnm, i = %d, int result 0x%x does not match " + "expected result 0x%x\n", i, vec_result_int[i], + vec_expected_result_int[i]); +#else + abort(); +#endif + } + +/* Check vec long int version of builtin */ + mask = 0; + mask_begin = 0; + mask_end = 4; + shift = 20; + + for (i = 0; i < 63; i++) + if ((i >= mask_begin) && (i <= mask_end)) + mask |= 0x8000000000000000ULL >> i; + + for (i = 0; i < 2; i++) { + vec_arg1_di[i] = 0x123456789ABCDE00ULL + i*0x1111111111111111ULL; + vec_arg2_di[i] = shift; + vec_arg3_di[i] = mask_begin << 8 | mask_end; + vec_expected_result_di[i] = (vec_arg1_di[i] << shift) & mask; + } + + vec_result_di = vec_rlnm (vec_arg1_di, vec_arg2_di, vec_arg3_di); + + for (i = 0; i < 2; i++) { + if (vec_result_di[i] != vec_expected_result_di[i]) +#if DEBUG + printf("ERROR: vec_rlnm, i = %d, long long int result 0x%llx 
does not " + "match expected result 0x%llx\n", i, vec_result_di[i], + vec_expected_result_di[i]); +#else + abort(); +#endif + } + + /* Check vec int version of vec_vrlnm builtin */ + mask = 0; + mask_begin = 0; + mask_end = 4; + shift = 16; + + for (i = 0; i < 31; i++) + if ((i >= mask_begin) && (i <= mask_end)) + mask |= 0x80000000ULL >> i; + + for (i = 0; i < 4; i++) { + vec_arg1_int[i] = 0x12345678 + i*0x11111111; + vec_arg2_int[i] = mask_begin << 16 | mask_end << 8 | shift; + vec_expected_result_int[i] = (vec_arg1_int[i] << shift) & mask; + } + + /* vec_vrlnm(arg1, arg2, arg3) + result - rotate each element of arg1 left then AND with mask. The mask + start, stop bits is specified in the second argument. The shift amount + is also specified in the second argument. */ + vec_result_int = vec_vrlnm (vec_arg1_int, vec_arg2_int); + + for (i = 0; i < 4; i++) { + if (vec_result_int[i] != vec_expected_result_int[i]) +#if DEBUG + printf("ERROR: vec_vrlnm, i = %d, int result 0x%x does not match " + "expected result 0x%x\n", i, vec_result_int[i], + vec_expected_result_int[i]); +#else + abort(); +#endif + } + +/* Check vec long int version of vec_vrlnm builtin */ + mask = 0; + mask_begin = 0; + mask_end = 4; + shift = 20; + + for (i = 0; i < 63; i++) + if ((i >= mask_begin) && (i <= mask_end)) + mask |= 0x8000000000000000ULL >> i; + + for (i = 0; i < 2; i++) { + vec_arg1_di[i] = 0x123456789ABCDE00ULL + i*0x1111111111111111ULL; + vec_arg2_di[i] = mask_begin << 16 | mask_end << 8 | shift; + vec_expected_result_di[i] = (vec_arg1_di[i] << shift) & mask; + } + + vec_result_di = vec_vrlnm (vec_arg1_di, vec_arg2_di); + + for (i = 0; i < 2; i++) { + if (vec_result_di[i] != vec_expected_result_di[i]) +#if DEBUG + printf("ERROR: vec_vrlnm, i = %d, long long int result 0x%llx does not " + "match expected result 0x%llx\n", i, vec_result_di[i], + vec_expected_result_di[i]); +#else + abort(); +#endif + } + + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c 
b/gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c index 1e7d7390c5b..b0f26c8f4cb 100644 --- a/gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c +++ b/gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c @@ -62,6 +62,6 @@ rlnm_test_2 (vector unsigned long long x, vector unsigned long long y, /* { dg-final { scan-assembler-times "vextsb2d" 1 } } */ /* { dg-final { scan-assembler-times "vslw" 1 } } */ /* { dg-final { scan-assembler-times "vsld" 1 } } */ -/* { dg-final { scan-assembler-times "xxlor" 3 } } */ +/* { dg-final { scan-assembler-times "xxlor" 5 } } */ /* { dg-final { scan-assembler-times "vrlwnm" 2 } } */ /* { dg-final { scan-assembler-times "vrldnm" 2 } } */ -- 2.27.0