Changes from previous revision: Rename new `check_effective_target' and tests to make their intent clearer.
* lib/target-supports.exp: For new `check_effective_target', s/vect_dotprod_twoway/vect_dotprod_hisi/. * One test is renamed to `vect-dotprod-conv-optab.c' to emphasize the aim of checking that the new dotprod convert optab allows autovectorization of a given datatype to distinct target data-types. * The aarch64 runtime-correctness check has had the mode supported for its two-way dot-product added to the test name, resulting in the new `vect-dotprod-twoway-hisi.c' name. ------ Given the novel treatment of the dot product optab as a conversion, we are now able to target different relationships between output modes and input modes. This is made clearer by way of example. Previously, on AArch64, the following loop was vectorizable: uint32_t udot4(int n, uint8_t* data) { uint32_t sum = 0; for (int i=0; i<n; i+=1) sum += data[i] * data[i]; return sum; } while the following was not: uint32_t udot2(int n, uint16_t* data) { uint32_t sum = 0; for (int i=0; i<n; i+=1) sum += data[i] * data[i]; return sum; } Under the new treatment of the dot product optab, they are both now vectorizable. This adds the relevant target-agnostic check to ensure this behavior in the autovectorizer, gated behind the new check_effective_target `vect_dotprod_hisi', as well as a runtime check targeting aarch64. gcc/testsuite/ChangeLog: * lib/target-supports.exp (check_effective_target_vect_dotprod_hisi): New. * gcc.dg/vect/vect-dotprod-conv-optab.c: Likewise. * gcc.target/aarch64/vect-dotprod-twoway-hisi.c: Likewise. 
--- .../gcc.dg/vect/vect-dotprod-conv-optab.c | 41 ++++++++++++ .../aarch64/vect-dotprod-twoway-hisi.c | 66 +++++++++++++++++++ gcc/testsuite/lib/target-supports.exp | 9 +++ 3 files changed, 116 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-dotprod-conv-optab.c create mode 100644 gcc/testsuite/gcc.target/aarch64/vect-dotprod-twoway-hisi.c diff --git a/gcc/testsuite/gcc.dg/vect/vect-dotprod-conv-optab.c b/gcc/testsuite/gcc.dg/vect/vect-dotprod-conv-optab.c new file mode 100644 index 00000000000..63e6c95480d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-dotprod-conv-optab.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_dotprod_hisi } */ +/* Ensure that, given the same output datatype, both the two-way and four-way + dot products are autovectorized, with the correct operation then selected + based on the distinct input types. */ +#include <stdint.h> + +uint32_t udot4(int n, uint8_t* data) { + uint32_t sum = 0; + for (int i=0; i<n; i+=1) { + sum += data[i] * data[i]; + } + return sum; +} + +int32_t sdot4(int n, int8_t* data) { + int32_t sum = 0; + for (int i=0; i<n; i+=1) { + sum += data[i] * data[i]; + } + return sum; +} + +uint32_t udot2(int n, uint16_t* data) { + uint32_t sum = 0; + for (int i=0; i<n; i+=1) { + sum += data[i] * data[i]; + } + return sum; +} + +int32_t sdot2(int n, int16_t* data) { + int32_t sum = 0; + for (int i=0; i<n; i+=1) { + sum += data[i] * data[i]; + } + return sum; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-dotprod-twoway-hisi.c b/gcc/testsuite/gcc.target/aarch64/vect-dotprod-twoway-hisi.c new file mode 100644 index 00000000000..0490faa2c94 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-dotprod-twoway-hisi.c @@ -0,0 +1,66 @@ +/* { dg-do run } */ +/* { dg-require-effective-target 
vect_dotprod_hisi } */ +/* { dg-options "-static -O3 -ftree-vectorize -fdump-tree-vect-details -save-temps" } */ +/* Ensure runtime correctness in the autovectorized two-way dot product operations. */ + +#include <stdint.h> +#include <stdlib.h> +#pragma GCC target "+sme2" + +uint32_t +udot2 (int n, uint16_t* data) __arm_streaming +{ + uint32_t sum = 0; + for (int i=0; i<n; i+=1) { + sum += data[i] * data[i]; + } + return sum; +} + +int32_t +sdot2 (int n, int16_t* data) __arm_streaming +{ + int32_t sum = 0; + for (int i=0; i<n; i+=1) { + sum += data[i] * data[i]; + } + return sum; +} + +int +main () +{ + + uint16_t u_input_nil[] = { [0 ... 3] = 0 }; + uint16_t u_input_min[] = { [0 ... 3] = 1 }; + uint16_t u_input_max[] = { [0 ... 3] = 32767}; + + uint32_t u_nil_dotprod = udot2 (4, u_input_nil); + uint32_t u_min_dotprod = udot2 (4, u_input_min); + uint32_t u_max_dotprod = udot2 (4, u_input_max); + + if (u_nil_dotprod != 0 + || u_min_dotprod != 4 + || u_max_dotprod != 4294705156) + abort (); + + int16_t s_input_nil[] = { [0 ... 3] = 0 }; + int16_t s_input_min[] = { [0 ... 3] = -23170 }; + int16_t s_input_max[] = { [0 ... 
3] = 23170 }; + + int32_t s_nil_dotprod = sdot2 (4, s_input_nil); + int32_t s_min_dotprod = sdot2 (4, s_input_min); + int32_t s_max_dotprod = sdot2 (4, s_input_max); + + if (s_nil_dotprod != 0 + || s_min_dotprod != 2147395600 + || s_max_dotprod != 2147395600) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 46 "vect" } } */ +/* { dg-final { scan-assembler "\[ \t\]udot\tz\[0-9\]+.s, z\[0-9\]+.h, z\[0-9\]+.h" } } */ +/* { dg-final { scan-assembler "\[ \t\]sdot\tz\[0-9\]+.s, z\[0-9\]+.h, z\[0-9\]+.h" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 11ba77ca404..ebbc2fb8015 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -4258,6 +4258,15 @@ proc check_effective_target_vect_int { } { }}] } +# Return 1 if the target supports two-way dot products on inputs of hi mode +# producing si outputs, 0 otherwise. + +proc check_effective_target_vect_dotprod_hisi { } { + return [check_cached_effective_target_indexed aarch64_sme2 { + expr { [check_effective_target_aarch64_sme2] + }}] +} + # Return 1 if the target supports vectorization of early breaks, # 0 otherwise. # -- 2.34.1