Hi All, This adds test cases that check the auto-vectorizer's detection of the new sign-differing dot product.
Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * doc/sourcebuild.texi (arm_v8_2a_i8mm_neon_hw): Document. gcc/testsuite/ChangeLog: * lib/target-supports.exp (check_effective_target_arm_v8_2a_imm8_neon_ok_nocache, check_effective_target_arm_v8_2a_i8mm_neon_hw, check_effective_target_vect_usdot_qi): New. * gcc.dg/vect/vect-reduc-dot-10.c: New test. * gcc.dg/vect/vect-reduc-dot-11.c: New test. * gcc.dg/vect/vect-reduc-dot-12.c: New test. * gcc.dg/vect/vect-reduc-dot-13.c: New test. * gcc.dg/vect/vect-reduc-dot-14.c: New test. * gcc.dg/vect/vect-reduc-dot-15.c: New test. * gcc.dg/vect/vect-reduc-dot-16.c: New test. * gcc.dg/vect/vect-reduc-dot-9.c: New test. --- inline copy of patch -- diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index b0001247795947c9dcab1a14884ecd585976dfdd..0034ac9d86b26e6674d71090b9d04b6148f99e17 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1672,6 +1672,10 @@ Target supports a vector dot-product of @code{signed char}. @item vect_udot_qi Target supports a vector dot-product of @code{unsigned char}. +@item vect_usdot_qi +Target supports a vector dot-product where one operand of the multiply is +@code{signed char} and the other of @code{unsigned char}. + @item vect_sdot_hi Target supports a vector dot-product of @code{signed short}. @@ -1947,6 +1951,11 @@ ARM target supports executing instructions from ARMv8.2-A with the Dot Product extension. Some multilibs may be incompatible with these options. Implies arm_v8_2a_dotprod_neon_ok. +@item arm_v8_2a_i8mm_neon_hw +ARM target supports executing instructions from ARMv8.2-A with the 8-bit +Matrix Multiply extension. Some multilibs may be incompatible with these +options. Implies arm_v8_2a_i8mm_ok. 
+ @item arm_fp16fml_neon_ok @anchor{arm_fp16fml_neon_ok} ARM target supports extensions to generate the @code{VFMAL} and @code{VFMLS} diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-10.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-10.c new file mode 100644 index 0000000000000000000000000000000000000000..7ce86965ea97d37c43d96b4d2271df667dcb2aae --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-10.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 unsigned +#define SIGNEDNESS_3 unsigned +#define SIGNEDNESS_4 signed + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-11.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-11.c new file mode 100644 index 0000000000000000000000000000000000000000..0f7cbbb87ef028f166366aea55bc4ef49d2f8e9b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-11.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 unsigned +#define SIGNEDNESS_4 signed + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-12.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-12.c new file mode 100644 index 
0000000000000000000000000000000000000000..08412614fc67045d3067b5b55ba032d297595237 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-12.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-13.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-13.c new file mode 100644 index 0000000000000000000000000000000000000000..7ee0f45f64296442204ee13d5f880f4b7716fb85 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-13.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 signed +#define SIGNEDNESS_2 unsigned +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-14.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-14.c new file mode 100644 index 0000000000000000000000000000000000000000..2de1434528b87f0c32c54150b16791f3f2a469b5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-14.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { 
dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 signed +#define SIGNEDNESS_2 unsigned +#define SIGNEDNESS_3 unsigned +#define SIGNEDNESS_4 signed + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-15.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-15.c new file mode 100644 index 0000000000000000000000000000000000000000..dc48f95a32bf76c54a906ee81ddee99b16aea84a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-15.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 signed +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 unsigned +#define SIGNEDNESS_4 signed + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-16.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-16.c new file mode 100644 index 0000000000000000000000000000000000000000..aec628789366673321aea88c60316a68fe16cbc5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-16.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 signed +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 
loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-9.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-9.c new file mode 100644 index 0000000000000000000000000000000000000000..cbbeedec3bfd0810a8ce8036e6670585d9334924 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-9.c @@ -0,0 +1,52 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#include "tree-vect.h" + +#define N 50 + +#ifndef SIGNEDNESS_1 +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 unsigned +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned +#endif + +SIGNEDNESS_1 int __attribute__ ((noipa)) +f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, + SIGNEDNESS_4 char *restrict b) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 short mult = av * bv; + res += mult; + } + return res; +} + +#define BASE ((SIGNEDNESS_3 int) -1 < 0 ? 
-126 : 4) +#define OFFSET 20 + +int +main (void) +{ + check_vect (); + + SIGNEDNESS_3 char a[N], b[N]; + int expected = 0x12345; + for (int i = 0; i < N; ++i) + { + a[i] = BASE + i * 5; + b[i] = BASE + OFFSET + i * 4; + asm volatile ("" ::: "memory"); + expected += (SIGNEDNESS_2 short) (a[i] * b[i]); + } + if (f (0x12345, a, b) != expected) + __builtin_abort (); +} + +/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index ad323107f2ec5d55a77214beca5e4135643528b4..db9bd605ab4c838f65667fa616da334a171d9dfb 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -5240,6 +5240,36 @@ proc check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache { } { return 0; } +# Return 1 if the target supports ARMv8.2 Adv.SIMD imm8 +# instructions, 0 otherwise. The test is valid for ARM and for AArch64. +# Record the command line options needed. + +proc check_effective_target_arm_v8_2a_imm8_neon_ok_nocache { } { + global et_arm_v8_2a_imm8_neon_flags + set et_arm_v8_2a_imm8_neon_flags "" + + if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } { + return 0; + } + + # Iterate through sets of options to find the compiler flags that + # need to be added to the -march option. + foreach flags {"" "-mfloat-abi=softfp -mfpu=neon-fp-armv8" "-mfloat-abi=hard -mfpu=neon-fp-armv8"} { + if { [check_no_compiler_messages_nocache \ + arm_v8_2a_imm8_neon_ok object { + #include <stdint.h> + #if !defined (__ARM_FEATURE_MATMUL_INT8) + #error "__ARM_FEATURE_MATMUL_INT8 not defined" + #endif + } "$flags -march=armv8.2-a+imm8"] } { + set et_arm_v8_2a_imm8_neon_flags "$flags -march=armv8.2-a+imm8" + return 1 + } + } + + return 0; +} + # Return 1 if the target supports ARMv8.1-M MVE # instructions, 0 otherwise. The test is valid for ARM. 
# Record the command line options needed. @@ -5667,6 +5697,43 @@ proc check_effective_target_arm_v8_2a_dotprod_neon_hw { } { } [add_options_for_arm_v8_2a_dotprod_neon ""]] } +# Return 1 if the target supports executing AdvSIMD instructions from ARMv8.2 +# with the i8mm extension, 0 otherwise. The test is valid for ARM and for +# AArch64. + +proc check_effective_target_arm_v8_2a_i8mm_neon_hw { } { + if { ![check_effective_target_arm_v8_2a_i8mm_ok] } { + return 0; + } + return [check_runtime arm_v8_2a_i8mm_neon_hw_available { + #include "arm_neon.h" + int + main (void) + { + + uint32x2_t results = {0,0}; + uint8x8_t a = {1,1,1,1,2,2,2,2}; + int8x8_t b = {2,2,2,2,3,3,3,3}; + + #ifdef __ARM_ARCH_ISA_A64 + asm ("usdot %0.2s, %1.8b, %2.8b" + : "=w"(results) + : "w"(a), "w"(b) + : /* No clobbers. */); + + #else + asm ("vusdot.u8 %P0, %P1, %P2" + : "=w"(results) + : "w"(a), "w"(b) + : /* No clobbers. */); + #endif + + return (vget_lane_u32 (results, 0) == 8 + && vget_lane_u32 (results, 1) == 24) ? 1 : 0; + } + } [add_options_for_arm_v8_2a_i8mm ""]] +} + # Return 1 if this is a ARM target with NEON enabled. proc check_effective_target_arm_neon { } { @@ -7022,6 +7089,19 @@ proc check_effective_target_vect_udot_qi { } { && [et-is-effective-target mips_msa]) }}] } +# Return 1 if the target plus current options supports a vector +# dot-product where one operand of the multiply is signed char +# and the other unsigned chars, 0 otherwise. +# +# This won't change for different subtargets so cache the result. + +proc check_effective_target_vect_usdot_qi { } { + return [check_cached_effective_target_indexed vect_usdot_qi { + expr { [istarget aarch64*-*-*] + || [istarget arm*-*-*] }}] +} + + # Return 1 if the target plus current options supports a vector # dot-product of signed shorts, 0 otherwise. # --
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index b0001247795947c9dcab1a14884ecd585976dfdd..0034ac9d86b26e6674d71090b9d04b6148f99e17 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1672,6 +1672,10 @@ Target supports a vector dot-product of @code{signed char}. @item vect_udot_qi Target supports a vector dot-product of @code{unsigned char}. +@item vect_usdot_qi +Target supports a vector dot-product where one operand of the multiply is +@code{signed char} and the other of @code{unsigned char}. + @item vect_sdot_hi Target supports a vector dot-product of @code{signed short}. @@ -1947,6 +1951,11 @@ ARM target supports executing instructions from ARMv8.2-A with the Dot Product extension. Some multilibs may be incompatible with these options. Implies arm_v8_2a_dotprod_neon_ok. +@item arm_v8_2a_i8mm_neon_hw +ARM target supports executing instructions from ARMv8.2-A with the 8-bit +Matrix Multiply extension. Some multilibs may be incompatible with these +options. Implies arm_v8_2a_i8mm_ok. 
+ @item arm_fp16fml_neon_ok @anchor{arm_fp16fml_neon_ok} ARM target supports extensions to generate the @code{VFMAL} and @code{VFMLS} diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-10.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-10.c new file mode 100644 index 0000000000000000000000000000000000000000..7ce86965ea97d37c43d96b4d2271df667dcb2aae --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-10.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 unsigned +#define SIGNEDNESS_3 unsigned +#define SIGNEDNESS_4 signed + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-11.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-11.c new file mode 100644 index 0000000000000000000000000000000000000000..0f7cbbb87ef028f166366aea55bc4ef49d2f8e9b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-11.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 unsigned +#define SIGNEDNESS_4 signed + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-12.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-12.c new file mode 100644 index 
0000000000000000000000000000000000000000..08412614fc67045d3067b5b55ba032d297595237 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-12.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-13.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-13.c new file mode 100644 index 0000000000000000000000000000000000000000..7ee0f45f64296442204ee13d5f880f4b7716fb85 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-13.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 signed +#define SIGNEDNESS_2 unsigned +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-14.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-14.c new file mode 100644 index 0000000000000000000000000000000000000000..2de1434528b87f0c32c54150b16791f3f2a469b5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-14.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { 
dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 signed +#define SIGNEDNESS_2 unsigned +#define SIGNEDNESS_3 unsigned +#define SIGNEDNESS_4 signed + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-15.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-15.c new file mode 100644 index 0000000000000000000000000000000000000000..dc48f95a32bf76c54a906ee81ddee99b16aea84a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-15.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 signed +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 unsigned +#define SIGNEDNESS_4 signed + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-16.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-16.c new file mode 100644 index 0000000000000000000000000000000000000000..aec628789366673321aea88c60316a68fe16cbc5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-16.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 signed +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 
loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-9.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-9.c new file mode 100644 index 0000000000000000000000000000000000000000..cbbeedec3bfd0810a8ce8036e6670585d9334924 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-9.c @@ -0,0 +1,52 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#include "tree-vect.h" + +#define N 50 + +#ifndef SIGNEDNESS_1 +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 unsigned +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned +#endif + +SIGNEDNESS_1 int __attribute__ ((noipa)) +f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, + SIGNEDNESS_4 char *restrict b) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 short mult = av * bv; + res += mult; + } + return res; +} + +#define BASE ((SIGNEDNESS_3 int) -1 < 0 ? 
-126 : 4) +#define OFFSET 20 + +int +main (void) +{ + check_vect (); + + SIGNEDNESS_3 char a[N], b[N]; + int expected = 0x12345; + for (int i = 0; i < N; ++i) + { + a[i] = BASE + i * 5; + b[i] = BASE + OFFSET + i * 4; + asm volatile ("" ::: "memory"); + expected += (SIGNEDNESS_2 short) (a[i] * b[i]); + } + if (f (0x12345, a, b) != expected) + __builtin_abort (); +} + +/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index ad323107f2ec5d55a77214beca5e4135643528b4..db9bd605ab4c838f65667fa616da334a171d9dfb 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -5240,6 +5240,36 @@ proc check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache { } { return 0; } +# Return 1 if the target supports ARMv8.2 Adv.SIMD imm8 +# instructions, 0 otherwise. The test is valid for ARM and for AArch64. +# Record the command line options needed. + +proc check_effective_target_arm_v8_2a_imm8_neon_ok_nocache { } { + global et_arm_v8_2a_imm8_neon_flags + set et_arm_v8_2a_imm8_neon_flags "" + + if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } { + return 0; + } + + # Iterate through sets of options to find the compiler flags that + # need to be added to the -march option. + foreach flags {"" "-mfloat-abi=softfp -mfpu=neon-fp-armv8" "-mfloat-abi=hard -mfpu=neon-fp-armv8"} { + if { [check_no_compiler_messages_nocache \ + arm_v8_2a_imm8_neon_ok object { + #include <stdint.h> + #if !defined (__ARM_FEATURE_MATMUL_INT8) + #error "__ARM_FEATURE_MATMUL_INT8 not defined" + #endif + } "$flags -march=armv8.2-a+imm8"] } { + set et_arm_v8_2a_imm8_neon_flags "$flags -march=armv8.2-a+imm8" + return 1 + } + } + + return 0; +} + # Return 1 if the target supports ARMv8.1-M MVE # instructions, 0 otherwise. The test is valid for ARM. 
# Record the command line options needed. @@ -5667,6 +5697,43 @@ proc check_effective_target_arm_v8_2a_dotprod_neon_hw { } { } [add_options_for_arm_v8_2a_dotprod_neon ""]] } +# Return 1 if the target supports executing AdvSIMD instructions from ARMv8.2 +# with the i8mm extension, 0 otherwise. The test is valid for ARM and for +# AArch64. + +proc check_effective_target_arm_v8_2a_i8mm_neon_hw { } { + if { ![check_effective_target_arm_v8_2a_i8mm_ok] } { + return 0; + } + return [check_runtime arm_v8_2a_i8mm_neon_hw_available { + #include "arm_neon.h" + int + main (void) + { + + uint32x2_t results = {0,0}; + uint8x8_t a = {1,1,1,1,2,2,2,2}; + int8x8_t b = {2,2,2,2,3,3,3,3}; + + #ifdef __ARM_ARCH_ISA_A64 + asm ("usdot %0.2s, %1.8b, %2.8b" + : "=w"(results) + : "w"(a), "w"(b) + : /* No clobbers. */); + + #else + asm ("vusdot.u8 %P0, %P1, %P2" + : "=w"(results) + : "w"(a), "w"(b) + : /* No clobbers. */); + #endif + + return (vget_lane_u32 (results, 0) == 8 + && vget_lane_u32 (results, 1) == 24) ? 1 : 0; + } + } [add_options_for_arm_v8_2a_i8mm ""]] +} + # Return 1 if this is a ARM target with NEON enabled. proc check_effective_target_arm_neon { } { @@ -7022,6 +7089,19 @@ proc check_effective_target_vect_udot_qi { } { && [et-is-effective-target mips_msa]) }}] } +# Return 1 if the target plus current options supports a vector +# dot-product where one operand of the multiply is signed char +# and the other unsigned chars, 0 otherwise. +# +# This won't change for different subtargets so cache the result. + +proc check_effective_target_vect_usdot_qi { } { + return [check_cached_effective_target_indexed vect_usdot_qi { + expr { [istarget aarch64*-*-*] + || [istarget arm*-*-*] }}] +} + + # Return 1 if the target plus current options supports a vector # dot-product of signed shorts, 0 otherwise. #