gcc/ChangeLog: * config/i386/sse.md (VI1_AVX512VNNIBW): New. (VI2_AVX10_2): Ditto. (sdot_prod<mode>): Add AVX10.2 to auto vectorize and combine 512 bit part. (udot_prod<mode>): Ditto. (sdot_prodv64qi): Remove. (udot_prodv64qi): Ditto. (usdot_prod<mode>): Add AVX10.2 to auto vectorize. (udot_prod<mode>): Ditto.
gcc/testsuite/ChangeLog: * gcc.target/i386/vnniint16-auto-vectorize-2.c: Define AVXVNNIINT16 only when AVX10_2 is not defined. Define N only when not already defined. Include CHECK and vnniint16-auto-vectorize-1.c before the fallback definition of TEST. * gcc.target/i386/vnniint8-auto-vectorize-2.c: Likewise for AVXVNNIINT8 and vnniint8-auto-vectorize-1.c. * gcc.target/i386/vnniint16-auto-vectorize-3.c: New test. * gcc.target/i386/vnniint16-auto-vectorize-4.c: Ditto. * gcc.target/i386/vnniint8-auto-vectorize-3.c: Ditto. * gcc.target/i386/vnniint8-auto-vectorize-4.c: Ditto. --- gcc/config/i386/sse.md | 93 +++++-------------- .../i386/vnniint16-auto-vectorize-2.c | 11 ++- .../i386/vnniint16-auto-vectorize-3.c | 6 ++ .../i386/vnniint16-auto-vectorize-4.c | 15 +++ .../i386/vnniint8-auto-vectorize-2.c | 12 ++- .../i386/vnniint8-auto-vectorize-3.c | 6 ++ .../i386/vnniint8-auto-vectorize-4.c | 15 +++ 7 files changed, 80 insertions(+), 78 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/vnniint16-auto-vectorize-3.c create mode 100644 gcc/testsuite/gcc.target/i386/vnniint16-auto-vectorize-4.c create mode 100644 gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-3.c create mode 100644 gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-4.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index da91d39cf8e..442ac93afa2 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -610,6 +610,10 @@ (define_mode_iterator VI1_AVX512VNNI [(V64QI "TARGET_AVX512VNNI && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI]) +(define_mode_iterator VI1_AVX512VNNIBW + [(V64QI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512") + (V32QI "TARGET_AVX2") V16QI]) + (define_mode_iterator VI12_256_512_AVX512VL [(V64QI "TARGET_EVEX512") (V32QI "TARGET_AVX512VL") (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL")]) @@ -627,6 +631,9 @@ [(V32HI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI]) +(define_mode_iterator VI2_AVX10_2 + [(V32HI "TARGET_AVX10_2_512") V16HI V8HI]) + (define_mode_iterator VI4_AVX [(V8SI "TARGET_AVX") V4SI]) @@ -31232,12 +31239,13 @@ (define_expand "sdot_prod<mode>" 
[(match_operand:<ssedvecmode> 0 "register_operand") - (match_operand:VI1_AVX2 1 "register_operand") - (match_operand:VI1_AVX2 2 "register_operand") + (match_operand:VI1_AVX512VNNIBW 1 "register_operand") + (match_operand:VI1_AVX512VNNIBW 2 "register_operand") (match_operand:<ssedvecmode> 3 "register_operand")] "TARGET_SSE2" { - if (TARGET_AVXVNNIINT8) + if ((<MODE_SIZE> == 64 && TARGET_AVX10_2_512) + || (<MODE_SIZE> < 64 && (TARGET_AVXVNNIINT8 || TARGET_AVX10_2_256))) { operands[1] = lowpart_subreg (<ssedvecmode>mode, force_reg (<MODE>mode, operands[1]), @@ -31276,44 +31284,15 @@ DONE; }) -(define_expand "sdot_prodv64qi" - [(match_operand:V16SI 0 "register_operand") - (match_operand:V64QI 1 "register_operand") - (match_operand:V64QI 2 "register_operand") - (match_operand:V16SI 3 "register_operand")] - "(TARGET_AVX512VNNI || TARGET_AVX512BW) && TARGET_EVEX512" -{ - /* Emulate with vpdpwssd. */ - rtx op1_lo = gen_reg_rtx (V32HImode); - rtx op1_hi = gen_reg_rtx (V32HImode); - rtx op2_lo = gen_reg_rtx (V32HImode); - rtx op2_hi = gen_reg_rtx (V32HImode); - - emit_insn (gen_vec_unpacks_lo_v64qi (op1_lo, operands[1])); - emit_insn (gen_vec_unpacks_lo_v64qi (op2_lo, operands[2])); - emit_insn (gen_vec_unpacks_hi_v64qi (op1_hi, operands[1])); - emit_insn (gen_vec_unpacks_hi_v64qi (op2_hi, operands[2])); - - rtx res1 = gen_reg_rtx (V16SImode); - rtx res2 = gen_reg_rtx (V16SImode); - rtx sum = gen_reg_rtx (V16SImode); - - emit_move_insn (sum, CONST0_RTX (V16SImode)); - emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum)); - emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3])); - - emit_insn (gen_addv16si3 (operands[0], res1, res2)); - DONE; -}) - (define_expand "udot_prod<mode>" [(match_operand:<ssedvecmode> 0 "register_operand") - (match_operand:VI1_AVX2 1 "register_operand") - (match_operand:VI1_AVX2 2 "register_operand") + (match_operand:VI1_AVX512VNNIBW 1 "register_operand") + (match_operand:VI1_AVX512VNNIBW 2 "register_operand") 
(match_operand:<ssedvecmode> 3 "register_operand")] "TARGET_SSE2" { - if (TARGET_AVXVNNIINT8) + if ((<MODE_SIZE> == 64 && TARGET_AVX10_2_512) + || (<MODE_SIZE> < 64 && (TARGET_AVXVNNIINT8 || TARGET_AVX10_2_256))) { operands[1] = lowpart_subreg (<ssedvecmode>mode, force_reg (<MODE>mode, operands[1]), @@ -31352,36 +31331,6 @@ DONE; }) -(define_expand "udot_prodv64qi" - [(match_operand:V16SI 0 "register_operand") - (match_operand:V64QI 1 "register_operand") - (match_operand:V64QI 2 "register_operand") - (match_operand:V16SI 3 "register_operand")] - "(TARGET_AVX512VNNI || TARGET_AVX512BW) && TARGET_EVEX512" -{ - /* Emulate with vpdpwssd. */ - rtx op1_lo = gen_reg_rtx (V32HImode); - rtx op1_hi = gen_reg_rtx (V32HImode); - rtx op2_lo = gen_reg_rtx (V32HImode); - rtx op2_hi = gen_reg_rtx (V32HImode); - - emit_insn (gen_vec_unpacku_lo_v64qi (op1_lo, operands[1])); - emit_insn (gen_vec_unpacku_lo_v64qi (op2_lo, operands[2])); - emit_insn (gen_vec_unpacku_hi_v64qi (op1_hi, operands[1])); - emit_insn (gen_vec_unpacku_hi_v64qi (op2_hi, operands[2])); - - rtx res1 = gen_reg_rtx (V16SImode); - rtx res2 = gen_reg_rtx (V16SImode); - rtx sum = gen_reg_rtx (V16SImode); - - emit_move_insn (sum, CONST0_RTX (V16SImode)); - emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum)); - emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3])); - - emit_insn (gen_addv16si3 (operands[0], res1, res2)); - DONE; -}) - (define_insn "vpdp<vpdotprodtype>_<mode>" [(set (match_operand:VI4_AVX 0 "register_operand" "=v") (unspec:VI4_AVX @@ -31757,10 +31706,10 @@ (define_expand "usdot_prod<mode>" [(match_operand:<sseunpackmode> 0 "register_operand") - (match_operand:VI2_AVX2 1 "register_operand") - (match_operand:VI2_AVX2 2 "register_operand") + (match_operand:VI2_AVX10_2 1 "register_operand") + (match_operand:VI2_AVX10_2 2 "register_operand") (match_operand:<sseunpackmode> 3 "register_operand")] - "TARGET_AVXVNNIINT16" + "TARGET_AVXVNNIINT16 || TARGET_AVX10_2_256" { operands[1] = 
lowpart_subreg (<sseunpackmode>mode, force_reg (<MODE>mode, operands[1]), @@ -31775,10 +31724,10 @@ (define_expand "udot_prod<mode>" [(match_operand:<sseunpackmode> 0 "register_operand") - (match_operand:VI2_AVX2 1 "register_operand") - (match_operand:VI2_AVX2 2 "register_operand") + (match_operand:VI2_AVX10_2 1 "register_operand") + (match_operand:VI2_AVX10_2 2 "register_operand") (match_operand:<sseunpackmode> 3 "register_operand")] - "TARGET_AVXVNNIINT16" + "TARGET_AVXVNNIINT16 || TARGET_AVX10_2_256" { operands[1] = lowpart_subreg (<sseunpackmode>mode, force_reg (<MODE>mode, operands[1]), diff --git a/gcc/testsuite/gcc.target/i386/vnniint16-auto-vectorize-2.c b/gcc/testsuite/gcc.target/i386/vnniint16-auto-vectorize-2.c index 90dc0eade7e..1bd1dfbd3a3 100644 --- a/gcc/testsuite/gcc.target/i386/vnniint16-auto-vectorize-2.c +++ b/gcc/testsuite/gcc.target/i386/vnniint16-auto-vectorize-2.c @@ -2,19 +2,24 @@ /* { dg-options "-O2 -mavxvnniint16" } */ /* { dg-require-effective-target avxvnniint16 } */ +#ifndef AVX10_2 #define AVXVNNIINT16 +#endif + #ifndef CHECK #define CHECK "avx-check.h" #endif +#include CHECK +#include "vnniint16-auto-vectorize-1.c" + #ifndef TEST #define TEST avx_test #endif -#include CHECK -#include "vnniint16-auto-vectorize-1.c" - +#ifndef N #define N 256 +#endif short a_i16[N]; unsigned short b_u16[N], c_u16[N], d_u16[N]; diff --git a/gcc/testsuite/gcc.target/i386/vnniint16-auto-vectorize-3.c b/gcc/testsuite/gcc.target/i386/vnniint16-auto-vectorize-3.c new file mode 100644 index 00000000000..85dd80e6d1b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vnniint16-auto-vectorize-3.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx10.2 -O2" } */ +/* { dg-final { scan-assembler "vpdpwusd\t" } } */ +/* { dg-final { scan-assembler "vpdpwuud\t" } } */ + +#include "vnniint16-auto-vectorize-1.c" diff --git a/gcc/testsuite/gcc.target/i386/vnniint16-auto-vectorize-4.c b/gcc/testsuite/gcc.target/i386/vnniint16-auto-vectorize-4.c new file mode 
100644 index 00000000000..36b76987b50 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vnniint16-auto-vectorize-4.c @@ -0,0 +1,15 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef CHECK +#define CHECK "avx512f-check.h" +#endif + +#define N 512 + +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#define AVX512F_LEN 512 +#include "vnniint16-auto-vectorize-2.c" diff --git a/gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-2.c b/gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-2.c index 99853e6c3b7..5a791f0f59e 100644 --- a/gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-2.c +++ b/gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-2.c @@ -2,19 +2,25 @@ /* { dg-options "-O2 -mavxvnniint8" } */ /* { dg-require-effective-target avxvnniint8 } */ +#ifndef AVX10_2 #define AVXVNNIINT8 +#endif + #ifndef CHECK #define CHECK "avx-check.h" #endif +#include CHECK +#include "vnniint8-auto-vectorize-1.c" + #ifndef TEST #define TEST avx_test #endif -#include CHECK -#include "vnniint8-auto-vectorize-1.c" - +#ifndef N #define N 256 +#endif + char a_i8[N], b_i8[N]; unsigned char c_u8[N], d_u8[N]; int i8_exp, i8_ref; diff --git a/gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-3.c b/gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-3.c new file mode 100644 index 00000000000..bbb49e81b69 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-3.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx10.2 -O2" } */ +/* { dg-final { scan-assembler "vpdpbssd\t" } } */ +/* { dg-final { scan-assembler "vpdpbuud\t" } } */ + +#include "vnniint8-auto-vectorize-1.c" diff --git a/gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-4.c b/gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-4.c new file mode 100644 index 00000000000..41098b1abcd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-4.c @@ -0,0 +1,15 @@ +/* { dg-do 
run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef CHECK +#define CHECK "avx512f-check.h" +#endif + +#define N 512 + +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#define AVX512F_LEN 512 +#include "vnniint8-auto-vectorize-2.c" -- 2.31.1