From: Hongyu Wang <hongyu.w...@intel.com> gcc/ChangeLog:
* config.gcc: Add avx10_2mediaintrin.h and avx10_2-512mediaintrin.h. * config/i386/i386-builtin.def: Add new builtins. * config/i386/i386-builtins.cc (def_builtin): Handle shared builtins between AVXVNNIINT8 and AVX10.2. * config/i386/i386-expand.cc (ix86_check_builtin_isa_match): Ditto. * config/i386/immintrin.h: Include avx10_2mediaintrin.h and avx10_2-512mediaintrin.h. * config/i386/sse.md (VI4_AVX10_2): New. (vpdp<vpdotprodtype>_<mode>): Add AVX10_2_256. (vpdp<vpdotprodtype>_v16si): New define_insn. (vpdp<vpdotprodtype>_<mode>_mask): Ditto. (*vpdp<vpdotprodtype>_<mode>_maskz): Ditto. (vpdp<vpdotprodtype>_<mode>_maskz): New expander. * config/i386/avx10_2-512mediaintrin.h: New file. * config/i386/avx10_2mediaintrin.h: Ditto. gcc/testsuite/ChangeLog: * g++.dg/other/i386-2.C: Add -mavx10.2-512. * g++.dg/other/i386-3.C: Ditto. * gcc.target/i386/avx512f-helper.h: Reuse AVX512F macros for AVX10. * gcc.target/i386/funcspec-56.inc: Add new target attribute. * lib/target-supports.exp (check_effective_target_avx10_2): New. (check_effective_target_avx10_2_512): Ditto. * gcc.target/i386/avx10-check.h: New. * gcc.target/i386/avx10-helper.h: New. * gcc.target/i386/avx10-os-support.h: Ditto. * gcc.target/i386/avx10_2-builtin-1.c: Ditto. * gcc.target/i386/avx10_2-512-media-1.c: Ditto. * gcc.target/i386/avx10_2-media-1.c: Ditto. * gcc.target/i386/avxvnniint8-builtin.c: Ditto. * gcc.target/i386/avx10_2-512-vpdpbssd-2.c: Ditto. * gcc.target/i386/avx10_2-512-vpdpbssds-2.c: Ditto. * gcc.target/i386/avx10_2-512-vpdpbsud-2.c: Ditto. * gcc.target/i386/avx10_2-512-vpdpbsuds-2.c: Ditto. * gcc.target/i386/avx10_2-512-vpdpbuud-2.c: Ditto. * gcc.target/i386/avx10_2-512-vpdpbuuds-2.c: Ditto. * gcc.target/i386/avx10_2-vpdpbssd-2.c: Ditto. * gcc.target/i386/avx10_2-vpdpbssds-2.c: Ditto. * gcc.target/i386/avx10_2-vpdpbsud-2.c: Ditto. * gcc.target/i386/avx10_2-vpdpbsuds-2.c: Ditto. * gcc.target/i386/avx10_2-vpdpbuud-2.c: Ditto. * gcc.target/i386/avx10_2-vpdpbuuds-2.c: Ditto. 
Co-authored-by: Haochen Jiang <haochen.ji...@intel.com> --- gcc/config.gcc | 3 +- gcc/config/i386/avx10_2-512mediaintrin.h | 234 +++++++++++ gcc/config/i386/avx10_2mediaintrin.h | 367 ++++++++++++++++++ gcc/config/i386/i386-builtin.def | 68 +++- gcc/config/i386/i386-builtins.cc | 10 +- gcc/config/i386/i386-expand.cc | 3 + gcc/config/i386/immintrin.h | 4 + gcc/config/i386/sse.md | 66 +++- gcc/testsuite/gcc.target/i386/avx10-check.h | 61 +++ gcc/testsuite/gcc.target/i386/avx10-helper.h | 23 ++ .../gcc.target/i386/avx10-os-support.h | 23 ++ .../gcc.target/i386/avx10_2-512-media-1.c | 52 +++ .../gcc.target/i386/avx10_2-512-vpdpbssd-2.c | 71 ++++ .../gcc.target/i386/avx10_2-512-vpdpbssds-2.c | 74 ++++ .../gcc.target/i386/avx10_2-512-vpdpbsud-2.c | 71 ++++ .../gcc.target/i386/avx10_2-512-vpdpbsuds-2.c | 74 ++++ .../gcc.target/i386/avx10_2-512-vpdpbuud-2.c | 70 ++++ .../gcc.target/i386/avx10_2-512-vpdpbuuds-2.c | 73 ++++ .../gcc.target/i386/avx10_2-builtin-1.c | 8 + .../gcc.target/i386/avx10_2-media-1.c | 96 +++++ .../gcc.target/i386/avx10_2-vpdpbssd-2.c | 16 + .../gcc.target/i386/avx10_2-vpdpbssds-2.c | 16 + .../gcc.target/i386/avx10_2-vpdpbsud-2.c | 16 + .../gcc.target/i386/avx10_2-vpdpbsuds-2.c | 16 + .../gcc.target/i386/avx10_2-vpdpbuud-2.c | 16 + .../gcc.target/i386/avx10_2-vpdpbuuds-2.c | 16 + .../gcc.target/i386/avx512f-helper.h | 6 +- .../gcc.target/i386/avxvnniint8-builtin.c | 8 + gcc/testsuite/gcc.target/i386/funcspec-56.inc | 4 + gcc/testsuite/lib/target-supports.exp | 36 ++ 30 files changed, 1577 insertions(+), 24 deletions(-) create mode 100644 gcc/config/i386/avx10_2-512mediaintrin.h create mode 100644 gcc/config/i386/avx10_2mediaintrin.h create mode 100644 gcc/testsuite/gcc.target/i386/avx10-check.h create mode 100644 gcc/testsuite/gcc.target/i386/avx10-helper.h create mode 100644 gcc/testsuite/gcc.target/i386/avx10-os-support.h create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-media-1.c create mode 100644 
gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbssd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbssds-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbsud-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbsuds-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbuud-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbuuds-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-builtin-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-media-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vpdpbssd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vpdpbssds-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vpdpbsud-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vpdpbsuds-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vpdpbuud-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vpdpbuuds-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-builtin.c diff --git a/gcc/config.gcc b/gcc/config.gcc index 2c0f4518638..22353f2d69e 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -452,7 +452,8 @@ i[34567]86-*-* | x86_64-*-*) cmpccxaddintrin.h amxfp16intrin.h prfchiintrin.h raointintrin.h amxcomplexintrin.h avxvnniint16intrin.h sm3intrin.h sha512intrin.h sm4intrin.h - usermsrintrin.h avx10_2roundingintrin.h" + usermsrintrin.h avx10_2roundingintrin.h + avx10_2mediaintrin.h avx10_2-512mediaintrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/avx10_2-512mediaintrin.h b/gcc/config/i386/avx10_2-512mediaintrin.h new file mode 100644 index 00000000000..02d826b24cd --- /dev/null +++ b/gcc/config/i386/avx10_2-512mediaintrin.h @@ -0,0 +1,234 @@ +/* Copyright (C) 2024 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined _IMMINTRIN_H_INCLUDED +#error "Never use <avx10_2-512mediaintrin.h> directly; include <immintrin.h> instead." 
+#endif + +#ifndef _AVX10_2_512MEDIAINTRIN_H_INCLUDED +#define _AVX10_2_512MEDIAINTRIN_H_INCLUDED + +#if !defined(__AVX10_2_512__) +#pragma GCC push_options +#pragma GCC target("avx10.2-512") +#define __DISABLE_AVX10_2_512__ +#endif /* __AVX10_2_512__ */ + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbssd_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbssd_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssd_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbssd_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssd_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbssds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbssds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbssds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + 
(__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbsud_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbsud_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsud_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbsud_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsud_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbsuds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbsuds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsuds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbsuds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsuds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbuud_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuud512 
((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbuud_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuud_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbuud_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuud_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbuuds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbuuds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuuds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbuuds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuuds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +#ifdef __DISABLE_AVX10_2_512__ +#undef __DISABLE_AVX10_2_512__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX10_2_512__ */ + +#endif /* __AVX10_2_512MEDIAINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx10_2mediaintrin.h b/gcc/config/i386/avx10_2mediaintrin.h new file mode 100644 index 00000000000..e668af62e36 --- /dev/null +++ b/gcc/config/i386/avx10_2mediaintrin.h @@ -0,0 +1,367 @@ +/* Copyright (C) 2024 Free Software 
Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined _IMMINTRIN_H_INCLUDED +#error "Never use <avx10_2mediaintrin.h> directly; include <immintrin.h> instead." 
+#endif + +#ifndef _AVX10_2MEDIAINTRIN_H_INCLUDED +#define _AVX10_2MEDIAINTRIN_H_INCLUDED + +#if !defined(__AVX10_2_256__) +#pragma GCC push_options +#pragma GCC target("avx10.2-256") +#define __DISABLE_AVX10_2_256__ +#endif /* __AVX10_2_256__ */ + +#define _mm_dpbssd_epi32(W, A, B) \ + (__m128i) __builtin_ia32_vpdpbssd128 ((__v4si) (W), (__v4si) (A), (__v4si) (B)) + +#define _mm_dpbssds_epi32(W, A, B) \ + (__m128i) __builtin_ia32_vpdpbssds128 ((__v4si) (W), (__v4si) (A), (__v4si) (B)) + +#define _mm_dpbsud_epi32(W, A, B) \ + (__m128i) __builtin_ia32_vpdpbsud128 ((__v4si) (W), (__v4si) (A), (__v4si) (B)) + +#define _mm_dpbsuds_epi32(W, A, B) \ + (__m128i) __builtin_ia32_vpdpbsuds128 ((__v4si) (W), (__v4si) (A), (__v4si) (B)) + +#define _mm_dpbuud_epi32(W, A, B) \ + (__m128i) __builtin_ia32_vpdpbuud128 ((__v4si) (W), (__v4si) (A), (__v4si) (B)) + +#define _mm_dpbuuds_epi32(W, A, B) \ + (__m128i) __builtin_ia32_vpdpbuuds128 ((__v4si) (W), (__v4si) (A), (__v4si) (B)) + +#define _mm256_dpbssd_epi32(W, A, B) \ + (__m256i) __builtin_ia32_vpdpbssd256 ((__v8si) (W), (__v8si) (A), (__v8si) (B)) + +#define _mm256_dpbssds_epi32(W, A, B) \ + (__m256i) __builtin_ia32_vpdpbssds256 ((__v8si) (W), (__v8si) (A), (__v8si) (B)) + +#define _mm256_dpbsud_epi32(W, A, B) \ + (__m256i) __builtin_ia32_vpdpbsud256 ((__v8si) (W), (__v8si) (A), (__v8si) (B)) + +#define _mm256_dpbsuds_epi32(W, A, B) \ + (__m256i) __builtin_ia32_vpdpbsuds256 ((__v8si) (W), (__v8si) (A), (__v8si) (B)) + +#define _mm256_dpbuud_epi32(W, A, B) \ + (__m256i) __builtin_ia32_vpdpbuud256 ((__v8si) (W), (__v8si) (A), (__v8si) (B)) + +#define _mm256_dpbuuds_epi32(W, A, B) \ + (__m256i) __builtin_ia32_vpdpbuuds256 ((__v8si) (W), (__v8si) (A), (__v8si) (B)) + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_dpbssd_epi32 (__m128i __W, __mmask8 __U, + __m128i __A, __m128i __B) +{ + return (__m128i) + __builtin_ia32_vpdpbssd_v4si_mask ((__v4si) __W, + (__v4si) __A, + 
(__v4si) __B, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_dpbssd_epi32 (__mmask8 __U, __m128i __W, + __m128i __A, __m128i __B) +{ + return (__m128i) + __builtin_ia32_vpdpbssd_v4si_maskz ((__v4si) __W, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_dpbssds_epi32 (__m128i __W, __mmask8 __U, + __m128i __A, __m128i __B) +{ + return (__m128i) + __builtin_ia32_vpdpbssds_v4si_mask ((__v4si) __W, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_dpbssds_epi32 (__mmask8 __U, __m128i __W, + __m128i __A, __m128i __B) +{ + return (__m128i) + __builtin_ia32_vpdpbssds_v4si_maskz ((__v4si) __W, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_dpbsud_epi32 (__m128i __W, __mmask8 __U, + __m128i __A, __m128i __B) +{ + return (__m128i) + __builtin_ia32_vpdpbsud_v4si_mask ((__v4si) __W, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_dpbsud_epi32 (__mmask8 __U, __m128i __W, + __m128i __A, __m128i __B) +{ + return (__m128i) + __builtin_ia32_vpdpbsud_v4si_maskz ((__v4si) __W, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_dpbsuds_epi32 (__m128i __W, __mmask8 __U, + __m128i __A, __m128i __B) +{ + return (__m128i) + __builtin_ia32_vpdpbsuds_v4si_mask ((__v4si) __W, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_dpbsuds_epi32 (__mmask8 __U, __m128i 
__W, + __m128i __A, __m128i __B) +{ + return (__m128i) + __builtin_ia32_vpdpbsuds_v4si_maskz ((__v4si) __W, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_dpbuud_epi32 (__m128i __W, __mmask8 __U, + __m128i __A, __m128i __B) +{ + return (__m128i) + __builtin_ia32_vpdpbuud_v4si_mask ((__v4si) __W, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_dpbuud_epi32 (__mmask8 __U, __m128i __W, + __m128i __A, __m128i __B) +{ + return (__m128i) + __builtin_ia32_vpdpbuud_v4si_maskz ((__v4si) __W, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_dpbuuds_epi32 (__m128i __W, __mmask8 __U, + __m128i __A, __m128i __B) +{ + return (__m128i) + __builtin_ia32_vpdpbuuds_v4si_mask ((__v4si) __W, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_dpbuuds_epi32 (__mmask8 __U, __m128i __W, + __m128i __A, __m128i __B) +{ + return (__m128i) + __builtin_ia32_vpdpbuuds_v4si_maskz ((__v4si) __W, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_dpbssd_epi32 (__m256i __W, __mmask8 __U, + __m256i __A, __m256i __B) +{ + return (__m256i) + __builtin_ia32_vpdpbssd_v8si_mask ((__v8si) __W, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_dpbssd_epi32 (__mmask8 __U, __m256i __W, + __m256i __A, __m256i __B) +{ + return (__m256i) + __builtin_ia32_vpdpbssd_v8si_maskz ((__v8si) __W, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +extern 
__inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_dpbssds_epi32 (__m256i __W, __mmask8 __U, + __m256i __A, __m256i __B) +{ + return (__m256i) + __builtin_ia32_vpdpbssds_v8si_mask ((__v8si) __W, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_dpbssds_epi32 (__mmask8 __U, __m256i __W, + __m256i __A, __m256i __B) +{ + return (__m256i) + __builtin_ia32_vpdpbssds_v8si_maskz ((__v8si) __W, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_dpbsud_epi32 (__m256i __W, __mmask8 __U, + __m256i __A, __m256i __B) +{ + return (__m256i) + __builtin_ia32_vpdpbsud_v8si_mask ((__v8si) __W, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_dpbsud_epi32 (__mmask8 __U, __m256i __W, + __m256i __A, __m256i __B) +{ + return (__m256i) + __builtin_ia32_vpdpbsud_v8si_maskz ((__v8si) __W, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_dpbsuds_epi32 (__m256i __W, __mmask8 __U, + __m256i __A, __m256i __B) +{ + return (__m256i) + __builtin_ia32_vpdpbsuds_v8si_mask ((__v8si) __W, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_dpbsuds_epi32 (__mmask8 __U, __m256i __W, + __m256i __A, __m256i __B) +{ + return (__m256i) + __builtin_ia32_vpdpbsuds_v8si_maskz ((__v8si) __W, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_dpbuud_epi32 (__m256i __W, __mmask8 __U, + __m256i __A, __m256i 
__B) +{ + return (__m256i) + __builtin_ia32_vpdpbuud_v8si_mask ((__v8si) __W, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_dpbuud_epi32 (__mmask8 __U, __m256i __W, + __m256i __A, __m256i __B) +{ + return (__m256i) + __builtin_ia32_vpdpbuud_v8si_maskz ((__v8si) __W, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_dpbuuds_epi32 (__m256i __W, __mmask8 __U, + __m256i __A, __m256i __B) +{ + return (__m256i) + __builtin_ia32_vpdpbuuds_v8si_mask ((__v8si) __W, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_dpbuuds_epi32 (__mmask8 __U, __m256i __W, + __m256i __A, __m256i __B) +{ + return (__m256i) + __builtin_ia32_vpdpbuuds_v8si_maskz ((__v8si) __W, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + + +#ifdef __DISABLE_AVX10_2_256__ +#undef __DISABLE_AVX10_2_256__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX10_2_256__ */ + +#endif /* __AVX10_2MEDIAINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 416944f8b5b..5bd9aabdc52 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2748,18 +2748,18 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpws BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz, "__builtin_ia32_vpdpwssds_v4si_maskz", IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) /* AVXVNNIINT8 */ -BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssd_v8si, "__builtin_ia32_vpdpbssd256", IX86_BUILTIN_VPDPBSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) -BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssds_v8si, 
"__builtin_ia32_vpdpbssds256", IX86_BUILTIN_VPDPBSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) -BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsud_v8si, "__builtin_ia32_vpdpbsud256", IX86_BUILTIN_VPDPBSUDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) -BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsuds_v8si, "__builtin_ia32_vpdpbsuds256", IX86_BUILTIN_VPDPBSUDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) -BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuud_v8si, "__builtin_ia32_vpdpbuud256", IX86_BUILTIN_VPDPBUUDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) -BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuuds_v8si, "__builtin_ia32_vpdpbuuds256", IX86_BUILTIN_VPDPBUUDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) -BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssd_v4si, "__builtin_ia32_vpdpbssd128", IX86_BUILTIN_VPDPBSSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) -BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssds_v4si, "__builtin_ia32_vpdpbssds128", IX86_BUILTIN_VPDPBSSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) -BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsud_v4si, "__builtin_ia32_vpdpbsud128", IX86_BUILTIN_VPDPBSUDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) -BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsuds_v4si, "__builtin_ia32_vpdpbsuds128", IX86_BUILTIN_VPDPBSUDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) -BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuud_v4si, "__builtin_ia32_vpdpbuud128", IX86_BUILTIN_VPDPBUUDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) -BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuuds_v4si, "__builtin_ia32_vpdpbuuds128", IX86_BUILTIN_VPDPBUUDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8 | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbssd_v8si, "__builtin_ia32_vpdpbssd256", IX86_BUILTIN_VPDPBSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8 | 
OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbssds_v8si, "__builtin_ia32_vpdpbssds256", IX86_BUILTIN_VPDPBSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8 | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbsud_v8si, "__builtin_ia32_vpdpbsud256", IX86_BUILTIN_VPDPBSUDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8 | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbsuds_v8si, "__builtin_ia32_vpdpbsuds256", IX86_BUILTIN_VPDPBSUDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8 | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbuud_v8si, "__builtin_ia32_vpdpbuud256", IX86_BUILTIN_VPDPBUUDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8 | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbuuds_v8si, "__builtin_ia32_vpdpbuuds256", IX86_BUILTIN_VPDPBUUDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8 | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbssd_v4si, "__builtin_ia32_vpdpbssd128", IX86_BUILTIN_VPDPBSSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8 | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbssds_v4si, "__builtin_ia32_vpdpbssds128", IX86_BUILTIN_VPDPBSSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8 | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbsud_v4si, "__builtin_ia32_vpdpbsud128", IX86_BUILTIN_VPDPBSUDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8 | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbsuds_v4si, "__builtin_ia32_vpdpbsuds128", IX86_BUILTIN_VPDPBSUDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8 | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbuud_v4si, "__builtin_ia32_vpdpbuud128", IX86_BUILTIN_VPDPBUUDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8 | OPTION_MASK_ISA2_AVX10_2_256, 
CODE_FOR_vpdpbuuds_v4si, "__builtin_ia32_vpdpbuuds128", IX86_BUILTIN_VPDPBUUDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) /* AVXVNNIINT16 */ BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT16, CODE_FOR_vpdpwusd_v8si, "__builtin_ia32_vpdpwusd256", IX86_BUILTIN_VPDPWUSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) @@ -3020,6 +3020,50 @@ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp1 BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmulc_v16hf, "__builtin_ia32_vfmulcph256", IX86_BUILTIN_VFMULCPH_V16HF, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmulc_v16hf_mask, "__builtin_ia32_vfmulcph256_mask", IX86_BUILTIN_VFMULCPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UQI) +/* AVX10.2. */ +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vpdpbssd_v16si, "__builtin_ia32_vpdpbssd512", IX86_BUILTIN_VPDPBSSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vpdpbssds_v16si, "__builtin_ia32_vpdpbssds512", IX86_BUILTIN_VPDPBSSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vpdpbsud_v16si, "__builtin_ia32_vpdpbsud512", IX86_BUILTIN_VPDPBSUDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vpdpbsuds_v16si, "__builtin_ia32_vpdpbsuds512", IX86_BUILTIN_VPDPBSUDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vpdpbuud_v16si, "__builtin_ia32_vpdpbuud512", IX86_BUILTIN_VPDPBUUDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vpdpbuuds_v16si, "__builtin_ia32_vpdpbuuds512", IX86_BUILTIN_VPDPBUUDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vpdpbssd_v16si_mask, "__builtin_ia32_vpdpbssd_v16si_mask", 
IX86_BUILTIN_VPDPBSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vpdpbssd_v16si_maskz, "__builtin_ia32_vpdpbssd_v16si_maskz", IX86_BUILTIN_VPDPBSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vpdpbssds_v16si_mask, "__builtin_ia32_vpdpbssds_v16si_mask", IX86_BUILTIN_VPDPBSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vpdpbssds_v16si_maskz, "__builtin_ia32_vpdpbssds_v16si_maskz", IX86_BUILTIN_VPDPBSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vpdpbsud_v16si_mask, "__builtin_ia32_vpdpbsud_v16si_mask", IX86_BUILTIN_VPDPBSUDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vpdpbsud_v16si_maskz, "__builtin_ia32_vpdpbsud_v16si_maskz", IX86_BUILTIN_VPDPBSUDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vpdpbsuds_v16si_mask, "__builtin_ia32_vpdpbsuds_v16si_mask", IX86_BUILTIN_VPDPBSUDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vpdpbsuds_v16si_maskz, "__builtin_ia32_vpdpbsuds_v16si_maskz", IX86_BUILTIN_VPDPBSUDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vpdpbuud_v16si_mask, "__builtin_ia32_vpdpbuud_v16si_mask", IX86_BUILTIN_VPDPBUUDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vpdpbuud_v16si_maskz, "__builtin_ia32_vpdpbuud_v16si_maskz", IX86_BUILTIN_VPDPBUUDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vpdpbuuds_v16si_mask, "__builtin_ia32_vpdpbuuds_v16si_mask", IX86_BUILTIN_VPDPBUUDSV16SI_MASK, UNKNOWN, (int) 
V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vpdpbuuds_v16si_maskz, "__builtin_ia32_vpdpbuuds_v16si_maskz", IX86_BUILTIN_VPDPBUUDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbssd_v8si_mask, "__builtin_ia32_vpdpbssd_v8si_mask", IX86_BUILTIN_VPDPBSSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbssd_v8si_maskz, "__builtin_ia32_vpdpbssd_v8si_maskz", IX86_BUILTIN_VPDPBSSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbssds_v8si_mask, "__builtin_ia32_vpdpbssds_v8si_mask", IX86_BUILTIN_VPDPBSSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbssds_v8si_maskz, "__builtin_ia32_vpdpbssds_v8si_maskz", IX86_BUILTIN_VPDPBSSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbsud_v8si_mask, "__builtin_ia32_vpdpbsud_v8si_mask", IX86_BUILTIN_VPDPBSUDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbsud_v8si_maskz, "__builtin_ia32_vpdpbsud_v8si_maskz", IX86_BUILTIN_VPDPBSUDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbsuds_v8si_mask, "__builtin_ia32_vpdpbsuds_v8si_mask", IX86_BUILTIN_VPDPBSUDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbsuds_v8si_maskz, "__builtin_ia32_vpdpbsuds_v8si_maskz", IX86_BUILTIN_VPDPBSUDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbuud_v8si_mask, "__builtin_ia32_vpdpbuud_v8si_mask", IX86_BUILTIN_VPDPBUUDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbuud_v8si_maskz, 
"__builtin_ia32_vpdpbuud_v8si_maskz", IX86_BUILTIN_VPDPBUUDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbuuds_v8si_mask, "__builtin_ia32_vpdpbuuds_v8si_mask", IX86_BUILTIN_VPDPBUUDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbuuds_v8si_maskz, "__builtin_ia32_vpdpbuuds_v8si_maskz", IX86_BUILTIN_VPDPBUUDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbssd_v4si_mask, "__builtin_ia32_vpdpbssd_v4si_mask", IX86_BUILTIN_VPDPBSSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbssd_v4si_maskz, "__builtin_ia32_vpdpbssd_v4si_maskz", IX86_BUILTIN_VPDPBSSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbssds_v4si_mask, "__builtin_ia32_vpdpbssds_v4si_mask", IX86_BUILTIN_VPDPBSSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbssds_v4si_maskz, "__builtin_ia32_vpdpbssds_v4si_maskz", IX86_BUILTIN_VPDPBSSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbsud_v4si_mask, "__builtin_ia32_vpdpbsud_v4si_mask", IX86_BUILTIN_VPDPBSUDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbsud_v4si_maskz, "__builtin_ia32_vpdpbsud_v4si_maskz", IX86_BUILTIN_VPDPBSUDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbsuds_v4si_mask, "__builtin_ia32_vpdpbsuds_v4si_mask", IX86_BUILTIN_VPDPBSUDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbsuds_v4si_maskz, "__builtin_ia32_vpdpbsuds_v4si_maskz", IX86_BUILTIN_VPDPBSUDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) 
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbuud_v4si_mask, "__builtin_ia32_vpdpbuud_v4si_mask", IX86_BUILTIN_VPDPBUUDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbuud_v4si_maskz, "__builtin_ia32_vpdpbuud_v4si_maskz", IX86_BUILTIN_VPDPBUUDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbuuds_v4si_mask, "__builtin_ia32_vpdpbuuds_v4si_mask", IX86_BUILTIN_VPDPBUUDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vpdpbuuds_v4si_maskz, "__builtin_ia32_vpdpbuuds_v4si_maskz", IX86_BUILTIN_VPDPBUUDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) + /* Builtins with rounding support. */ BDESC_END (ARGS, ROUND_ARGS) diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc index a76451f5949..130ba853125 100644 --- a/gcc/config/i386/i386-builtins.cc +++ b/gcc/config/i386/i386-builtins.cc @@ -280,15 +280,17 @@ def_builtin (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, if (((mask2 == 0 || (mask2 & ix86_isa_flags2) != 0) && (mask == 0 || (mask & ix86_isa_flags) != 0)) || ((mask & OPTION_MASK_ISA_MMX) != 0 && TARGET_MMX_WITH_SSE) - /* "Unified" builtin used by either AVXVNNI/AVXIFMA/AES intrinsics - or AVX512VNNIVL/AVX512IFMAVL/VAESVL non-mask intrinsics should be - defined whenever avxvnni/avxifma/aes or avx512vnni/avx512ifma/vaes - && avx512vl exist. */ + /* "Unified" builtin used by either AVXVNNI/AVXIFMA/AES/AVXVNNIINT8 + intrinsics or AVX512VNNIVL/AVX512IFMAVL/VAESVL/AVX10.2 non-mask + intrinsics should be defined whenever avxvnni/avxifma/aes/ + avxvnniint8 or avx512vnni && avx512vl/avx512ifma && avx512vl/vaes + && avx512vl/avx10.2 exist. 
*/ || (mask2 == OPTION_MASK_ISA2_AVXVNNI) || (mask2 == OPTION_MASK_ISA2_AVXIFMA) || (mask2 == (OPTION_MASK_ISA2_AVXNECONVERT | OPTION_MASK_ISA2_AVX512BF16)) || ((mask2 & OPTION_MASK_ISA2_VAES) != 0) + || ((mask2 & OPTION_MASK_ISA2_AVXVNNIINT8) != 0) || (lang_hooks.builtin_function == lang_hooks.builtin_function_ext_scope)) { diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index bc8f16bfb65..200b768f5d9 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -13298,6 +13298,7 @@ ix86_check_builtin_isa_match (unsigned int fcode, (OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA2_AVX512BF16) or OPTION_MASK_ISA2_AVXNECONVERT OPTION_MASK_ISA_AES or (OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA2_VAES) + OPTION_MASK_ISA2_AVX10_2_256 or OPTION_MASK_ISA2_AVXVNNIINT8 where for each such pair it is sufficient if either of the ISAs is enabled, plus if it is ored with other options also those others. OPTION_MASK_ISA_MMX in bisa is satisfied also if TARGET_MMX_WITH_SSE. 
*/ @@ -13323,6 +13324,8 @@ ix86_check_builtin_isa_match (unsigned int fcode, OPTION_MASK_ISA2_AVXNECONVERT); SHARE_BUILTIN (OPTION_MASK_ISA_AES, 0, OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES); + SHARE_BUILTIN (0, OPTION_MASK_ISA2_AVXVNNIINT8, 0, + OPTION_MASK_ISA2_AVX10_2_256); isa = tmp_isa; isa2 = tmp_isa2; diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index 80357d563ee..ce8437d00c2 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -140,4 +140,8 @@ #include <avx10_2roundingintrin.h> +#include <avx10_2mediaintrin.h> + +#include <avx10_2-512mediaintrin.h> + #endif /* _IMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 8f34c9300d0..41d448f57cb 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -578,6 +578,9 @@ (define_mode_iterator VI4_AVX512VL [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")]) +(define_mode_iterator VI4_AVX10_2 + [(V16SI "TARGET_AVX10_2_512") V8SI V4SI]) + (define_mode_iterator VI48_AVX512F_AVX512VL [V4SI V8SI (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") @@ -31241,16 +31244,67 @@ }) (define_insn "vpdp<vpdotprodtype>_<mode>" - [(set (match_operand:VI4_AVX 0 "register_operand" "=x") + [(set (match_operand:VI4_AVX 0 "register_operand" "=v") (unspec:VI4_AVX [(match_operand:VI4_AVX 1 "register_operand" "0") - (match_operand:VI4_AVX 2 "register_operand" "x") - (match_operand:VI4_AVX 3 "nonimmediate_operand" "xjm")] + (match_operand:VI4_AVX 2 "register_operand" "v") + (match_operand:VI4_AVX 3 "nonimmediate_operand" "vm")] VPDOTPROD))] - "TARGET_AVXVNNIINT8" + "TARGET_AVXVNNIINT8 || TARGET_AVX10_2_256" "vpdp<vpdotprodtype>\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "prefix" "vex") - (set_attr "addr" "gpr16")]) + [(set_attr "prefix" "maybe_evex")]) + +(define_insn "vpdp<vpdotprodtype>_v16si" + [(set (match_operand:V16SI 0 "register_operand" "=v") + (unspec:V16SI + 
[(match_operand:V16SI 1 "register_operand" "0") + (match_operand:V16SI 2 "register_operand" "v") + (match_operand:V16SI 3 "nonimmediate_operand" "vm")] + VPDOTPROD))] + "TARGET_AVX10_2_512" + "vpdp<vpdotprodtype>\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "prefix" "evex")]) + +(define_insn "vpdp<vpdotprodtype>_<mode>_mask" + [(set (match_operand:VI4_AVX10_2 0 "register_operand" "=v") + (vec_merge:VI4_AVX10_2 + (unspec:VI4_AVX10_2 + [(match_operand:VI4_AVX10_2 1 "register_operand" "0") + (match_operand:VI4_AVX10_2 2 "register_operand" "v") + (match_operand:VI4_AVX10_2 3 "nonimmediate_operand" "vm")] + VPDOTPROD) + (match_dup 1) + (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] + "TARGET_AVX10_2_256" + "vpdp<vpdotprodtype>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + [(set_attr "prefix" "evex")]) + +(define_expand "vpdp<vpdotprodtype>_<mode>_maskz" + [(set (match_operand:VI4_AVX10_2 0 "register_operand") + (vec_merge:VI4_AVX10_2 + (unspec:VI4_AVX10_2 + [(match_operand:VI4_AVX10_2 1 "register_operand") + (match_operand:VI4_AVX10_2 2 "register_operand") + (match_operand:VI4_AVX10_2 3 "nonimmediate_operand")] + VPDOTPROD) + (match_dup 5) + (match_operand:<avx512fmaskmode> 4 "register_operand")))] + "TARGET_AVX10_2_256" + "operands[5] = CONST0_RTX (<MODE>mode);") + +(define_insn "*vpdp<vpdotprodtype>_<mode>_maskz" + [(set (match_operand:VI4_AVX10_2 0 "register_operand" "=v") + (vec_merge:VI4_AVX10_2 + (unspec:VI4_AVX10_2 + [(match_operand:VI4_AVX10_2 1 "register_operand" "0") + (match_operand:VI4_AVX10_2 2 "register_operand" "v") + (match_operand:VI4_AVX10_2 3 "nonimmediate_operand" "vm")] + VPDOTPROD) + (match_operand:VI4_AVX10_2 5 "const0_operand" "C") + (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] + "TARGET_AVX10_2_256" + "vpdp<vpdotprodtype>\t{%3, %2, %0%{%4%}%N5|%0%{%4%}%N5, %2, %3}" + [(set_attr "prefix" "evex")]) (define_insn "vbcstnebf162ps_<mode>" [(set (match_operand:VF1_128_256 0 "register_operand" "=x") diff --git 
a/gcc/testsuite/gcc.target/i386/avx10-check.h b/gcc/testsuite/gcc.target/i386/avx10-check.h new file mode 100644 index 00000000000..76c32d7acaa --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10-check.h @@ -0,0 +1,61 @@ +#include <stdlib.h> +#include "cpuid.h" +#include "m512-check.h" +#include "avx10-os-support.h" + +#ifndef DO_TEST +#define DO_TEST do_test +#if defined(AVX10_512BIT) +static void test_512 (void); +#else +static void test_256 (void); +static void test_128 (void); +#endif + +__attribute__ ((noinline)) +static void +do_test (void) +{ +#if defined(AVX10_512BIT) + test_512 (); +#else + test_256 (); + test_128 (); +#endif +} +#endif + +static int +check_osxsave (void) +{ + unsigned int eax, ebx, ecx, edx; + + __cpuid (1, eax, ebx, ecx, edx); + return (ecx & bit_OSXSAVE) != 0; +} + +int +main () +{ + /* Run AVX10 test only if host has ISA support. */ + if (__builtin_cpu_supports ("avx10.1") +#ifdef AVX10_2 + && __builtin_cpu_supports ("avx10.2") +#endif +#ifdef AVX10_2_512 + && __builtin_cpu_supports ("avx10.2-512") +#endif + && avx10_os_support ()) + { + DO_TEST (); +#ifdef DEBUG + printf ("PASSED\n"); +#endif + return 0; + } + +#ifdef DEBUG + printf ("SKIPPED\n"); +#endif + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/avx10-helper.h b/gcc/testsuite/gcc.target/i386/avx10-helper.h new file mode 100644 index 00000000000..385c7446979 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10-helper.h @@ -0,0 +1,23 @@ +#ifndef AVX10_HELPER_INCLUDED +#define AVX10_HELPER_INCLUDED + +#define AVX10 +#define AVX512FP16 + +#include "avx512f-helper.h" +#include "avx512f-mask-type.h" + +#endif /* AVX10_HELPER_INCLUDED */ + +/* Intrinsic being tested. It has different definitions, + depending on AVX512F_LEN, so it's outside include guards + and is undefed first to silence warnings. 
*/ +#if defined INTRINSIC +#undef INTRINSIC +#endif + +#if AVX512F_LEN != 128 +#define INTRINSIC(NAME) EVAL(_mm, AVX512F_LEN, NAME) +#else +#define INTRINSIC(NAME) _mm ## NAME +#endif diff --git a/gcc/testsuite/gcc.target/i386/avx10-os-support.h b/gcc/testsuite/gcc.target/i386/avx10-os-support.h new file mode 100644 index 00000000000..ea6899bbcab --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10-os-support.h @@ -0,0 +1,23 @@ +/* Check if the OS supports executing AVX10 instructions. */ + +#define XCR_XFEATURE_ENABLED_MASK 0x0 + +#define XSTATE_FP 0x1 +#define XSTATE_SSE 0x2 +#define XSTATE_YMM 0x4 +#define XSTATE_OPMASK 0x20 +#define XSTATE_ZMM 0x40 +#define XSTATE_HI_ZMM 0x80 + +static int +avx10_os_support (void) +{ + unsigned int eax, edx; + unsigned int ecx = XCR_XFEATURE_ENABLED_MASK; + unsigned int mask = XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK + | XSTATE_HI_ZMM; + + __asm__ ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (ecx)); + + return ((eax & mask) == mask); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-media-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-media-1.c new file mode 100644 index 00000000000..d4145c41a99 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-media-1.c @@ -0,0 +1,52 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx10.2-512 -O2" } */ +/* { dg-final { scan-assembler-times "vpdpbssd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbssd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbssd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbssds\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbssds\[ 
\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbssds\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbsud\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbsud\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbsud\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbsuds\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbsuds\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbsuds\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbuud\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbuud\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbuud\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbuuds\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbuuds\[ 
\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbuuds\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512i x,y,z,z1; +volatile __mmask16 m16; + +void avx10_2_512_test (void) +{ + x = _mm512_dpbssd_epi32 (x, y, z); + x = _mm512_mask_dpbssd_epi32 (x, m16, y, z); + x = _mm512_maskz_dpbssd_epi32 (m16, x, y, z); + + x = _mm512_dpbssds_epi32 (x, y, z); + x = _mm512_mask_dpbssds_epi32 (x, m16, y, z); + x = _mm512_maskz_dpbssds_epi32 (m16, x, y, z); + + x = _mm512_dpbsud_epi32 (x, y, z); + x = _mm512_mask_dpbsud_epi32 (x, m16, y, z); + x = _mm512_maskz_dpbsud_epi32 (m16, x, y, z); + + x = _mm512_dpbsuds_epi32 (x, y, z); + x = _mm512_mask_dpbsuds_epi32 (x, m16, y, z); + x = _mm512_maskz_dpbsuds_epi32 (m16, x, y, z); + + x = _mm512_dpbuud_epi32 (x, y, z); + x = _mm512_mask_dpbuud_epi32 (x, m16, y, z); + x = _mm512_maskz_dpbuud_epi32 (m16, x, y, z); + + x = _mm512_dpbuuds_epi32 (x, y, z); + x = _mm512_mask_dpbuuds_epi32 (x, m16, y, z); + x = _mm512_maskz_dpbuuds_epi32 (m16, x, y, z); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbssd-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbssd-2.c new file mode 100644 index 00000000000..969a5ff844e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbssd-2.c @@ -0,0 +1,71 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif + +#include "avx10-helper.h" + +#define SIZE (AVX512F_LEN / 8) +#define SIZE_RES (AVX512F_LEN / 32) + +static void +CALC (int *r, int *dst, char *s1, char *s2) +{ + short tempres[SIZE]; + for (int i = 0; i < SIZE; i++) + tempres[i] = (short) s1[i] * (short) s2[i]; + for (int i = 0; i < SIZE_RES; i++) + { + 
long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1] + + tempres[i * 4 + 2] + tempres[i * 4 + 3]; + r[i] = test; + } +} + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3; + UNION_TYPE (AVX512F_LEN, i_b) src1; + UNION_TYPE (AVX512F_LEN, i_b) src2; + MASK_TYPE mask = MASK_VALUE; + int res_ref[SIZE_RES], res_ref2[SIZE_RES]; + + for (i = 0; i < SIZE; i++) + { + int sign = i % 2 ? 1 : -1; + src1.a[i] = sign * (10 + 3 * i * i); + src2.a[i] = sign * 10 * i * i; + } + + for (i = 0; i < SIZE_RES; i++) + { + res1.a[i] = 0x7FFFFFFF; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + } + + CALC (res_ref, res1.a, src1.a, src2.a); + CALC (res_ref2, res2.a, src1.a, src2.a); + + res1.x = INTRINSIC (_dpbssd_epi32) (res1.x, src1.x, src2.x); + res2.x = INTRINSIC (_mask_dpbssd_epi32) (res2.x, mask, src1.x, src2.x); + res3.x = INTRINSIC (_maskz_dpbssd_epi32) (mask, res3.x, src1.x, src2.x); + + if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref)) + abort (); + + MASK_MERGE (i_d) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref2)) + abort (); + + MASK_ZERO (i_d) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbssds-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbssds-2.c new file mode 100644 index 00000000000..1f147009186 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbssds-2.c @@ -0,0 +1,74 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif + +#include "avx10-helper.h" + +#define SIZE (AVX512F_LEN / 8) +#define SIZE_RES (AVX512F_LEN / 32) + +static void +CALC (int *r, int *dst, char *s1, char *s2) +{ + short tempres[SIZE]; + for (int i = 0; i < SIZE; i++) + tempres[i] = (short) s1[i] * (short) s2[i]; + for (int i = 
0; i < SIZE_RES; i++) + { + long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1] + + tempres[i * 4 + 2] + tempres[i * 4 + 3]; + long long max_int = 0x7FFFFFFF; + if (test > max_int) + test = max_int; + r[i] = test; + } +} + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3; + UNION_TYPE (AVX512F_LEN, i_b) src1; + UNION_TYPE (AVX512F_LEN, i_b) src2; + MASK_TYPE mask = MASK_VALUE; + int res_ref[SIZE_RES], res_ref2[SIZE_RES]; + + for (i = 0; i < SIZE; i++) + { + int sign = i % 2 ? 1 : -1; + src1.a[i] = sign * (10 + 3 * i * i); + src2.a[i] = sign * 10 * i * i; + } + + for (i = 0; i < SIZE_RES; i++) + { + res1.a[i] = 0x7FFFFFFF; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + } + + CALC (res_ref, res1.a, src1.a, src2.a); + CALC (res_ref2, res2.a, src1.a, src2.a); + + res1.x = INTRINSIC (_dpbssds_epi32) (res1.x, src1.x, src2.x); + res2.x = INTRINSIC (_mask_dpbssds_epi32) (res2.x, mask, src1.x, src2.x); + res3.x = INTRINSIC (_maskz_dpbssds_epi32) (mask, res3.x, src1.x, src2.x); + + if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref)) + abort (); + + MASK_MERGE (i_d) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref2)) + abort (); + + MASK_ZERO (i_d) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbsud-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbsud-2.c new file mode 100644 index 00000000000..81653b223c7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbsud-2.c @@ -0,0 +1,71 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif + +#include "avx10-helper.h" + +#define SIZE (AVX512F_LEN / 8) +#define SIZE_RES (AVX512F_LEN / 32) + +static void +CALC (int *r, int *dst, char *s1, unsigned char *s2) +{ + 
short tempres[SIZE]; + for (int i = 0; i < SIZE; i++) + tempres[i] = (short) s1[i] * (unsigned short) s2[i]; + for (int i = 0; i < SIZE_RES; i++) + { + long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1] + + tempres[i * 4 + 2] + tempres[i * 4 + 3]; + r[i] = test; + } +} + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3; + UNION_TYPE (AVX512F_LEN, i_b) src1; + UNION_TYPE (AVX512F_LEN, i_ub) src2; + MASK_TYPE mask = MASK_VALUE; + int res_ref[SIZE_RES], res_ref2[SIZE_RES]; + + for (i = 0; i < SIZE; i++) + { + int sign = i % 2 ? 1 : -1; + src1.a[i] = sign*10*i*i; + src2.a[i] = 10 + 3*i*i + sign; + } + + for (i = 0; i < SIZE_RES; i++) + { + res1.a[i] = 0x7FFFFFFF; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + } + + CALC (res_ref, res1.a, src1.a, src2.a); + CALC (res_ref2, res2.a, src1.a, src2.a); + + res1.x = INTRINSIC (_dpbsud_epi32) (res1.x, src1.x, src2.x); + res2.x = INTRINSIC (_mask_dpbsud_epi32) (res2.x, mask, src1.x, src2.x); + res3.x = INTRINSIC (_maskz_dpbsud_epi32) (mask, res3.x, src1.x, src2.x); + + if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref)) + abort (); + + MASK_MERGE (i_d) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref2)) + abort (); + + MASK_ZERO (i_d) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbsuds-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbsuds-2.c new file mode 100644 index 00000000000..70a00aa76f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbsuds-2.c @@ -0,0 +1,74 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif + +#include "avx10-helper.h" + +#define SIZE (AVX512F_LEN / 8) +#define SIZE_RES (AVX512F_LEN / 32) + +static void +CALC (int *r, int *dst, 
char *s1, unsigned char *s2) +{ + short tempres[SIZE]; + for (int i = 0; i < SIZE; i++) + tempres[i] = (short) s1[i] * (unsigned short) s2[i]; + for (int i = 0; i < SIZE_RES; i++) + { + long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1] + + tempres[i * 4 + 2] + tempres[i * 4 + 3]; + long long max_int = 0x7FFFFFFF; + if (test > max_int) + test = max_int; + r[i] = test; + } +} + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3; + UNION_TYPE (AVX512F_LEN, i_b) src1; + UNION_TYPE (AVX512F_LEN, i_ub) src2; + MASK_TYPE mask = MASK_VALUE; + int res_ref[SIZE_RES], res_ref2[SIZE_RES]; + + for (i = 0; i < SIZE; i++) + { + int sign = i % 2 ? 1 : -1; + src1.a[i] = sign * 10 * i * i; + src2.a[i] = 10 + 3 * i * i + sign; + } + + for (i = 0; i < SIZE_RES; i++) + { + res1.a[i] = 0x7FFFFFFF; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + } + + CALC (res_ref, res1.a, src1.a, src2.a); + CALC (res_ref2, res2.a, src1.a, src2.a); + + res1.x = INTRINSIC (_dpbsuds_epi32) (res1.x, src1.x, src2.x); + res2.x = INTRINSIC (_mask_dpbsuds_epi32) (res2.x, mask, src1.x, src2.x); + res3.x = INTRINSIC (_maskz_dpbsuds_epi32) (mask, res3.x, src1.x, src2.x); + + if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref)) + abort (); + + MASK_MERGE (i_d) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref2)) + abort (); + + MASK_ZERO (i_d) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbuud-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbuud-2.c new file mode 100644 index 00000000000..84ef32f1b01 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbuud-2.c @@ -0,0 +1,70 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif + +#include 
"avx10-helper.h" + +#define SIZE (AVX512F_LEN / 8) +#define SIZE_RES (AVX512F_LEN / 32) + +static void +CALC (int *r, int *dst, unsigned char *s1, unsigned char *s2) +{ + unsigned short tempres[SIZE]; + for (int i = 0; i < SIZE; i++) + tempres[i] = (unsigned short) s1[i] * (unsigned short) s2[i]; + for (int i = 0; i < SIZE_RES; i++) + { + long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1] + + tempres[i * 4 + 2] + tempres[i * 4 + 3]; + r[i] = test; + } +} + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3; + UNION_TYPE (AVX512F_LEN, i_ub) src1; + UNION_TYPE (AVX512F_LEN, i_ub) src2; + MASK_TYPE mask = MASK_VALUE; + int res_ref[SIZE_RES], res_ref2[SIZE_RES]; + + for (i = 0; i < SIZE; i++) + { + src1.a[i] = 10 + 3 * i * i; + src2.a[i] = 10 * i * i; + } + + for (i = 0; i < SIZE_RES; i++) + { + res1.a[i] = 0x7FFFFFFF; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + } + + CALC (res_ref, res1.a, src1.a, src2.a); + CALC (res_ref2, res2.a, src1.a, src2.a); + + res1.x = INTRINSIC (_dpbuud_epi32) (res1.x, src1.x, src2.x); + res2.x = INTRINSIC (_mask_dpbuud_epi32) (res2.x, mask, src1.x, src2.x); + res3.x = INTRINSIC (_maskz_dpbuud_epi32) (mask, res3.x, src1.x, src2.x); + + if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref)) + abort (); + + MASK_MERGE (i_d) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref2)) + abort (); + + MASK_ZERO (i_d) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbuuds-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbuuds-2.c new file mode 100644 index 00000000000..98fe36d6b66 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbuuds-2.c @@ -0,0 +1,73 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define 
AVX10_512BIT +#endif + +#include "avx10-helper.h" + +#define SIZE (AVX512F_LEN / 8) +#define SIZE_RES (AVX512F_LEN / 32) + +static void +CALC (int *r, int *dst, unsigned char *s1, unsigned char *s2) +{ + unsigned short tempres[SIZE]; + for (int i = 0; i < SIZE; i++) + tempres[i] = (unsigned short) s1[i] * (unsigned short) s2[i]; + for (int i = 0; i < SIZE_RES; i++) + { + long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1] + + tempres[i * 4 + 2] + tempres[i * 4 + 3]; + long long max_uint = 0xFFFFFFFF; + if (test > max_uint) + test = max_uint; + r[i] = test; + } +} + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3; + UNION_TYPE (AVX512F_LEN, i_ub) src1; + UNION_TYPE (AVX512F_LEN, i_ub) src2; + MASK_TYPE mask = MASK_VALUE; + int res_ref[SIZE_RES], res_ref2[SIZE_RES]; + + for (i = 0; i < SIZE; i++) + { + src1.a[i] = 10 + 3 * i * i; + src2.a[i] = 10 * i * i; + } + + for (i = 0; i < SIZE_RES; i++) + { + res1.a[i] = 0x7FFFFFFF; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + } + + CALC (res_ref, res1.a, src1.a, src2.a); + CALC (res_ref2, res2.a, src1.a, src2.a); + + res1.x = INTRINSIC (_dpbuuds_epi32) (res1.x, src1.x, src2.x); + res2.x = INTRINSIC (_mask_dpbuuds_epi32) (res2.x, mask, src1.x, src2.x); + res3.x = INTRINSIC (_maskz_dpbuuds_epi32) (mask, res3.x, src1.x, src2.x); + + if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref)) + abort (); + + MASK_MERGE (i_d) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref2)) + abort (); + + MASK_ZERO (i_d) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-builtin-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-builtin-1.c new file mode 100644 index 00000000000..daf61c785a2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-builtin-1.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mavx10.2 -mno-avxvnniint8" } */ +typedef 
int v8si __attribute__ ((vector_size (32))); +v8si +foo (v8si a, v8si b, v8si c) +{ + return __builtin_ia32_vpdpbssd256 (a, b, c); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-media-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-media-1.c new file mode 100644 index 00000000000..c2b3e5527d9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-media-1.c @@ -0,0 +1,96 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx10.2 -O2" } */ +/* { dg-final { scan-assembler-times "vpdpbssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbssds\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbsud\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbsud\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbsud\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbsud\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbsud\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbsud\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbsuds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbsuds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbsuds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbsuds\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbsuds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbsuds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbuud\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbuud\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbuud\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbuud\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbuud\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbuud\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbuuds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbuuds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbuuds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbuuds\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbuuds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbuuds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; +volatile __mmask8 m; + +void extern +avx10_2_test (void) +{ + x = _mm256_dpbssd_epi32 (x, y, z); + x = _mm256_mask_dpbssd_epi32 (x, m, y, z); + x = _mm256_maskz_dpbssd_epi32 (m, x, y, z); + + x_ = _mm_dpbssd_epi32 (x_, y_, z_); + x_ = _mm_mask_dpbssd_epi32 (x_, m, y_, z_); + x_ = _mm_maskz_dpbssd_epi32 (m, x_, y_, z_); + + x = _mm256_dpbssds_epi32 (x, y, z); + x = _mm256_mask_dpbssds_epi32 (x, m, y, z); + x = _mm256_maskz_dpbssds_epi32 (m, x, y, z); + + x_ = _mm_dpbssds_epi32 (x_, y_, z_); + x_ = _mm_mask_dpbssds_epi32 (x_, m, y_, z_); + x_ = _mm_maskz_dpbssds_epi32 (m, x_, y_, z_); + + x = _mm256_dpbsud_epi32 (x, y, z); + x = _mm256_mask_dpbsud_epi32 (x, m, y, z); + x = _mm256_maskz_dpbsud_epi32 (m, x, y, z); + + x_ = _mm_dpbsud_epi32 (x_, y_, z_); + x_ = _mm_mask_dpbsud_epi32 (x_, m, y_, z_); + x_ = _mm_maskz_dpbsud_epi32 (m, x_, y_, z_); + + x = _mm256_dpbsuds_epi32 (x, y, z); + x = _mm256_mask_dpbsuds_epi32 (x, m, y, z); + x = _mm256_maskz_dpbsuds_epi32 (m, x, y, z); + + x_ = _mm_dpbsuds_epi32 (x_, y_, z_); + x_ = _mm_mask_dpbsuds_epi32 (x_, m, y_, z_); + x_ = _mm_maskz_dpbsuds_epi32 (m, x_, y_, z_); + + x = _mm256_dpbuud_epi32 (x, y, z); + x = _mm256_mask_dpbuud_epi32 (x, m, y, z); + x = _mm256_maskz_dpbuud_epi32 (m, x, y, z); + + x_ = _mm_dpbuud_epi32 (x_, y_, z_); + x_ = _mm_mask_dpbuud_epi32 (x_, m, y_, z_); + x_ = _mm_maskz_dpbuud_epi32 (m, x_, y_, z_); + + x = _mm256_dpbuuds_epi32 (x, y, z); + x = _mm256_mask_dpbuuds_epi32 (x, m, y, z); + x = 
_mm256_maskz_dpbuuds_epi32 (m, x, y, z); + + x_ = _mm_dpbuuds_epi32 (x_, y_, z_); + x_ = _mm_mask_dpbuuds_epi32 (x_, m, y_, z_); + x_ = _mm_maskz_dpbuuds_epi32 (m, x_, y_, z_); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vpdpbssd-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vpdpbssd-2.c new file mode 100644 index 00000000000..510216a7be2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vpdpbssd-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vpdpbssd-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vpdpbssd-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vpdpbssds-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vpdpbssds-2.c new file mode 100644 index 00000000000..4b84105c202 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vpdpbssds-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vpdpbssds-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vpdpbssds-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vpdpbsud-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vpdpbsud-2.c new file mode 100644 index 00000000000..e4f0f415a1a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vpdpbsud-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vpdpbsud-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + 
+#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vpdpbsud-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vpdpbsuds-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vpdpbsuds-2.c new file mode 100644 index 00000000000..ca7942e288e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vpdpbsuds-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vpdpbsuds-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vpdpbsuds-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vpdpbuud-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vpdpbuud-2.c new file mode 100644 index 00000000000..9664c99baa2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vpdpbuud-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vpdpbuud-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vpdpbuud-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vpdpbuuds-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vpdpbuuds-2.c new file mode 100644 index 00000000000..285637bbc13 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vpdpbuuds-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vpdpbuuds-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include 
"avx10_2-512-vpdpbuuds-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-helper.h b/gcc/testsuite/gcc.target/i386/avx512f-helper.h index 72982f95aed..3cd6751af26 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-helper.h +++ b/gcc/testsuite/gcc.target/i386/avx512f-helper.h @@ -8,7 +8,11 @@ #ifndef AVX512F_HELPER_INCLUDED #define AVX512F_HELPER_INCLUDED +#if defined(AVX10) +#include "avx10-check.h" +#else #include "avx512-check.h" +#endif /* Macros expansion. */ #define CONCAT(a,b,c) a ## b ## c @@ -87,7 +91,7 @@ MAKE_MASK_ZERO(i_uq, unsigned long long) /* Function which calculates result. */ #define CALC EVAL(calc_, AVX512F_LEN,) -#ifndef AVX512VL +#if !defined(AVX512VL) || defined(AVX10_512) #define AVX512F_LEN 512 #define AVX512F_LEN_HALF 256 #endif diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-builtin.c b/gcc/testsuite/gcc.target/i386/avxvnniint8-builtin.c new file mode 100644 index 00000000000..9d61af6df54 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-builtin.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mavxvnniint8 -mno-avx10.2" } */ +typedef int v8si __attribute__ ((vector_size (32))); +v8si +foo (v8si a, v8si b, v8si c) +{ + return __builtin_ia32_vpdpbssd256 (a, b, c); +} diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc index e4713eaa88d..0852e539cd7 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc @@ -87,6 +87,8 @@ extern void test_sm3 (void) __attribute__((__target__("sm3"))); extern void test_sha512 (void) __attribute__((__target__("sha512"))); extern void test_sm4 (void) __attribute__((__target__("sm4"))); extern void test_user_msr (void) __attribute__((__target__("usermsr"))); +extern void test_avx10_2 (void) __attribute__((__target__("avx10.2"))); +extern void test_avx10_2_512 (void) __attribute__((__target__("avx10.2-512"))); extern void test_no_sgx (void) 
__attribute__((__target__("no-sgx"))); extern void test_no_avx512vpopcntdq(void) __attribute__((__target__("no-avx512vpopcntdq"))); @@ -175,6 +177,8 @@ extern void test_no_sm3 (void) __attribute__((__target__("no-sm3"))); extern void test_no_sha512 (void) __attribute__((__target__("no-sha512"))); extern void test_no_sm4 (void) __attribute__((__target__("no-sm4"))); extern void test_no_user_msr (void) __attribute__((__target__("no-usermsr"))); +extern void test_no_avx10_2 (void) __attribute__((__target__("no-avx10.2"))); +extern void test_no_avx10_2_512 (void) __attribute__((__target__("no-avx10.2-512"))); extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona"))); extern void test_arch_core2 (void) __attribute__((__target__("arch=core2"))); diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 11ba77ca404..67362a5e9ca 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -10638,6 +10638,42 @@ proc check_effective_target_apxf { } { } "-mapxf" ] } +# Return 1 if avx10.2 instructions can be compiled. +proc check_effective_target_avx10_2 { } { + return [check_no_compiler_messages avx10.2 object { + typedef int __v8si __attribute__ ((__vector_size__ (32))); + typedef char __mmask8; + + __v8si + _mm256_mask_vpdpbssd_epi32 (__v8si __A, __mmask8 __U, + __v8si __B, __v8si __C) + { + return (__v8si) __builtin_ia32_vpdpbssd_v8si_mask ((__v8si)__A, + (__v8si)__B, + (__v8si)__C, + (__mmask8)__U); + } + } "-mavx10.2" ] +} + +# Return 1 if avx10.2-512 instructions can be compiled. 
+proc check_effective_target_avx10_2_512 { } { + return [check_no_compiler_messages avx10.2-512 object { + typedef int __v16si __attribute__ ((__vector_size__ (64))); + typedef short __mmask16; + + __v16si + _mm512_vpdpbssd_epi32 (__v16si __A, __mmask16 __U, + __v16si __B, __v16si __C) + { + return (__v16si) __builtin_ia32_vpdpbssd_v16si_mask ((__v16si)__A, + (__v16si)__B, + (__v16si)__C, + (__mmask16)__U); + } + } "-mavx10.2-512" ] +} + # Return 1 if sse instructions can be compiled. proc check_effective_target_sse { } { return [check_no_compiler_messages sse object { -- 2.43.5