llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-mc Author: Phoebe Wang (phoebewang) <details> <summary>Changes</summary> Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965 --- Patch is 445.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101783.diff 28 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsX86.def (+45-24) - (modified) clang/lib/Headers/avx10_2_512niintrin.h (+279) - (modified) clang/lib/Headers/avx10_2niintrin.h (+369) - (modified) clang/lib/Headers/avxvnniint16intrin.h (+36-77) - (modified) clang/lib/Headers/avxvnniint8intrin.h (+36-77) - (modified) clang/test/CodeGen/X86/avx10_2_512ni-builtins.c (+276) - (modified) clang/test/CodeGen/X86/avx10_2ni-builtins.c (+381) - (modified) clang/test/CodeGen/X86/avxvnniint16-builtins.c (+2) - (modified) clang/test/CodeGen/X86/avxvnniint8-builtins.c (+2) - (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+79) - (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+7) - (modified) llvm/lib/Target/X86/X86ISelLowering.h (+9-1) - (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+34) - (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+32-27) - (modified) llvm/lib/Target/X86/X86InstrFragmentsSIMD.td (+12) - (modified) llvm/lib/Target/X86/X86InstrInfo.cpp (+54) - (modified) llvm/lib/Target/X86/X86InstrSSE.td (+37-42) - (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+33) - (modified) llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll (+385-2) - (modified) llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll (+563) - (modified) llvm/test/CodeGen/X86/avxvnniint16-intrinsics.ll (+62) - (modified) llvm/test/CodeGen/X86/avxvnniint8-intrinsics.ll (+206) - (modified) llvm/test/MC/Disassembler/X86/avx10_2ni-32.txt (+1410) - (modified) llvm/test/MC/Disassembler/X86/avx10_2ni-64.txt (+1410) - (modified) llvm/test/MC/X86/avx10_2ni-32-intel.s (+1410) - (modified) llvm/test/MC/X86/avx10_2ni-64-att.s (+1410) - (modified) llvm/test/TableGen/x86-fold-tables.inc (+234) - (modified) llvm/utils/TableGen/X86InstrMappingEmitter.cpp (+2-1) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index f028711a807c0..3ea196d949d2d 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -773,18 +773,18 @@ TARGET_BUILTIN(__builtin_ia32_vpdpwssds256, "V8iV8iV8iV8i", "ncV:256:", "avx512v TARGET_BUILTIN(__builtin_ia32_vpdpwssds512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni,evex512") // AVX-VNNI-INT8 -TARGET_BUILTIN(__builtin_ia32_vpdpbssd128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8") -TARGET_BUILTIN(__builtin_ia32_vpdpbssd256, "V8iV8iV8iV8i", "ncV:256:", "avxvnniint8") -TARGET_BUILTIN(__builtin_ia32_vpdpbssds128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8") -TARGET_BUILTIN(__builtin_ia32_vpdpbssds256, "V8iV8iV8iV8i", "ncV:256:", "avxvnniint8") -TARGET_BUILTIN(__builtin_ia32_vpdpbsud128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8") -TARGET_BUILTIN(__builtin_ia32_vpdpbsud256, "V8iV8iV8iV8i", "ncV:256:", "avxvnniint8") -TARGET_BUILTIN(__builtin_ia32_vpdpbsuds128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8") -TARGET_BUILTIN(__builtin_ia32_vpdpbsuds256, "V8iV8iV8iV8i", "ncV:256:", "avxvnniint8") -TARGET_BUILTIN(__builtin_ia32_vpdpbuud128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8") -TARGET_BUILTIN(__builtin_ia32_vpdpbuud256, "V8iV8iV8iV8i", "ncV:256:", "avxvnniint8") -TARGET_BUILTIN(__builtin_ia32_vpdpbuuds128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8") -TARGET_BUILTIN(__builtin_ia32_vpdpbuuds256, "V8iV8iV8iV8i", "ncV:256:", "avxvnniint8") +TARGET_BUILTIN(__builtin_ia32_vpdpbssd128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpbssd256, "V8iV8iV8iV8i", "ncV:256:", "avxvnniint8|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpbssds128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpbssds256, "V8iV8iV8iV8i", "ncV:256:", "avxvnniint8|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpbsud128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpbsud256, "V8iV8iV8iV8i", "ncV:256:", "avxvnniint8|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpbsuds128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpbsuds256, "V8iV8iV8iV8i", "ncV:256:", "avxvnniint8|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpbuud128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpbuud256, "V8iV8iV8iV8i", "ncV:256:", "avxvnniint8|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpbuuds128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpbuuds256, "V8iV8iV8iV8i", "ncV:256:", "avxvnniint8|avx10.2-256") TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2dvC*V2OiUcIi", "nV:128:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2OiV2OivC*V2OiUcIi", "nV:128:", "avx512vl") @@ -1959,6 +1959,27 @@ TARGET_HEADER_BUILTIN(__readgsword, "UsUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, TARGET_HEADER_BUILTIN(__readgsdword, "UNiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(__readgsqword, "ULLiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +// AVX10.2 VNNI FP16 +TARGET_BUILTIN(__builtin_ia32_vdpphps128, "V4fV4fV8xV8x", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vdpphps256, "V8fV8fV16xV16x", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vdpphps512, "V16fV16fV32xV32x", "ncV:512:", "avx10.2-512") + +// AVX10.2 VNNI INT8 +TARGET_BUILTIN(__builtin_ia32_vpdpbssd512, "V16iV16iV16iV16i", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vpdpbssds512, "V16iV16iV16iV16i", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vpdpbsud512, "V16iV16iV16iV16i", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vpdpbsuds512, "V16iV16iV16iV16i", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vpdpbuud512, "V16iV16iV16iV16i", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vpdpbuuds512, "V16iV16iV16iV16i", "ncV:512:", "avx10.2-512") + +// AVX10.2 VNNI INT16 +TARGET_BUILTIN(__builtin_ia32_vpdpwsud512, "V16iV16iV16iV16i", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vpdpwsuds512, "V16iV16iV16iV16i", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vpdpwusd512, "V16iV16iV16iV16i", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vpdpwusds512, "V16iV16iV16iV16i", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vpdpwuud512, "V16iV16iV16iV16i", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vpdpwuuds512, "V16iV16iV16iV16i", "nV:512:", "avx10.2-512") + // AVX10.2 VMPSADBW TARGET_BUILTIN(__builtin_ia32_mpsadbw512, "V32sV64cV64cIc", "ncV:512:", "avx10.2-512") @@ -1968,18 +1989,18 @@ TARGET_BUILTIN(__builtin_ia32_vaddph256_round, "V16xV16xV16xIi", "nV:256:", "avx TARGET_BUILTIN(__builtin_ia32_vaddps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256") // AVX-VNNI-INT16 -TARGET_BUILTIN(__builtin_ia32_vpdpwsud128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16") -TARGET_BUILTIN(__builtin_ia32_vpdpwsud256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16") -TARGET_BUILTIN(__builtin_ia32_vpdpwsuds128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16") -TARGET_BUILTIN(__builtin_ia32_vpdpwsuds256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16") -TARGET_BUILTIN(__builtin_ia32_vpdpwusd128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16") -TARGET_BUILTIN(__builtin_ia32_vpdpwusd256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16") -TARGET_BUILTIN(__builtin_ia32_vpdpwusds128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16") -TARGET_BUILTIN(__builtin_ia32_vpdpwusds256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16") -TARGET_BUILTIN(__builtin_ia32_vpdpwuud128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16") -TARGET_BUILTIN(__builtin_ia32_vpdpwuud256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16") -TARGET_BUILTIN(__builtin_ia32_vpdpwuuds128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16") -TARGET_BUILTIN(__builtin_ia32_vpdpwuuds256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16") +TARGET_BUILTIN(__builtin_ia32_vpdpwsud128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpwsud256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpwsuds128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpwsuds256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpwusd128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpwusd256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpwusds128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpwusds256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpwuud128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpwuud256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpwuuds128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16|avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vpdpwuuds256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16|avx10.2-256") // AVX-NE-CONVERT TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps128, "V4fyC*", "nV:128:", "avxneconvert") diff --git a/clang/lib/Headers/avx10_2_512niintrin.h b/clang/lib/Headers/avx10_2_512niintrin.h index 5ad6993b45433..7e614f7740bff 100644 --- a/clang/lib/Headers/avx10_2_512niintrin.h +++ b/clang/lib/Headers/avx10_2_512niintrin.h @@ -16,6 +16,35 @@ #ifndef __AVX10_2_512NIINTRIN_H #define __AVX10_2_512NIINTRIN_H +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \ + __min_vector_width__(512))) + +/* VNNI FP16 */ +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_dpph_ps(__m512 __W, + __m512h __A, + __m512h __B) { + return (__m512)__builtin_ia32_vdpphps512((__v16sf)__W, (__v32hf)__A, + (__v32hf)__B); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_dpph_ps(__m512 __W, + __mmask16 __U, + __m512h __A, + __m512h __B) { + return (__m512)__builtin_ia32_selectps_512( + (__mmask16)__U, (__v16sf)_mm512_dpph_ps(__W, __A, __B), (__v16sf)__W); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_dpph_ps(__mmask16 __U, + __m512 __W, + __m512h __A, + __m512h __B) { + return (__m512)__builtin_ia32_selectps_512( + (__mmask16)__U, (__v16sf)_mm512_dpph_ps(__W, __A, __B), + (__v16sf)_mm512_setzero_ps()); +} + /* VMPSADBW */ #define _mm512_mpsadbw_epu8(A, B, imm) \ ((__m512i)__builtin_ia32_mpsadbw512((__v64qi)(__m512i)(A), \ @@ -31,5 +60,255 @@ (__mmask32)(U), (__v32hi)_mm512_mpsadbw_epu8((A), (B), (imm)), \ (__v32hi)_mm512_setzero_si512())) +/* VNNI INT8 */ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbssd_epi32(__m512i __W, + __m512i __A, + __m512i __B) { + return (__m512i)__builtin_ia32_vpdpbssd512((__v16si)__W, (__v16si)__A, + (__v16si)__B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpbssd_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbssd_epi32(__W, __A, __B), (__v16si)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbssd_epi32( + __mmask16 __U, __m512i __W, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbssd_epi32(__W, __A, __B), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbssds_epi32(__m512i __W, + __m512i __A, + __m512i __B) { + return (__m512i)__builtin_ia32_vpdpbssds512((__v16si)__W, (__v16si)__A, + (__v16si)__B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbssds_epi32( + __m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbssds_epi32(__W, __A, __B), (__v16si)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbssds_epi32( + __mmask16 __U, __m512i __W, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbssds_epi32(__W, __A, __B), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbsud_epi32(__m512i __W, + __m512i __A, + __m512i __B) { + return (__m512i)__builtin_ia32_vpdpbsud512((__v16si)__W, (__v16si)__A, + (__v16si)__B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpbsud_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbsud_epi32(__W, __A, __B), (__v16si)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbsud_epi32( + __mmask16 __U, __m512i __W, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbsud_epi32(__W, __A, __B), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbsuds_epi32(__m512i __W, + __m512i __A, + __m512i __B) { + return (__m512i)__builtin_ia32_vpdpbsuds512((__v16si)__W, (__v16si)__A, + (__v16si)__B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbsuds_epi32( + __m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbsuds_epi32(__W, __A, __B), (__v16si)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbsuds_epi32( + __mmask16 __U, __m512i __W, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbsuds_epi32(__W, __A, __B), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbuud_epi32(__m512i __W, + __m512i __A, + __m512i __B) { + return (__m512i)__builtin_ia32_vpdpbuud512((__v16si)__W, (__v16si)__A, + (__v16si)__B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpbuud_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbuud_epi32(__W, __A, __B), (__v16si)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbuud_epi32( + __mmask16 __U, __m512i __W, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbuud_epi32(__W, __A, __B), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbuuds_epi32(__m512i __W, + __m512i __A, + __m512i __B) { + return (__m512i)__builtin_ia32_vpdpbuuds512((__v16si)__W, (__v16si)__A, + (__v16si)__B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbuuds_epi32( + __m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbuuds_epi32(__W, __A, __B), (__v16si)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbuuds_epi32( + __mmask16 __U, __m512i __W, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbuuds_epi32(__W, __A, __B), + (__v16si)_mm512_setzero_si512()); +} + +/* VNNI INT16 */ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwsud_epi32(__m512i __A, + __m512i __B, + __m512i __C) { + return (__m512i)__builtin_ia32_vpdpwsud512((__v16si)__A, (__v16si)__B, + (__v16si)__C); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpwsud_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwsud_epi32(__A, __B, __C), + (__v16si)__A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsud_epi32( + __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwsud_epi32(__A, __B, __C), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwsuds_epi32(__m512i __A, + __m512i __B, + __m512i __C) { + return (__m512i)__builtin_ia32_vpdpwsuds512((__v16si)__A, (__v16si)__B, + (__v16si)__C); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwsuds_epi32( + __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwsuds_epi32(__A, __B, __C), + (__v16si)__A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsuds_epi32( + __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwsuds_epi32(__A, __B, __C), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwusd_epi32(__m512i __A, + __m512i __B, + __m512i __C) { + return (__m512i)__builtin_ia32_vpdpwusd512((__v16si)__A, (__v16si)__B, + (__v16si)__C); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpwusd_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwusd_epi32(__A, __B, __C), + (__v16si)__A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusd_epi32( + __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwusd_epi32(__A, __B, __C), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwusds_epi32(__m512i __A, + __m512i __B, + __m512i __C) { + return (__m512i)__builtin_ia32_vpdpwusds512((__v16si)__A, (__v16si)__B, + (__v16si)__C); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwusds_epi32( + __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwusds_epi32(__A, __B, __C), + (__v16si)__A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusds_epi32( + __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwusds_epi32(__A, __B, __C), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwuud_epi32(__m512i __A, + __m512i __B, + __m512i __C) { + return (__m512i)__builtin_ia32_vpdpwuud512((__v16si)__A, (__v16si)__B, + (__v16si)__C); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpwuud_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwuud_epi32(__A, __B, __C), + (__v16si)__A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwuud_epi32( + __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwuud_epi32(__A, __B, __C), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwuuds_epi32(__m512i __A, + __m512i __B, + __m512i __C) { + return (__m512i)__builtin_ia32_vpdpwuuds512((__v16si)__A, (__v16si)__B, + (__v16si)__C); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwuuds_epi32( + __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwuuds_epi32(__A, __B, __C), + (__v16si)__A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwuuds_epi32( + __m512i __... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/101783 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits