llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-ir Author: Phoebe Wang (phoebewang) <details> <summary>Changes</summary> …nstructions Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965 --- Patch is 101.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101452.diff 46 Files Affected: - (modified) clang/docs/ReleaseNotes.rst (+2) - (modified) clang/include/clang/Basic/BuiltinsX86.def (+8) - (modified) clang/include/clang/Driver/Options.td (+6) - (modified) clang/lib/Basic/Targets/X86.cpp (+12) - (modified) clang/lib/Basic/Targets/X86.h (+2) - (modified) clang/lib/Driver/ToolChains/Arch/X86.cpp (+1-1) - (modified) clang/lib/Headers/CMakeLists.txt (+2) - (added) clang/lib/Headers/avx10_2_512niintrin.h (+35) - (added) clang/lib/Headers/avx10_2niintrin.h (+83) - (modified) clang/lib/Headers/immintrin.h (+8) - (modified) clang/lib/Sema/SemaX86.cpp (+3) - (added) clang/test/CodeGen/X86/avx10_2_512ni-builtins.c (+24) - (added) clang/test/CodeGen/X86/avx10_2ni-builtins.c (+105) - (modified) clang/test/CodeGen/attr-target-x86.c (+4-4) - (modified) clang/test/Driver/x86-target-features.c (+7) - (modified) clang/test/Preprocessor/x86_target_features.c (+9) - (modified) llvm/docs/ReleaseNotes.rst (+2) - (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+29-1) - (modified) llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h (+43-2) - (modified) llvm/include/llvm/TargetParser/X86TargetParser.def (+2) - (modified) llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp (+3) - (modified) llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h (+4-1) - (modified) llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp (+6-4) - (modified) llvm/lib/Target/X86/X86.td (+6) - (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+1) - (modified) llvm/lib/Target/X86/X86ISelLowering.h (+2) - (added) llvm/lib/Target/X86/X86InstrAVX10.td (+33) - (modified) llvm/lib/Target/X86/X86InstrFormats.td (+2) - (modified) llvm/lib/Target/X86/X86InstrFragmentsSIMD.td (+7-5) - (modified) llvm/lib/Target/X86/X86InstrInfo.td (+1) - (modified) llvm/lib/Target/X86/X86InstrPredicates.td (+3) - (modified) llvm/lib/Target/X86/X86InstrSSE.td (+11-11) - (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+10) - (modified) llvm/lib/TargetParser/Host.cpp (+8-3) - (modified) llvm/lib/TargetParser/X86TargetParser.cpp (+3) - (added) llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll (+41) - (added) llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll (+216) - (added) llvm/test/MC/Disassembler/X86/avx10_2ni-32.txt (+150) - (added) llvm/test/MC/Disassembler/X86/avx10_2ni-64.txt (+150) - (added) llvm/test/MC/X86/avx10_2ni-32-intel.s (+149) - (added) llvm/test/MC/X86/avx10_2ni-64-att.s (+149) - (modified) llvm/test/TableGen/x86-fold-tables.inc (+9) - (modified) llvm/utils/TableGen/X86DisassemblerTables.cpp (+31-1) - (modified) llvm/utils/TableGen/X86ManualInstrMapping.def (+4) - (modified) llvm/utils/TableGen/X86RecognizableInstr.cpp (+25-1) - (modified) llvm/utils/TableGen/X86RecognizableInstr.h (+2) ``````````diff diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 3c2e0282d1c72..956c6e88da1bc 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -211,6 +211,8 @@ X86 Support functions defined by the ``*mmintrin.h`` headers. A mapping can be found in the file ``clang/www/builtins.py``. +- Support ISA of ``AVX10.2``. + Arm and AArch64 Support ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 06ca30d65f5bd..f028711a807c0 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -1959,6 +1959,14 @@ TARGET_HEADER_BUILTIN(__readgsword, "UsUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, TARGET_HEADER_BUILTIN(__readgsdword, "UNiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(__readgsqword, "ULLiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +// AVX10.2 VMPSADBW +TARGET_BUILTIN(__builtin_ia32_mpsadbw512, "V32sV64cV64cIc", "ncV:512:", "avx10.2-512") + +// AVX10.2 YMM Rounding +TARGET_BUILTIN(__builtin_ia32_vaddpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vaddph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vaddps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256") + // AVX-VNNI-INT16 TARGET_BUILTIN(__builtin_ia32_vpdpwsud128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16") TARGET_BUILTIN(__builtin_ia32_vpdpwsud256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c8c56dbb51b28..d19c2a30c5f3f 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6205,6 +6205,12 @@ def mavx10_1_512 : Flag<["-"], "mavx10.1-512">, Group<m_x86_AVX10_Features_Group def mno_avx10_1_512 : Flag<["-"], "mno-avx10.1-512">, Group<m_x86_AVX10_Features_Group>; def mavx10_1 : Flag<["-"], "mavx10.1">, Alias<mavx10_1_256>; def mno_avx10_1 : Flag<["-"], "mno-avx10.1">, Alias<mno_avx10_1_256>; +def mavx10_2_256 : Flag<["-"], "mavx10.2-256">, Group<m_x86_AVX10_Features_Group>; +def mno_avx10_2_256 : Flag<["-"], "mno-avx10.2-256">, Group<m_x86_AVX10_Features_Group>; +def mavx10_2_512 : Flag<["-"], "mavx10.2-512">, Group<m_x86_AVX10_Features_Group>; +def mno_avx10_2_512 : Flag<["-"], "mno-avx10.2-512">, Group<m_x86_AVX10_Features_Group>; +def mavx10_2 : Flag<["-"], "mavx10.2">, Alias<mavx10_2_256>; +def mno_avx10_2 : Flag<["-"], "mno-avx10.2">, Alias<mno_avx10_2_256>; def mavx2 : Flag<["-"], "mavx2">, Group<m_x86_Features_Group>; def mno_avx2 : Flag<["-"], "mno-avx2">, Group<m_x86_Features_Group>; def mavx512f : Flag<["-"], "mavx512f">, Group<m_x86_Features_Group>; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 18e6dbf03e00d..3fb3587eb5914 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -304,6 +304,10 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasAVX10_1 = true; } else if (Feature == "+avx10.1-512") { HasAVX10_1_512 = true; + } else if (Feature == "+avx10.2-256") { + HasAVX10_2 = true; + } else if (Feature == "+avx10.2-512") { + HasAVX10_2_512 = true; } else if (Feature == "+avx512cd") { HasAVX512CD = true; } else if (Feature == "+avx512vpopcntdq") { @@ -824,6 +828,10 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__AVX10_1__"); if (HasAVX10_1_512) Builder.defineMacro("__AVX10_1_512__"); + if (HasAVX10_2) + Builder.defineMacro("__AVX10_2__"); + if (HasAVX10_2_512) + Builder.defineMacro("__AVX10_2_512__"); if (HasAVX512CD) Builder.defineMacro("__AVX512CD__"); if (HasAVX512VPOPCNTDQ) @@ -1056,6 +1064,8 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("avx", true) .Case("avx10.1-256", true) .Case("avx10.1-512", true) + .Case("avx10.2-256", true) + .Case("avx10.2-512", true) .Case("avx2", true) .Case("avx512f", true) .Case("avx512cd", true) @@ -1171,6 +1181,8 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("avx", SSELevel >= AVX) .Case("avx10.1-256", HasAVX10_1) .Case("avx10.1-512", HasAVX10_1_512) + .Case("avx10.2-256", HasAVX10_2) + .Case("avx10.2-512", HasAVX10_2_512) .Case("avx2", SSELevel >= AVX2) .Case("avx512f", SSELevel >= AVX512F) .Case("avx512cd", HasAVX512CD) diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index ba34ab2c7f336..79fd5867cf667 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -92,6 +92,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasF16C = false; bool HasAVX10_1 = false; bool HasAVX10_1_512 = false; + bool HasAVX10_2 = false; + bool HasAVX10_2_512 = false; bool HasEVEX512 = false; bool HasAVX512CD = false; bool HasAVX512VPOPCNTDQ = false; diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp index dc6c8695488bb..b2109e11038fe 100644 --- a/clang/lib/Driver/ToolChains/Arch/X86.cpp +++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp @@ -241,7 +241,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, assert(Name.starts_with("avx10.") && "Invalid AVX10 feature name."); StringRef Version, Width; std::tie(Version, Width) = Name.substr(6).split('-'); - assert(Version == "1" && "Invalid AVX10 feature name."); + assert((Version == "1" || Version == "2") && "Invalid AVX10 feature name."); assert((Width == "256" || Width == "512") && "Invalid AVX10 feature name."); #endif diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 89fa0ecd45eb4..b17ab24d625a0 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -147,6 +147,8 @@ set(x86_files amxcomplexintrin.h amxfp16intrin.h amxintrin.h + avx10_2_512niintrin.h + avx10_2niintrin.h avx2intrin.h avx512bf16intrin.h avx512bitalgintrin.h diff --git a/clang/lib/Headers/avx10_2_512niintrin.h b/clang/lib/Headers/avx10_2_512niintrin.h new file mode 100644 index 0000000000000..98ed9c72afd0c --- /dev/null +++ b/clang/lib/Headers/avx10_2_512niintrin.h @@ -0,0 +1,35 @@ +/*===---- avx10_2_512niintrin.h - AVX10.2-512 new instruction intrinsics ---=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use <avx10_2_512niintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifdef __SSE2__ + +#ifndef __AVX10_2_512INTRIN_H +#define __AVX10_2_512INTRIN_H + +/* VMPSADBW */ +#define _mm512_mpsadbw_epu8(A, B, imm) \ + ((__m512i)__builtin_ia32_mpsadbw512((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), (int)(imm))) + +#define _mm512_mask_mpsadbw_epu8(W, U, A, B, imm) \ + ((__m512i)__builtin_ia32_selectw_512( \ + (__mmask32)(U), (__v32hi)_mm512_mpsadbw_epu8((A), (B), (imm)), \ + (__v32hi)(__m512i)(W))) + +#define _mm512_maskz_mpsadbw_epu8(U, A, B, imm) \ + ((__m512i)__builtin_ia32_selectw_512( \ + (__mmask32)(U), (__v32hi)_mm512_mpsadbw_epu8((A), (B), (imm)), \ + (__v32hi)_mm512_setzero_si512())) + +#endif /* __SSE2__ */ +#endif /* __AVX10_2_512INTRIN_H */ diff --git a/clang/lib/Headers/avx10_2niintrin.h b/clang/lib/Headers/avx10_2niintrin.h new file mode 100644 index 0000000000000..bbd8eb7609b66 --- /dev/null +++ b/clang/lib/Headers/avx10_2niintrin.h @@ -0,0 +1,83 @@ +/*===---- avx10_2niintrin.h - AVX10.2 new instruction intrinsics -----------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx10_2niintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifdef __SSE2__ + +#ifndef __AVX10_2INTRIN_H +#define __AVX10_2INTRIN_H + +/* VMPSADBW */ +#define _mm_mask_mpsadbw_epu8(W, U, A, B, imm) \ + ((__m128i)__builtin_ia32_selectw_128( \ + (__mmask8)(U), (__v8hi)_mm_mpsadbw_epu8((A), (B), (imm)), \ + (__v8hi)(__m128i)(W))) + +#define _mm_maskz_mpsadbw_epu8(U, A, B, imm) \ + ((__m128i)__builtin_ia32_selectw_128( \ + (__mmask8)(U), (__v8hi)_mm_mpsadbw_epu8((A), (B), (imm)), \ + (__v8hi)_mm_setzero_si128())) + +#define _mm256_mask_mpsadbw_epu8(W, U, A, B, imm) \ + ((__m256i)__builtin_ia32_selectw_256( \ + (__mmask16)(U), (__v16hi)_mm256_mpsadbw_epu8((A), (B), (imm)), \ + (__v16hi)(__m256i)(W))) + +#define _mm256_maskz_mpsadbw_epu8(U, A, B, imm) \ + ((__m256i)__builtin_ia32_selectw_256( \ + (__mmask16)(U), (__v16hi)_mm256_mpsadbw_epu8((A), (B), (imm)), \ + (__v16hi)_mm256_setzero_si256())) + +/* YMM Rounding */ +#define _mm256_add_round_pd(A, B, R) \ + ((__m256d)__builtin_ia32_vaddpd256_round((__v4df)(__m256d)(A), \ + (__v4df)(__m256d)(B), (int)(R))) + +#define _mm256_mask_add_round_pd(W, U, A, B, R) \ + ((__m256d)__builtin_ia32_selectpd_256( \ + (__mmask8)(U), (__v4df)_mm256_add_round_pd((A), (B), (R)), \ + (__v4df)(__m256d)(W))) + +#define _mm256_maskz_add_round_pd(U, A, B, R) \ + ((__m256d)__builtin_ia32_selectpd_256( \ + (__mmask8)(U), (__v4df)_mm256_add_round_pd((A), (B), (R)), \ + (__v4df)_mm256_setzero_pd())) + +#define _mm256_add_round_ph(A, B, R) \ + ((__m256h)__builtin_ia32_vaddph256_round((__v16hf)(__m256h)(A), \ + (__v16hf)(__m256h)(B), (int)(R))) + +#define _mm256_mask_add_round_ph(W, U, A, B, R) \ + ((__m256h)__builtin_ia32_selectph_256( \ + (__mmask16)(U), (__v16hf)_mm256_add_round_ph((A), (B), (R)), \ + (__v16hf)(__m256h)(W))) + +#define _mm256_maskz_add_round_ph(U, A, B, R) \ + ((__m256h)__builtin_ia32_selectph_256( \ + (__mmask16)(U), (__v16hf)_mm256_add_round_ph((A), (B), (R)), \ + (__v16hf)_mm256_setzero_ph())) + +#define _mm256_add_round_ps(A, B, R) \ + ((__m256)__builtin_ia32_vaddps256_round((__v8sf)(__m256)(A), \ + (__v8sf)(__m256)(B), (int)(R))) + +#define _mm256_mask_add_round_ps(W, U, A, B, R) \ + ((__m256)__builtin_ia32_selectps_256( \ + (__mmask8)(U), (__v8sf)_mm256_add_round_ps((A), (B), (R)), \ + (__v8sf)(__m256)(W))) + +#define _mm256_maskz_add_round_ps(U, A, B, R) \ + ((__m256)__builtin_ia32_selectps_256( \ + (__mmask8)(U), (__v8sf)_mm256_add_round_ps((A), (B), (R)), \ + (__v8sf)_mm256_setzero_ps())) + +#endif /* __AVX10_2INTRIN_H */ +#endif /* __SSE2__ */ diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index cd6cf09b90cad..e0957257ed5c7 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -648,6 +648,14 @@ _storebe_i64(void * __P, long long __D) { #include <avx512vlvp2intersectintrin.h> #endif +#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__) +#include <avx10_2niintrin.h> +#endif + +#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2_512__) +#include <avx10_2_512niintrin.h> +#endif + #if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__) #include <enqcmdintrin.h> #endif diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp index 8f9057bbaf259..bf2d2d8ac8f42 100644 --- a/clang/lib/Sema/SemaX86.cpp +++ b/clang/lib/Sema/SemaX86.cpp @@ -162,6 +162,9 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_mulps512: case X86::BI__builtin_ia32_subpd512: case X86::BI__builtin_ia32_subps512: + case X86::BI__builtin_ia32_vaddpd256_round: + case X86::BI__builtin_ia32_vaddph256_round: + case X86::BI__builtin_ia32_vaddps256_round: case X86::BI__builtin_ia32_cvtsi2sd64: case X86::BI__builtin_ia32_cvtsi2ss32: case X86::BI__builtin_ia32_cvtsi2ss64: diff --git a/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c b/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c new file mode 100644 index 0000000000000..5983e0d969b68 --- /dev/null +++ b/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -o - | FileCheck %s + +#include <immintrin.h> + +// VMPSADBW +__m512i test_mm512_mpsadbw_epu8(__m512i __A, __m512i __B) { +// CHECK-LABEL: @test_mm512_mpsadbw_epu8 +// CHECK: @llvm.x86.avx10.vmpsadbw.512 + return _mm512_mpsadbw_epu8(__A, __B, 17); +} + +__m512i test_mm512_mask_mpsadbw_epu8(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { +// CHECK-LABEL: @test_mm512_mask_mpsadbw_epu8 +// CHECK: @llvm.x86.avx10.vmpsadbw.512 +// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} + return _mm512_mask_mpsadbw_epu8(__W, __U, __A, __B, 17); +} + +__m512i test_mm512_maskz_mpsadbw_epu8(__mmask32 __U, __m512i __A, __m512i __B) { +// CHECK-LABEL: @test_mm512_maskz_mpsadbw_epu8 +// CHECK: @llvm.x86.avx10.vmpsadbw.512 +// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} + return _mm512_maskz_mpsadbw_epu8(__U, __A, __B, 17); +} diff --git a/clang/test/CodeGen/X86/avx10_2ni-builtins.c b/clang/test/CodeGen/X86/avx10_2ni-builtins.c new file mode 100644 index 0000000000000..c8e4d3c906a72 --- /dev/null +++ b/clang/test/CodeGen/X86/avx10_2ni-builtins.c @@ -0,0 +1,105 @@ +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s + +#include <immintrin.h> + +// VMPSADBW +__m128i test_mm_mpsadbw_epu8(__m128i __A, __m128i __B) { +// CHECK-LABEL: @test_mm_mpsadbw_epu8 +// CHECK: @llvm.x86.sse41.mpsadbw + return _mm_mpsadbw_epu8(__A, __B, 170); +} + +__m128i test_mm_mask_mpsadbw_epu8(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { +// CHECK-LABEL: @test_mm_mask_mpsadbw_epu8 +// CHECK: @llvm.x86.sse41.mpsadbw +// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + return _mm_mask_mpsadbw_epu8(__W, __U, __A, __B, 170); +} + +__m128i test_mm_maskz_mpsadbw_epu8(__mmask8 __U, __m128i __A, __m128i __B) { +// CHECK-LABEL: @test_mm_maskz_mpsadbw_epu8 +// CHECK: @llvm.x86.sse41.mpsadbw +// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + return _mm_maskz_mpsadbw_epu8(__U, __A, __B, 170); +} + +__m256i test_mm256_mpsadbw_epu8(__m256i __A, __m256i __B) { +// CHECK-LABEL: @test_mm256_mpsadbw_epu8 +// CHECK: @llvm.x86.avx2.mpsadbw + return _mm256_mpsadbw_epu8(__A, __B, 170); +} + +__m256i test_mm256_mask_mpsadbw_epu8(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { +// CHECK-LABEL: @test_mm256_mask_mpsadbw_epu8 +// CHECK: @llvm.x86.avx2.mpsadbw +// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + return _mm256_mask_mpsadbw_epu8(__W, __U, __A, __B, 170); +} + +__m256i test_mm256_maskz_mpsadbw_epu8(__mmask16 __U, __m256i __A, __m256i __B) { +// CHECK-LABEL: @test_mm256_maskz_mpsadbw_epu8 +// CHECK: @llvm.x86.avx2.mpsadbw +// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + return _mm256_maskz_mpsadbw_epu8(__U, __A, __B, 170); +} + +// YMM Rounding +__m256d test_mm256_add_round_pd(__m256d __A, __m256d __B) { +// CHECK-LABEL: @test_mm256_add_round_pd +// CHECK: @llvm.x86.avx10.vaddpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 11) + return _mm256_add_round_pd(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +} + +__m256d test_mm256_mask_add_round_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { +// CHECK-LABEL: @test_mm256_mask_add_round_pd +// CHECK: @llvm.x86.avx10.vaddpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 10) +// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + return _mm256_mask_add_round_pd(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +} + +__m256d test_mm256_maskz_add_round_pd(__mmask8 __U, __m256d __A, __m256d __B) { +// CHECK-LABEL: @test_mm256_maskz_add_round_pd +// CHECK: @llvm.x86.avx10.vaddpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 9) +// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + return _mm256_maskz_add_round_pd(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +} + +__m256h test_mm256_add_round_ph(__m256h __A, __m256h __B) { +// CHECK-LABEL: @test_mm256_add_round_ph +// CHECK: @llvm.x86.avx10.vaddph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 11) + return _mm256_add_round_ph(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +} + +__m256h test_mm256_mask_add_round_ph(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) { +// CHECK-LABEL: @test_mm256_mask_add_round_ph +// CHECK: @llvm.x86.avx10.vaddph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 10) +// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} + return _mm256_mask_add_round_ph(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +} + +__m256h test_mm256_maskz_add_round_ph(__mmask8 __U, __m256h __A, __m256h __B) { +// CHECK-LABEL: @test_mm256_maskz_add_round_ph +// CHECK: @llvm.x86.avx10.vaddph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 9) +// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} + return _mm256_maskz_add_round_ph(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +} + +__m256 test_mm256_add_round_ps(__m256 __A, __m256 __B) { +// CHECK-LABEL: @test_mm256_add_round_ps +// CHECK: @llvm.x86.avx10.vaddps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 11) + return _mm256_add_round_ps(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +} + +__m256 test_mm256_mask_add_round_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { +// CHECK-LABEL: @test_mm256_mask_add_round_ps +// CHECK: @llvm.x86.avx10.vaddps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 10) +// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + return _mm256_mask_add_round_ps(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +} + +__m256 test_mm256_maskz_add_round_ps(__mmask8 __U, __m256 __A, __m256 __B) { +// CHECK-LABEL: @test_mm256_maskz_add_round_ps +// CHECK: @llvm.x86.avx10.vaddps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 9) +// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + return _mm256_maskz_add_round_ps(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +} diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c index b1ae6678531b9..593ccffbcda09 100644 --- a/clang/test/CodeGen/attr-target-x86.c +++ b/clang/test/CodeGen/attr-target-x86.c @@... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/101452 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits