phoebewang (https://github.com/phoebewang) created https://github.com/llvm/llvm-project/pull/101452
…nstructions Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965 >From 56b26bfaaa071dde226077531aaa46f4b671a815 Mon Sep 17 00:00:00 2001 From: "Wang, Phoebe" <phoebe.w...@intel.com> Date: Sat, 27 Jul 2024 22:21:32 +0800 Subject: [PATCH] [X86][AVX10.2] Support AVX10.2 option and VMPSADBW/VADDP[D,H,S] new instructions Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965 --- clang/docs/ReleaseNotes.rst | 2 + clang/include/clang/Basic/BuiltinsX86.def | 8 + clang/include/clang/Driver/Options.td | 6 + clang/lib/Basic/Targets/X86.cpp | 12 + clang/lib/Basic/Targets/X86.h | 2 + clang/lib/Driver/ToolChains/Arch/X86.cpp | 2 +- clang/lib/Headers/CMakeLists.txt | 2 + clang/lib/Headers/avx10_2_512niintrin.h | 35 +++ clang/lib/Headers/avx10_2niintrin.h | 83 +++++++ clang/lib/Headers/immintrin.h | 8 + clang/lib/Sema/SemaX86.cpp | 3 + .../test/CodeGen/X86/avx10_2_512ni-builtins.c | 24 ++ clang/test/CodeGen/X86/avx10_2ni-builtins.c | 105 +++++++++ clang/test/CodeGen/attr-target-x86.c | 8 +- clang/test/Driver/x86-target-features.c | 7 + clang/test/Preprocessor/x86_target_features.c | 9 + llvm/docs/ReleaseNotes.rst | 2 + llvm/include/llvm/IR/IntrinsicsX86.td | 30 ++- .../Support/X86DisassemblerDecoderCommon.h | 45 +++- .../llvm/TargetParser/X86TargetParser.def | 2 + .../X86/Disassembler/X86Disassembler.cpp | 3 + .../lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 5 +- .../X86/MCTargetDesc/X86MCCodeEmitter.cpp | 10 +- llvm/lib/Target/X86/X86.td | 6 + llvm/lib/Target/X86/X86ISelLowering.cpp | 1 + llvm/lib/Target/X86/X86ISelLowering.h | 2 + llvm/lib/Target/X86/X86InstrAVX10.td | 33 +++ llvm/lib/Target/X86/X86InstrFormats.td | 2 + llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 12 +- llvm/lib/Target/X86/X86InstrInfo.td | 1 + llvm/lib/Target/X86/X86InstrPredicates.td | 3 + llvm/lib/Target/X86/X86InstrSSE.td | 22 +- llvm/lib/Target/X86/X86IntrinsicsInfo.h | 10 + llvm/lib/TargetParser/Host.cpp | 11 +- llvm/lib/TargetParser/X86TargetParser.cpp | 3 + .../CodeGen/X86/avx10_2_512ni-intrinsics.ll | 41 ++++ llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll | 216 ++++++++++++++++++ .../test/MC/Disassembler/X86/avx10_2ni-32.txt | 150 ++++++++++++ .../test/MC/Disassembler/X86/avx10_2ni-64.txt | 150 ++++++++++++ llvm/test/MC/X86/avx10_2ni-32-intel.s | 149 ++++++++++++ llvm/test/MC/X86/avx10_2ni-64-att.s | 149 ++++++++++++ llvm/test/TableGen/x86-fold-tables.inc | 9 + llvm/utils/TableGen/X86DisassemblerTables.cpp | 32 ++- llvm/utils/TableGen/X86ManualInstrMapping.def | 4 + llvm/utils/TableGen/X86RecognizableInstr.cpp | 26 ++- llvm/utils/TableGen/X86RecognizableInstr.h | 2 + 46 files changed, 1413 insertions(+), 34 deletions(-) create mode 100644 clang/lib/Headers/avx10_2_512niintrin.h create mode 100644 clang/lib/Headers/avx10_2niintrin.h create mode 100644 clang/test/CodeGen/X86/avx10_2_512ni-builtins.c create mode 100644 clang/test/CodeGen/X86/avx10_2ni-builtins.c create mode 100644 llvm/lib/Target/X86/X86InstrAVX10.td create mode 100644 llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll create mode 100644 llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll create mode 100644 llvm/test/MC/Disassembler/X86/avx10_2ni-32.txt create mode 100644 llvm/test/MC/Disassembler/X86/avx10_2ni-64.txt create mode 100644 llvm/test/MC/X86/avx10_2ni-32-intel.s create mode 100644 llvm/test/MC/X86/avx10_2ni-64-att.s diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 3c2e0282d1c72..956c6e88da1bc 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -211,6 +211,8 @@ X86 Support functions defined by the 
``*mmintrin.h`` headers. A mapping can be found in the file ``clang/www/builtins.py``. +- Support ISA of ``AVX10.2``. + Arm and AArch64 Support ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 06ca30d65f5bd..f028711a807c0 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -1959,6 +1959,14 @@ TARGET_HEADER_BUILTIN(__readgsword, "UsUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, TARGET_HEADER_BUILTIN(__readgsdword, "UNiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(__readgsqword, "ULLiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +// AVX10.2 VMPSADBW +TARGET_BUILTIN(__builtin_ia32_mpsadbw512, "V32sV64cV64cIc", "ncV:512:", "avx10.2-512") + +// AVX10.2 YMM Rounding +TARGET_BUILTIN(__builtin_ia32_vaddpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vaddph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vaddps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256") + // AVX-VNNI-INT16 TARGET_BUILTIN(__builtin_ia32_vpdpwsud128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16") TARGET_BUILTIN(__builtin_ia32_vpdpwsud256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c8c56dbb51b28..d19c2a30c5f3f 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6205,6 +6205,12 @@ def mavx10_1_512 : Flag<["-"], "mavx10.1-512">, Group<m_x86_AVX10_Features_Group def mno_avx10_1_512 : Flag<["-"], "mno-avx10.1-512">, Group<m_x86_AVX10_Features_Group>; def mavx10_1 : Flag<["-"], "mavx10.1">, Alias<mavx10_1_256>; def mno_avx10_1 : Flag<["-"], "mno-avx10.1">, Alias<mno_avx10_1_256>; +def mavx10_2_256 : Flag<["-"], "mavx10.2-256">, Group<m_x86_AVX10_Features_Group>; +def mno_avx10_2_256 : Flag<["-"], "mno-avx10.2-256">, Group<m_x86_AVX10_Features_Group>; +def mavx10_2_512 : Flag<["-"], "mavx10.2-512">, Group<m_x86_AVX10_Features_Group>; +def mno_avx10_2_512 : Flag<["-"], "mno-avx10.2-512">, Group<m_x86_AVX10_Features_Group>; +def mavx10_2 : Flag<["-"], "mavx10.2">, Alias<mavx10_2_256>; +def mno_avx10_2 : Flag<["-"], "mno-avx10.2">, Alias<mno_avx10_2_256>; def mavx2 : Flag<["-"], "mavx2">, Group<m_x86_Features_Group>; def mno_avx2 : Flag<["-"], "mno-avx2">, Group<m_x86_Features_Group>; def mavx512f : Flag<["-"], "mavx512f">, Group<m_x86_Features_Group>; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 18e6dbf03e00d..3fb3587eb5914 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -304,6 +304,10 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasAVX10_1 = true; } else if (Feature == "+avx10.1-512") { HasAVX10_1_512 = true; + } else if (Feature == "+avx10.2-256") { + HasAVX10_2 = true; + } else if (Feature == "+avx10.2-512") { + HasAVX10_2_512 = true; } else if (Feature == "+avx512cd") { HasAVX512CD = true; } else if (Feature == "+avx512vpopcntdq") { @@ -824,6 +828,10 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__AVX10_1__"); if (HasAVX10_1_512) Builder.defineMacro("__AVX10_1_512__"); + if (HasAVX10_2) + Builder.defineMacro("__AVX10_2__"); + if (HasAVX10_2_512) + Builder.defineMacro("__AVX10_2_512__"); if (HasAVX512CD) Builder.defineMacro("__AVX512CD__"); if (HasAVX512VPOPCNTDQ) @@ -1056,6 +1064,8 @@ bool 
X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("avx", true) .Case("avx10.1-256", true) .Case("avx10.1-512", true) + .Case("avx10.2-256", true) + .Case("avx10.2-512", true) .Case("avx2", true) .Case("avx512f", true) .Case("avx512cd", true) @@ -1171,6 +1181,8 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("avx", SSELevel >= AVX) .Case("avx10.1-256", HasAVX10_1) .Case("avx10.1-512", HasAVX10_1_512) + .Case("avx10.2-256", HasAVX10_2) + .Case("avx10.2-512", HasAVX10_2_512) .Case("avx2", SSELevel >= AVX2) .Case("avx512f", SSELevel >= AVX512F) .Case("avx512cd", HasAVX512CD) diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index ba34ab2c7f336..79fd5867cf667 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -92,6 +92,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasF16C = false; bool HasAVX10_1 = false; bool HasAVX10_1_512 = false; + bool HasAVX10_2 = false; + bool HasAVX10_2_512 = false; bool HasEVEX512 = false; bool HasAVX512CD = false; bool HasAVX512VPOPCNTDQ = false; diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp index dc6c8695488bb..b2109e11038fe 100644 --- a/clang/lib/Driver/ToolChains/Arch/X86.cpp +++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp @@ -241,7 +241,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, assert(Name.starts_with("avx10.") && "Invalid AVX10 feature name."); StringRef Version, Width; std::tie(Version, Width) = Name.substr(6).split('-'); - assert(Version == "1" && "Invalid AVX10 feature name."); + assert((Version == "1" || Version == "2") && "Invalid AVX10 feature name."); assert((Width == "256" || Width == "512") && "Invalid AVX10 feature name."); #endif diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 89fa0ecd45eb4..b17ab24d625a0 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -147,6 +147,8 @@ set(x86_files amxcomplexintrin.h amxfp16intrin.h amxintrin.h + avx10_2_512niintrin.h + avx10_2niintrin.h avx2intrin.h avx512bf16intrin.h avx512bitalgintrin.h diff --git a/clang/lib/Headers/avx10_2_512niintrin.h b/clang/lib/Headers/avx10_2_512niintrin.h new file mode 100644 index 0000000000000..98ed9c72afd0c --- /dev/null +++ b/clang/lib/Headers/avx10_2_512niintrin.h @@ -0,0 +1,35 @@ +/*===---- avx10_2_512niintrin.h - AVX10.2-512 new instruction intrinsics ---=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use <avx10_2_512niintrin.h> directly; include <immintrin.h> instead." 
+#endif + +#ifdef __SSE2__ + +#ifndef __AVX10_2_512INTRIN_H +#define __AVX10_2_512INTRIN_H + +/* VMPSADBW */ +#define _mm512_mpsadbw_epu8(A, B, imm) \ + ((__m512i)__builtin_ia32_mpsadbw512((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), (int)(imm))) + +#define _mm512_mask_mpsadbw_epu8(W, U, A, B, imm) \ + ((__m512i)__builtin_ia32_selectw_512( \ + (__mmask32)(U), (__v32hi)_mm512_mpsadbw_epu8((A), (B), (imm)), \ + (__v32hi)(__m512i)(W))) + +#define _mm512_maskz_mpsadbw_epu8(U, A, B, imm) \ + ((__m512i)__builtin_ia32_selectw_512( \ + (__mmask32)(U), (__v32hi)_mm512_mpsadbw_epu8((A), (B), (imm)), \ + (__v32hi)_mm512_setzero_si512())) + +#endif /* __SSE2__ */ +#endif /* __AVX10_2_512INTRIN_H */ diff --git a/clang/lib/Headers/avx10_2niintrin.h b/clang/lib/Headers/avx10_2niintrin.h new file mode 100644 index 0000000000000..bbd8eb7609b66 --- /dev/null +++ b/clang/lib/Headers/avx10_2niintrin.h @@ -0,0 +1,83 @@ +/*===---- avx10_2niintrin.h - AVX10.2 new instruction intrinsics -----------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx10_2niintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifdef __SSE2__ + +#ifndef __AVX10_2INTRIN_H +#define __AVX10_2INTRIN_H + +/* VMPSADBW */ +#define _mm_mask_mpsadbw_epu8(W, U, A, B, imm) \ + ((__m128i)__builtin_ia32_selectw_128( \ + (__mmask8)(U), (__v8hi)_mm_mpsadbw_epu8((A), (B), (imm)), \ + (__v8hi)(__m128i)(W))) + +#define _mm_maskz_mpsadbw_epu8(U, A, B, imm) \ + ((__m128i)__builtin_ia32_selectw_128( \ + (__mmask8)(U), (__v8hi)_mm_mpsadbw_epu8((A), (B), (imm)), \ + (__v8hi)_mm_setzero_si128())) + +#define _mm256_mask_mpsadbw_epu8(W, U, A, B, imm) \ + ((__m256i)__builtin_ia32_selectw_256( \ + (__mmask16)(U), (__v16hi)_mm256_mpsadbw_epu8((A), (B), (imm)), \ + (__v16hi)(__m256i)(W))) + +#define _mm256_maskz_mpsadbw_epu8(U, A, B, imm) \ + ((__m256i)__builtin_ia32_selectw_256( \ + (__mmask16)(U), (__v16hi)_mm256_mpsadbw_epu8((A), (B), (imm)), \ + (__v16hi)_mm256_setzero_si256())) + +/* YMM Rounding */ +#define _mm256_add_round_pd(A, B, R) \ + ((__m256d)__builtin_ia32_vaddpd256_round((__v4df)(__m256d)(A), \ + (__v4df)(__m256d)(B), (int)(R))) + +#define _mm256_mask_add_round_pd(W, U, A, B, R) \ + ((__m256d)__builtin_ia32_selectpd_256( \ + (__mmask8)(U), (__v4df)_mm256_add_round_pd((A), (B), (R)), \ + (__v4df)(__m256d)(W))) + +#define _mm256_maskz_add_round_pd(U, A, B, R) \ + ((__m256d)__builtin_ia32_selectpd_256( \ + (__mmask8)(U), (__v4df)_mm256_add_round_pd((A), (B), (R)), \ + (__v4df)_mm256_setzero_pd())) + +#define _mm256_add_round_ph(A, B, R) \ + ((__m256h)__builtin_ia32_vaddph256_round((__v16hf)(__m256h)(A), \ + (__v16hf)(__m256h)(B), (int)(R))) + +#define _mm256_mask_add_round_ph(W, U, A, B, R) \ + ((__m256h)__builtin_ia32_selectph_256( \ + (__mmask16)(U), (__v16hf)_mm256_add_round_ph((A), (B), (R)), \ + (__v16hf)(__m256h)(W))) + +#define _mm256_maskz_add_round_ph(U, A, B, R) \ + ((__m256h)__builtin_ia32_selectph_256( \ + (__mmask16)(U), (__v16hf)_mm256_add_round_ph((A), (B), (R)), \ + (__v16hf)_mm256_setzero_ph())) + +#define _mm256_add_round_ps(A, B, R) \ + ((__m256)__builtin_ia32_vaddps256_round((__v8sf)(__m256)(A), \ + (__v8sf)(__m256)(B), (int)(R))) + +#define _mm256_mask_add_round_ps(W, U, A, B, R) \ + ((__m256)__builtin_ia32_selectps_256( \ + 
(__mmask8)(U), (__v8sf)_mm256_add_round_ps((A), (B), (R)), \ + (__v8sf)(__m256)(W))) + +#define _mm256_maskz_add_round_ps(U, A, B, R) \ + ((__m256)__builtin_ia32_selectps_256( \ + (__mmask8)(U), (__v8sf)_mm256_add_round_ps((A), (B), (R)), \ + (__v8sf)_mm256_setzero_ps())) + +#endif /* __AVX10_2INTRIN_H */ +#endif /* __SSE2__ */ diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index cd6cf09b90cad..e0957257ed5c7 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -648,6 +648,14 @@ _storebe_i64(void * __P, long long __D) { #include <avx512vlvp2intersectintrin.h> #endif +#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__) +#include <avx10_2niintrin.h> +#endif + +#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2_512__) +#include <avx10_2_512niintrin.h> +#endif + #if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__) #include <enqcmdintrin.h> #endif diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp index 8f9057bbaf259..bf2d2d8ac8f42 100644 --- a/clang/lib/Sema/SemaX86.cpp +++ b/clang/lib/Sema/SemaX86.cpp @@ -162,6 +162,9 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_mulps512: case X86::BI__builtin_ia32_subpd512: case X86::BI__builtin_ia32_subps512: + case X86::BI__builtin_ia32_vaddpd256_round: + case X86::BI__builtin_ia32_vaddph256_round: + case X86::BI__builtin_ia32_vaddps256_round: case X86::BI__builtin_ia32_cvtsi2sd64: case X86::BI__builtin_ia32_cvtsi2ss32: case X86::BI__builtin_ia32_cvtsi2ss64: diff --git a/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c b/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c new file mode 100644 index 0000000000000..5983e0d969b68 --- /dev/null +++ b/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -o - | FileCheck %s + +#include <immintrin.h> + +// VMPSADBW +__m512i test_mm512_mpsadbw_epu8(__m512i __A, __m512i __B) { +// CHECK-LABEL: @test_mm512_mpsadbw_epu8 +// CHECK: @llvm.x86.avx10.vmpsadbw.512 + return _mm512_mpsadbw_epu8(__A, __B, 17); +} + +__m512i test_mm512_mask_mpsadbw_epu8(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { +// CHECK-LABEL: @test_mm512_mask_mpsadbw_epu8 +// CHECK: @llvm.x86.avx10.vmpsadbw.512 +// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} + return _mm512_mask_mpsadbw_epu8(__W, __U, __A, __B, 17); +} + +__m512i test_mm512_maskz_mpsadbw_epu8(__mmask32 __U, __m512i __A, __m512i __B) { +// CHECK-LABEL: @test_mm512_maskz_mpsadbw_epu8 +// CHECK: @llvm.x86.avx10.vmpsadbw.512 +// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} + return _mm512_maskz_mpsadbw_epu8(__U, __A, __B, 17); +} diff --git a/clang/test/CodeGen/X86/avx10_2ni-builtins.c b/clang/test/CodeGen/X86/avx10_2ni-builtins.c new file mode 100644 index 0000000000000..c8e4d3c906a72 --- /dev/null +++ b/clang/test/CodeGen/X86/avx10_2ni-builtins.c @@ -0,0 +1,105 @@ +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s + +#include <immintrin.h> + +// VMPSADBW +__m128i test_mm_mpsadbw_epu8(__m128i __A, __m128i __B) { +// CHECK-LABEL: @test_mm_mpsadbw_epu8 +// CHECK: @llvm.x86.sse41.mpsadbw + return _mm_mpsadbw_epu8(__A, __B, 170); +} + +__m128i test_mm_mask_mpsadbw_epu8(__m128i __W, 
__mmask8 __U, __m128i __A, __m128i __B) { +// CHECK-LABEL: @test_mm_mask_mpsadbw_epu8 +// CHECK: @llvm.x86.sse41.mpsadbw +// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + return _mm_mask_mpsadbw_epu8(__W, __U, __A, __B, 170); +} + +__m128i test_mm_maskz_mpsadbw_epu8(__mmask8 __U, __m128i __A, __m128i __B) { +// CHECK-LABEL: @test_mm_maskz_mpsadbw_epu8 +// CHECK: @llvm.x86.sse41.mpsadbw +// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + return _mm_maskz_mpsadbw_epu8(__U, __A, __B, 170); +} + +__m256i test_mm256_mpsadbw_epu8(__m256i __A, __m256i __B) { +// CHECK-LABEL: @test_mm256_mpsadbw_epu8 +// CHECK: @llvm.x86.avx2.mpsadbw + return _mm256_mpsadbw_epu8(__A, __B, 170); +} + +__m256i test_mm256_mask_mpsadbw_epu8(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { +// CHECK-LABEL: @test_mm256_mask_mpsadbw_epu8 +// CHECK: @llvm.x86.avx2.mpsadbw +// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + return _mm256_mask_mpsadbw_epu8(__W, __U, __A, __B, 170); +} + +__m256i test_mm256_maskz_mpsadbw_epu8(__mmask16 __U, __m256i __A, __m256i __B) { +// CHECK-LABEL: @test_mm256_maskz_mpsadbw_epu8 +// CHECK: @llvm.x86.avx2.mpsadbw +// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + return _mm256_maskz_mpsadbw_epu8(__U, __A, __B, 170); +} + +// YMM Rounding +__m256d test_mm256_add_round_pd(__m256d __A, __m256d __B) { +// CHECK-LABEL: @test_mm256_add_round_pd +// CHECK: @llvm.x86.avx10.vaddpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 11) + return _mm256_add_round_pd(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +} + +__m256d test_mm256_mask_add_round_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { +// CHECK-LABEL: @test_mm256_mask_add_round_pd +// CHECK: @llvm.x86.avx10.vaddpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 10) +// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + return _mm256_mask_add_round_pd(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +} + +__m256d test_mm256_maskz_add_round_pd(__mmask8 __U, __m256d __A, __m256d __B) { +// CHECK-LABEL: @test_mm256_maskz_add_round_pd +// CHECK: @llvm.x86.avx10.vaddpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 9) +// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + return _mm256_maskz_add_round_pd(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +} + +__m256h test_mm256_add_round_ph(__m256h __A, __m256h __B) { +// CHECK-LABEL: @test_mm256_add_round_ph +// CHECK: @llvm.x86.avx10.vaddph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 11) + return _mm256_add_round_ph(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +} + +__m256h test_mm256_mask_add_round_ph(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) { +// CHECK-LABEL: @test_mm256_mask_add_round_ph +// CHECK: @llvm.x86.avx10.vaddph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 10) +// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} + return _mm256_mask_add_round_ph(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +} + +__m256h test_mm256_maskz_add_round_ph(__mmask8 __U, __m256h __A, __m256h __B) { +// CHECK-LABEL: @test_mm256_maskz_add_round_ph +// CHECK: @llvm.x86.avx10.vaddph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 9) +// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} + return _mm256_maskz_add_round_ph(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +} + +__m256 
test_mm256_add_round_ps(__m256 __A, __m256 __B) { +// CHECK-LABEL: @test_mm256_add_round_ps +// CHECK: @llvm.x86.avx10.vaddps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 11) + return _mm256_add_round_ps(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +} + +__m256 test_mm256_mask_add_round_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { +// CHECK-LABEL: @test_mm256_mask_add_round_ps +// CHECK: @llvm.x86.avx10.vaddps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 10) +// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + return _mm256_mask_add_round_ps(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +} + +__m256 test_mm256_maskz_add_round_ps(__mmask8 __U, __m256 __A, __m256 __B) { +// CHECK-LABEL: @test_mm256_maskz_add_round_ps +// CHECK: @llvm.x86.avx10.vaddps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 9) +// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + return _mm256_maskz_add_round_ps(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +} diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c index b1ae6678531b9..593ccffbcda09 100644 --- a/clang/test/CodeGen/attr-target-x86.c +++ b/clang/test/CodeGen/attr-target-x86.c @@ -59,10 +59,10 @@ void __attribute__((target("avx10.1-512"))) avx10_1_512(void) {} // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686" // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" // CHECK-NOT: tune-cpu -// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx10.1-256,-avx10.1-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" +// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686" -// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx10.1-256,-avx10.1-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" -// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" 
"target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-avx10.1-256,-avx10.1-512,-vaes" +// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" +// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-vaes" // CHECK-NOT: tune-cpu // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-mmx" // CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx" @@ -76,5 +76,5 @@ void __attribute__((target("avx10.1-512"))) avx10_1_512(void) {} // CHECK: "target-cpu"="x86-64-v4" // CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" -// CHECK: #12 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-avx10.1-512,-evex512" +// CHECK: #12 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-avx10.1-512,-avx10.2-512,-evex512" // CHECK: #13 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave" diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c index 7d77ae75f8c47..ddfbb29a48f8d 100644 --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -386,6 +386,13 @@ // RUN: %clang --target=i386 -march=i386 -mavx10.1 -mno-avx512f %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10-AVX512 %s // RUN: %clang --target=i386 -march=i386 -mavx10.1 -mevex512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10-EVEX512 %s // RUN: %clang --target=i386 -march=i386 -mavx10.1 -mno-evex512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10-EVEX512 %s +// RUN: %clang --target=i386 -mavx10.2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10_2_256 %s +// RUN: %clang --target=i386 -mavx10.2-256 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10_2_256 %s +// RUN: %clang --target=i386 -mavx10.2-512 %s -### -o 
%t.o 2>&1 | FileCheck -check-prefix=AVX10_2_512 %s +// RUN: %clang --target=i386 -mavx10.2-256 -mavx10.1-512 %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=AVX10_2_256,AVX10_1_512 %s +// RUN: %clang --target=i386 -mavx10.2-512 -mavx10.1-256 %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=AVX10_2_512,AVX10_1_256 %s +// AVX10_2_256: "-target-feature" "+avx10.2-256" +// AVX10_2_512: "-target-feature" "+avx10.2-512" // AVX10_1_256: "-target-feature" "+avx10.1-256" // AVX10_1_512: "-target-feature" "+avx10.1-512" // BAD-AVX10: error: unknown argument{{:?}} '-mavx10.{{.*}}' diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c index 5d510cb4667f4..8b4e6bdc09226 100644 --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -712,7 +712,12 @@ // RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.1 -x c -E -dM -o - %s | FileCheck -check-prefix=AVX10_1_256 %s // RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.1-256 -x c -E -dM -o - %s | FileCheck -check-prefix=AVX10_1_256 %s // RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.1-256 -mno-avx512f -x c -E -dM -o - %s | FileCheck -check-prefix=AVX10_1_256 %s +// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.2 -x c -E -dM -o - %s | FileCheck -check-prefixes=AVX10_1_256,AVX10_2_256 %s +// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.2-256 -x c -E -dM -o - %s | FileCheck -check-prefixes=AVX10_1_256,AVX10_2_256 %s +// AVX10_1_256-NOT: __AVX10_1_512__ // AVX10_1_256: #define __AVX10_1__ 1 +// AVX10_2_256-NOT: __AVX10_2_512__ +// AVX10_2_256: #define __AVX10_2__ 1 // AVX10_1_256: #define __AVX512F__ 1 // AVX10_1_256: #define __EVEX256__ 1 // AVX10_1_256-NOT: __EVEX512__ @@ -720,7 +725,11 @@ // RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.1-512 -x c -E -dM -o - %s | FileCheck -check-prefix=AVX10_1_512 %s // RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.1-512 -mno-avx512f -x c -E -dM -o - %s | FileCheck -check-prefix=AVX10_1_512 %s // RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.1-512 -mno-evex512 -x c -E -dM -o - %s | FileCheck -check-prefix=AVX10_1_512 %s +// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.2-512 -x c -E -dM -o - %s | FileCheck -check-prefixes=AVX10_1_512,AVX10_2_512 %s +// AVX10_1_512: #define __AVX10_1_512__ 1 // AVX10_1_512: #define __AVX10_1__ 1 +// AVX10_2_512: #define __AVX10_2_512__ 1 +// AVX10_2_512: #define __AVX10_2__ 1 // AVX10_1_512: #define __AVX512F__ 1 // AVX10_1_512: #define __EVEX256__ 1 // AVX10_1_512: #define __EVEX512__ 1 diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 551a9bec3b916..2486663956c3f 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -129,6 +129,8 @@ Changes to the X86 Backend generally seen in the wild (Clang never generates them!), so this is not expected to result in real-world compatibility problems. +* Support ISA of ``AVX10.2-256`` and ``AVX10.2-512``. + Changes to the OCaml bindings ----------------------------- diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index b6a92136f3828..515b0d0fcc22c 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -764,7 +764,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_sse41_mpsadbw : ClangBuiltin<"__builtin_ia32_mpsadbw128">, DefaultAttrsIntrinsic<[llvm_v8i16_ty], - [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; } @@ -4977,6 +4977,34 @@ let TargetPrefix = "x86" in { ImmArg<ArgIndex<4>>]>; } +//===----------------------------------------------------------------------===// +// AVX10.2 intrinsics +let TargetPrefix = "x86" in { + // VMPSADBW + def int_x86_avx10_vmpsadbw_512 : + ClangBuiltin<"__builtin_ia32_mpsadbw512">, + DefaultAttrsIntrinsic<[llvm_v32i16_ty], + [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i8_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>]>; + + // YMM Rounding + def int_x86_avx10_vaddpd256 : + ClangBuiltin<"__builtin_ia32_vaddpd256_round">, + DefaultAttrsIntrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>]>; + def int_x86_avx10_vaddph256 : + ClangBuiltin<"__builtin_ia32_vaddph256_round">, + DefaultAttrsIntrinsic<[llvm_v16f16_ty], + [llvm_v16f16_ty, llvm_v16f16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>]>; + def int_x86_avx10_vaddps256 : + ClangBuiltin<"__builtin_ia32_vaddps256_round">, + DefaultAttrsIntrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>]>; +} + //===----------------------------------------------------------------------===// // SHA intrinsics let TargetPrefix = "x86" in { diff --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h index 5daae45df2f83..5ec8a718d5a3e 100644 --- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h +++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h @@ -71,7 +71,8 @@ enum attributeBits { ATTR_EVEXB = 0x1 << 12, ATTR_REX2 = 0x1 << 13, ATTR_EVEXNF = 0x1 << 14, - ATTR_max = 0x1 << 15, + ATTR_EVEXU = 0x1 << 15, + ATTR_max = 0x1 << 16, }; // Combinations of the above attributes that are relevant to instruction @@ -320,7 +321,47 @@ enum attributeBits { ENUM_ENTRY(IC_EVEX_L2_W_KZ, 3, "requires EVEX_KZ, L2 and W") \ ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ, 4, "requires EVEX_KZ, L2, W and XS prefix") \ ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ, 4, "requires EVEX_KZ, L2, W and XD prefix") \ - ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize") + ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize") \ + ENUM_ENTRY(IC_EVEX_B_U, 2, "requires EVEX_B and EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_XS_B_U, 3, "requires EVEX_B, XS and EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_XD_B_U, 3, "requires EVEX_B, XD and EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_OPSIZE_B_U, 3, \ + "requires EVEX_B, OpSize and EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_W_B_U, 4, "requires EVEX_B, W, and EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_W_XS_B_U, 5, "requires EVEX_B, W, XS, and EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_W_XD_B_U, 5, "requires EVEX_B, W, XD, and EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_W_OPSIZE_B_U, 5, \ + "requires EVEX_B, W, OpSize and EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_K_B_U, 2, "requires EVEX_B, EVEX_K and EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_XS_K_B_U, 3, \ + "requires EVEX_B, EVEX_K, XS and the EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_XD_K_B_U, 3, \ + "requires EVEX_B, EVEX_K, XD and the EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_OPSIZE_K_B_U, 3, \ + "requires EVEX_B, EVEX_K, OpSize and the EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_W_K_B_U, 4, \ + "requires EVEX_B, EVEX_K, W, and the EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_W_XS_K_B_U, 5, \ + 
"requires EVEX_B, EVEX_K, W, XS, and EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_W_XD_K_B_U, 5, \ + "requires EVEX_B, EVEX_K, W, XD, and EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_W_OPSIZE_K_B_U, 5, \ + "requires EVEX_B, EVEX_K, W, OpSize, and EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_KZ_B_U, 2, "requires EVEX_B, EVEX_KZ and EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_XS_KZ_B_U, 3, \ + "requires EVEX_B, EVEX_KZ, XS, and the EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_XD_KZ_B_U, 3, \ + "requires EVEX_B, EVEX_KZ, XD, and the EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_OPSIZE_KZ_B_U, 3, \ + "requires EVEX_B, EVEX_KZ, OpSize and EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_W_KZ_B_U, 4, \ + "requires EVEX_B, EVEX_KZ, W and the EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_W_XS_KZ_B_U, 5, \ + "requires EVEX_B, EVEX_KZ, W, XS, and EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_W_XD_KZ_B_U, 5, \ + "requires EVEX_B, EVEX_KZ, W, XD, and EVEX_U prefix") \ + ENUM_ENTRY(IC_EVEX_W_OPSIZE_KZ_B_U, 5, \ + "requires EVEX_B, EVEX_KZ, W, OpSize and EVEX_U prefix") #define ENUM_ENTRY(n, r, d) n, enum InstructionContext { INSTRUCTION_CONTEXTS IC_max }; diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def index 92798cbe4b4c1..5652fb8bde086 100644 --- a/llvm/include/llvm/TargetParser/X86TargetParser.def +++ b/llvm/include/llvm/TargetParser/X86TargetParser.def @@ -257,6 +257,8 @@ X86_FEATURE_COMPAT(USERMSR, "usermsr", 0) X86_FEATURE_COMPAT(AVX10_1, "avx10.1-256", 36) X86_FEATURE_COMPAT(AVX10_1_512, "avx10.1-512", 37) X86_FEATURE (ZU, "zu") +X86_FEATURE_COMPAT(AVX10_2, "avx10.2-256", 0) +X86_FEATURE_COMPAT(AVX10_2_512, "avx10.2-512", 0) // These features aren't really CPU features, but the frontend can set them. X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk") X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches") diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 6272e2d270f25..46871e1febd6c 100644 --- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -1219,6 +1219,9 @@ static int getInstructionID(struct InternalInstruction *insn, attrMask |= ATTR_EVEXKZ; if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) attrMask |= ATTR_EVEXB; + if (x2FromEVEX3of4(insn->vectorExtensionPrefix[2]) && + (insn->opcodeType != MAP4)) + attrMask |= ATTR_EVEXU; if (isNF(insn) && !readModRM(insn) && !isCCMPOrCTEST(insn)) // NF bit is the MSB of aaa. attrMask |= ATTR_EVEXNF; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index b24b8acce6412..a3af9affa5fd0 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -872,7 +872,10 @@ enum : uint64_t { EVEX_NF = 1ULL << EVEX_NFShift, // TwoConditionalOps - Set if this instruction has two conditional operands TwoConditionalOps_Shift = EVEX_NFShift + 1, - TwoConditionalOps = 1ULL << TwoConditionalOps_Shift + TwoConditionalOps = 1ULL << TwoConditionalOps_Shift, + // EVEX_U - Set if this instruction has EVEX.U field set. + EVEX_UShift = TwoConditionalOps_Shift + 1, + EVEX_U = 1ULL << EVEX_UShift }; /// \returns true if the instruction with given opcode is a prefix. 
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 6553e1cc4a930..469a385e08527 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -111,9 +111,9 @@ class X86OpcodePrefixHelper { // 0b11: F2 // EVEX (4 bytes) - // +-----+ +---------------+ +--------------------+ +------------------------+ - // | 62h | | RXBR' | B'mmm | | W | vvvv | X' | pp | | z | L'L | b | v' | aaa | - // +-----+ +---------------+ +--------------------+ +------------------------+ + // +-----+ +---------------+ +-------------------+ +------------------------+ + // | 62h | | RXBR' | B'mmm | | W | vvvv | U | pp | | z | L'L | b | v' | aaa | + // +-----+ +---------------+ +-------------------+ +------------------------+ // EVEX_L2/VEX_L (Vector Length): // L2 L @@ -131,7 +131,7 @@ class X86OpcodePrefixHelper { // | RM (VR) | EVEX_X | EVEX_B | modrm.r/m | VR | Dest or Src | // | RM (GPR) | EVEX_B' | EVEX_B | modrm.r/m | GPR | Dest or Src | // | BASE | EVEX_B' | EVEX_B | modrm.r/m | GPR | MA | - // | INDEX | EVEX_X' | EVEX_X | sib.index | GPR | MA | + // | INDEX | EVEX_U | EVEX_X | sib.index | GPR | MA | // | VIDX | EVEX_v' | EVEX_X | sib.index | VR | VSIB MA | // +----------+---------+--------+-----------+---------+--------------+ // @@ -238,6 +238,7 @@ class X86OpcodePrefixHelper { void setZ(bool V) { EVEX_z = V; } void setL2(bool V) { EVEX_L2 = V; } void setEVEX_b(bool V) { EVEX_b = V; } + void setEVEX_U(bool V) { X2 = V; } void setV2(const MCInst &MI, unsigned OpNum, bool HasVEX_4V) { // Only needed with VSIB which don't use VVVV. if (HasVEX_4V) @@ -1052,6 +1053,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, Prefix.setZ(HasEVEX_K && (TSFlags & X86II::EVEX_Z)); Prefix.setEVEX_b(TSFlags & X86II::EVEX_B); + Prefix.setEVEX_U(TSFlags & X86II::EVEX_U); bool EncodeRC = false; uint8_t EVEX_rc = 0; diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 9dafd5e628ca8..988966fa6a6c4 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -326,6 +326,12 @@ def FeatureAVX10_1 : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true", def FeatureAVX10_1_512 : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true", "Support AVX10.1 up to 512-bit instruction", [FeatureAVX10_1, FeatureEVEX512]>; +def FeatureAVX10_2 : SubtargetFeature<"avx10.2-256", "HasAVX10_2", "true", + "Support AVX10.2 up to 256-bit instruction", + [FeatureAVX10_1]>; +def FeatureAVX10_2_512 : SubtargetFeature<"avx10.2-512", "HasAVX10_2_512", "true", + "Support AVX10.2 up to 512-bit instruction", + [FeatureAVX10_2, FeatureAVX10_1_512]>; def FeatureEGPR : SubtargetFeature<"egpr", "HasEGPR", "true", "Support extended general purpose register">; def FeaturePush2Pop2 : SubtargetFeature<"push2pop2", "HasPush2Pop2", "true", diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 73405397aa6e8..9fafb66ab0b3f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -34033,6 +34033,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CVTNEPS2BF16) NODE_NAME_CASE(MCVTNEPS2BF16) NODE_NAME_CASE(DPBF16PS) + NODE_NAME_CASE(MPSADBW) NODE_NAME_CASE(LWPINS) NODE_NAME_CASE(MGATHER) NODE_NAME_CASE(MSCATTER) diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 362daa98e1f8e..4fd320885d608 100644 --- 
a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -595,6 +595,8 @@ namespace llvm { VPDPBSSD, VPDPBSSDS, + MPSADBW, + // Compress and expand. COMPRESS, EXPAND, diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td new file mode 100644 index 0000000000000..666667895bc39 --- /dev/null +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -0,0 +1,33 @@ +//===-- X86InstrAVX10.td - AVX10 Instruction Set -----------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the X86 AVX10 instruction set, defining the +// instructions, and properties of the instructions which are needed for code +// generation, machine code emission, and analysis. +// +//===----------------------------------------------------------------------===// + +// VMPSADBW +defm VMPSADBW : avx512_common_3Op_rm_imm8<0x42, X86Vmpsadbw, "vmpsadbw", SchedWritePSADBW, + avx512vl_i16_info, avx512vl_i8_info, + HasAVX10_2>, + XS, EVEX_CD8<32, CD8VF>; + +// YMM Rounding +multiclass avx256_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, + X86SchedWriteSizes sched> { + defm PHZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.YMM, + v16f16x_info>, T_MAP5,PS, EVEX_CD8<16, CD8VF>; + defm PSZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.YMM, + v8f32x_info>, TB, PS, EVEX_CD8<32, CD8VF>; + defm PDZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.YMM, + v4f64x_info>, TB, PD, EVEX_CD8<64, CD8VF>, REX_W; +} + +let Predicates = [HasAVX10_2], hasEVEX_U = 1, OpEnc = EncEVEX in + defm VADD : avx256_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>; diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td index 31ee288c6f8bb..7a9c164c031d5 100644 --- a/llvm/lib/Target/X86/X86InstrFormats.td +++ b/llvm/lib/Target/X86/X86InstrFormats.td @@ -282,6 +282,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, ExplicitOpPrefix explicitOpPrefix = NoExplicitOpPrefix; bits<2> explicitOpPrefixBits = explicitOpPrefix.Value; + bit hasEVEX_U = 0; // Does this inst set the EVEX_U field? // TSFlags layout should be kept in sync with X86BaseInfo.h. 
let TSFlags{6-0} = FormBits; let TSFlags{8-7} = OpSizeBits; @@ -309,4 +310,5 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{51-50} = explicitOpPrefixBits; let TSFlags{52} = hasEVEX_NF; let TSFlags{53} = hasTwoConditionalOps; + let TSFlags{54} = hasEVEX_U; } diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index dff33a469b97a..74596cec5c5ef 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -74,11 +74,11 @@ def X86psadbw : SDNode<"X86ISD::PSADBW", SDTCVecEltisVT<1, i8>, SDTCisSameSizeAs<0,1>, SDTCisSameAs<1,2>]>, [SDNPCommutative]>; -def X86dbpsadbw : SDNode<"X86ISD::DBPSADBW", - SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>, - SDTCVecEltisVT<1, i8>, - SDTCisSameSizeAs<0,1>, - SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>>; +def SDTX86PSADBW : SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>, + SDTCVecEltisVT<1, i8>, + SDTCisSameSizeAs<0,1>, + SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>; +def X86dbpsadbw : SDNode<"X86ISD::DBPSADBW", SDTX86PSADBW>; def X86andnp : SDNode<"X86ISD::ANDNP", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; @@ -809,6 +809,8 @@ def X86vpdpbsuds : SDNode<"X86ISD::VPDPBSUDS", SDTVnni>; def X86vpdpbuud : SDNode<"X86ISD::VPDPBUUD", SDTVnni>; def X86vpdpbuuds : SDNode<"X86ISD::VPDPBUUDS", SDTVnni>; +def X86Vmpsadbw : SDNode<"X86ISD::MPSADBW", SDTX86PSADBW>; + //===----------------------------------------------------------------------===// // SSE pattern fragments //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 4792784336109..e75d6743f9273 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -63,6 +63,7 @@ include "X86InstrXOP.td" // SSE, MMX and 3DNow! vector support. 
include "X86InstrSSE.td" include "X86InstrAVX512.td" +include "X86InstrAVX10.td" include "X86InstrMMX.td" include "X86Instr3DNow.td" diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td index f6038cf7a94cb..a815ddc9714f0 100644 --- a/llvm/lib/Target/X86/X86InstrPredicates.td +++ b/llvm/lib/Target/X86/X86InstrPredicates.td @@ -71,6 +71,9 @@ def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">; def HasEVEX512 : Predicate<"Subtarget->hasEVEX512()">; def HasAVX10_1 : Predicate<"Subtarget->hasAVX10_1()">; def HasAVX10_1_512 : Predicate<"Subtarget->hasAVX10_1_512()">; +def HasAVX10_2 : Predicate<"Subtarget->hasAVX10_2()">; +def HasAVX10_2_512 : Predicate<"Subtarget->hasAVX10_2_512()">; +def NoAVX10_2 : Predicate<"!Subtarget->hasAVX10_2()">; def HasAVX512 : Predicate<"Subtarget->hasAVX512()">; def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">; def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index bc15085f6c7b7..2fc3b6aa98858 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -6115,11 +6115,11 @@ def BlendScaleCommuteImm2to4 : SDNodeXForm<timm, [{ return getI8Imm(NewImm ^ 0xf, SDLoc(N)); }]>; -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoAVX10_2] in { let isCommutable = 0 in { - defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, - VR128, load, i128mem, 0, - SchedWriteMPSAD.XMM>, VEX, VVVV, WIG; + defm VMPSADBW : SS41I_binop_rmi<0x42, "vmpsadbw", X86Vmpsadbw, + v8i16, VR128, load, i128mem, 0, + SchedWriteMPSAD.XMM>, VEX, VVVV, WIG; } let Uses = [MXCSR], mayRaiseFPException = 1 in { @@ -6138,19 +6138,19 @@ let Uses = [MXCSR], mayRaiseFPException = 1 in { } } -let Predicates = [HasAVX2] in { +let Predicates = [HasAVX2, NoAVX10_2] in { let isCommutable = 0 in { - defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw, - VR256, load, i256mem, 0, - SchedWriteMPSAD.YMM>, VEX, VVVV, VEX_L, WIG; + defm VMPSADBWY : SS41I_binop_rmi<0x42, "vmpsadbw", X86Vmpsadbw, + v16i16, VR256, load, i256mem, 0, + SchedWriteMPSAD.YMM>, VEX, VVVV, VEX_L, WIG; } } let Constraints = "$src1 = $dst" in { let isCommutable = 0 in { - defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw, - VR128, memop, i128mem, 1, - SchedWriteMPSAD.XMM>; + defm MPSADBW : SS41I_binop_rmi<0x42, "mpsadbw", X86Vmpsadbw, + v8i16, VR128, memop, i128mem, 1, + SchedWriteMPSAD.XMM>; } let ExeDomain = SSEPackedSingle in diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 685daca360e08..000138e1837af 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -388,6 +388,15 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0), X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0), + X86_INTRINSIC_DATA(avx10_vaddpd256, INTR_TYPE_2OP, ISD::FADD, + X86ISD::FADD_RND), + X86_INTRINSIC_DATA(avx10_vaddph256, INTR_TYPE_2OP, ISD::FADD, + X86ISD::FADD_RND), + X86_INTRINSIC_DATA(avx10_vaddps256, INTR_TYPE_2OP, ISD::FADD, + X86ISD::FADD_RND), + X86_INTRINSIC_DATA(avx10_vmpsadbw_512, INTR_TYPE_3OP_IMM8, X86ISD::MPSADBW, + 0), + X86_INTRINSIC_DATA(avx2_mpsadbw, INTR_TYPE_3OP_IMM8, X86ISD::MPSADBW, 0), X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, 
X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0), @@ -1663,6 +1672,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse41_blendvpd, BLENDV, X86ISD::BLENDV, 0), X86_INTRINSIC_DATA(sse41_blendvps, BLENDV, X86ISD::BLENDV, 0), X86_INTRINSIC_DATA(sse41_insertps, INTR_TYPE_3OP, X86ISD::INSERTPS, 0), + X86_INTRINSIC_DATA(sse41_mpsadbw, INTR_TYPE_3OP_IMM8, X86ISD::MPSADBW, 0), X86_INTRINSIC_DATA(sse41_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0), X86_INTRINSIC_DATA(sse41_pblendvb, BLENDV, X86ISD::BLENDV, 0), X86_INTRINSIC_DATA(sse41_phminposuw, INTR_TYPE_1OP, X86ISD::PHMINPOS, 0), diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 68aed69ee574b..986b9a211ce6c 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1819,7 +1819,7 @@ const StringMap<bool> sys::getHostCPUFeatures() { Features["avxvnniint16"] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave; Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1); Features["usermsr"] = HasLeaf7Subleaf1 && ((EDX >> 15) & 1); - Features["avx10.1-256"] = HasLeaf7Subleaf1 && ((EDX >> 19) & 1); + bool HasAVX10 = HasLeaf7Subleaf1 && ((EDX >> 19) & 1); bool HasAPXF = HasLeaf7Subleaf1 && ((EDX >> 21) & 1); Features["egpr"] = HasAPXF; Features["push2pop2"] = HasAPXF; @@ -1849,8 +1849,13 @@ const StringMap<bool> sys::getHostCPUFeatures() { bool HasLeaf24 = MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX); - Features["avx10.1-512"] = - Features["avx10.1-256"] && HasLeaf24 && ((EBX >> 18) & 1); + + int AVX10Ver = HasLeaf24 && (EBX & 0xff); + int Has512Len = HasLeaf24 && ((EBX >> 18) & 1); + Features["avx10.1-256"] = HasAVX10 && AVX10Ver >= 1; + Features["avx10.1-512"] = HasAVX10 && AVX10Ver >= 1 && Has512Len; + Features["avx10.2-256"] = HasAVX10 && AVX10Ver >= 2; + Features["avx10.2-512"] = HasAVX10 && AVX10Ver >= 2 && Has512Len; return Features; } diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index dcf9130052ac1..57bda0651ea82 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -620,6 +620,9 @@ constexpr FeatureBitset ImpliedFeaturesAVX10_1 = FeatureAVX512FP16; constexpr FeatureBitset ImpliedFeaturesAVX10_1_512 = FeatureAVX10_1 | FeatureEVEX512; +constexpr FeatureBitset ImpliedFeaturesAVX10_2 = FeatureAVX10_1; +constexpr FeatureBitset ImpliedFeaturesAVX10_2_512 = + FeatureAVX10_2 | FeatureAVX10_1_512; // APX Features constexpr FeatureBitset ImpliedFeaturesEGPR = {}; diff --git a/llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll new file mode 100644 index 0000000000000..bafa52a2a83ae --- /dev/null +++ b/llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx10.2-512 --show-mc-encoding | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 --show-mc-encoding | FileCheck %s --check-prefix=X64 + +; VMPSADBW + +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_mm512_mask_mpsadbw(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) { +; X86-LABEL: test_mm512_mask_mpsadbw: +; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2] +; X86-NEXT: kmovd 
{{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vmpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7e,0x48,0x42,0xd9,0x02] +; X86-NEXT: vmpsadbw $3, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7e,0x49,0x42,0xe1,0x03] +; X86-NEXT: vmpsadbw $4, %zmm1, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7e,0xc9,0x42,0xd1,0x04] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] +; X86-NEXT: vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mm512_mask_mpsadbw: +; X64: # %bb.0: +; X64-NEXT: vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2] +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vmpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7e,0x48,0x42,0xd9,0x02] +; X64-NEXT: vmpsadbw $3, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7e,0x49,0x42,0xe1,0x03] +; X64-NEXT: vmpsadbw $4, %zmm1, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7e,0xc9,0x42,0xd1,0x04] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] +; X64-NEXT: vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc] +; X64-NEXT: retq # encoding: [0xc3] + %msk = bitcast i32 %x4 to <32 x i1> + %rs1 = call <32 x i16> @llvm.x86.avx10.vmpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i8 2) + %ad2 = call <32 x i16> @llvm.x86.avx10.vmpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i8 3) + %rs2 = select <32 x i1> %msk, <32 x i16> %ad2, <32 x i16> %x3 + %ad3 = call <32 x i16> @llvm.x86.avx10.vmpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i8 4) + %rs3 = select <32 x i1> %msk, <32 x i16> %ad3, <32 x i16> zeroinitializer + %rs4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } undef, <32 x i16> %rs1, 0 + %rs5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %rs4, <32 x i16> %rs2, 1 + %rs6 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %rs5, <32 x i16> %rs3, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %rs6 +} + +declare <32 x i16> @llvm.x86.avx10.vmpsadbw.512(<64 x i8>, <64 x i8>, i8) diff --git a/llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll new file mode 100644 index 0000000000000..4080546c0c543 --- /dev/null +++ b/llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll @@ -0,0 +1,216 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx10.2-256 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 + +; VMPSADBW + +define { <8 x i16>, <8 x i16>, <8 x i16> } @test_mask_mpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) { +; X86-LABEL: test_mask_mpsadbw_128: +; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vmpsadbw $2, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x42,0xd9,0x02] +; X86-NEXT: vmpsadbw $3, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7e,0x09,0x42,0xe1,0x03] +; X86-NEXT: vmpsadbw $4, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7e,0x89,0x42,0xd1,0x04] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] +; X86-NEXT: vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0x6f,0xcc] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_mpsadbw_128: +; X64: # %bb.0: +; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vmpsadbw $2, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x42,0xd9,0x02] +; X64-NEXT: vmpsadbw $3, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7e,0x09,0x42,0xe1,0x03] +; X64-NEXT: vmpsadbw $4, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7e,0x89,0x42,0xd1,0x04] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] +; X64-NEXT: vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc] +; X64-NEXT: retq # encoding: [0xc3] + %msk = bitcast i8 %x4 to <8 x i1> + %rs1 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %x0, <16 x i8> %x1, i8 2) + %ad2 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %x0, <16 x i8> %x1, i8 3) + %rs2 = select <8 x i1> %msk, <8 x i16> %ad2, <8 x i16> %x3 + %ad3 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %x0, <16 x i8> %x1, i8 4) + %rs3 = select <8 x i1> %msk, <8 x i16> %ad3, <8 x i16> zeroinitializer + %rs4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } undef, <8 x i16> %rs1, 0 + %rs5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %rs4, <8 x i16> %rs2, 1 + %rs6 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %rs5, <8 x i16> %rs3, 2 + ret { <8 x i16>, <8 x i16>, <8 x i16> } %rs6 +} + +define { <16 x i16>, <16 x i16>, <16 x i16> } @test_mask_mpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) { +; X86-LABEL: test_mask_mpsadbw_256: +; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vmpsadbw $2, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x42,0xd9,0x02] +; X86-NEXT: vmpsadbw $3, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7e,0x29,0x42,0xe1,0x03] +; X86-NEXT: vmpsadbw $4, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0x7e,0xa9,0x42,0xd1,0x04] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] +; X86-NEXT: vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_mpsadbw_256: +; X64: # %bb.0: +; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vmpsadbw $2, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x42,0xd9,0x02] +; X64-NEXT: vmpsadbw $3, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7e,0x29,0x42,0xe1,0x03] +; X64-NEXT: vmpsadbw $4, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0x7e,0xa9,0x42,0xd1,0x04] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] +; X64-NEXT: vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc] +; X64-NEXT: retq # encoding: [0xc3] + %msk = bitcast i16 %x4 to <16 x i1> + %rs1 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %x0, <32 x i8> %x1, i8 2) + %ad2 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %x0, <32 x i8> %x1, i8 3) + %rs2 = select <16 x i1> %msk, <16 x i16> %ad2, <16 x i16> %x3 + %ad3 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %x0, <32 x i8> %x1, i8 4) + %rs3 = select 
<16 x i1> %msk, <16 x i16> %ad3, <16 x i16> zeroinitializer + %rs4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } undef, <16 x i16> %rs1, 0 + %rs5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %rs4, <16 x i16> %rs2, 1 + %rs6 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %rs5, <16 x i16> %rs3, 2 + ret { <16 x i16>, <16 x i16>, <16 x i16> } %rs6 +} + +declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) +declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) + +; YMM Rounding + +declare <4 x double> @llvm.x86.avx10.vaddpd256(<4 x double>, <4 x double>, i32) +define <4 x double> @test_int_x86_vaddpd256(<4 x double> %A, <4 x double> %B) nounwind { +; CHECK-LABEL: test_int_x86_vaddpd256: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddpd {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xf9,0x78,0x58,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <4 x double> @llvm.x86.avx10.vaddpd256(<4 x double> %A, <4 x double> %B, i32 11) + ret <4 x double> %ret +} + +define <4 x double> @test_int_x86_mask_vaddpd256(<4 x double> %A, i4 %B, <4 x double> %C, <4 x double> %D) nounwind { +; X86-LABEL: test_int_x86_mask_vaddpd256: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vaddpd {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf1,0x59,0x58,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_mask_vaddpd256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vaddpd {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf1,0x59,0x58,0xc2] +; X64-NEXT: retq # encoding: [0xc3] + %ret0 = call <4 x double> @llvm.x86.avx10.vaddpd256(<4 x double> %C, <4 x double> %D, i32 10) + %msk = bitcast i4 %B to <4 x i1> + %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> %A + ret <4 x double> %ret +} + +define <4 x double> @test_int_x86_maskz_vaddpd256(i4 %A, <4 x double> %B, <4 x double> %C) nounwind { +; X86-LABEL: test_int_x86_maskz_vaddpd256: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vaddpd {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0xb9,0x58,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_maskz_vaddpd256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vaddpd {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0xb9,0x58,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %ret0 = call <4 x double> @llvm.x86.avx10.vaddpd256(<4 x double> %B, <4 x double> %C, i32 9) + %msk = bitcast i4 %A to <4 x i1> + %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> zeroinitializer + ret <4 x double> %ret +} + +declare <16 x half> @llvm.x86.avx10.vaddph256(<16 x half>, <16 x half>, i32) +define <16 x half> @test_int_x86_vaddph256(<16 x half> %A, <16 x half> %B) nounwind { +; CHECK-LABEL: test_int_x86_vaddph256: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddph {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x78,0x78,0x58,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <16 x half> @llvm.x86.avx10.vaddph256(<16 x half> %A, <16 x half> %B, i32 11) + ret <16 x half> %ret +} + +define <16 x half> @test_int_x86_mask_vaddph256(<16 x half> %A, i16 %B, <16 x half> %C, <16 x half> %D) nounwind { +; X86-LABEL: test_int_x86_mask_vaddph256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: 
[0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vaddph {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x70,0x59,0x58,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_mask_vaddph256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vaddph {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x70,0x59,0x58,0xc2] +; X64-NEXT: retq # encoding: [0xc3] + %ret0 = call <16 x half> @llvm.x86.avx10.vaddph256(<16 x half> %C, <16 x half> %D, i32 10) + %msk = bitcast i16 %B to <16 x i1> + %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> %A + ret <16 x half> %ret +} + +define <16 x half> @test_int_x86_maskz_vaddph256(i16 %A, <16 x half> %B, <16 x half> %C) nounwind { +; X86-LABEL: test_int_x86_maskz_vaddph256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vaddph {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xb9,0x58,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_maskz_vaddph256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vaddph {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xb9,0x58,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %ret0 = call <16 x half> @llvm.x86.avx10.vaddph256(<16 x half> %B, <16 x half> %C, i32 9) + %msk = bitcast i16 %A to <16 x i1> + %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> zeroinitializer + ret <16 x half> %ret +} + +declare <8 x float> @llvm.x86.avx10.vaddps256(<8 x float>, <8 x float>, i32) +define <8 x float> @test_int_x86_vaddps256(<8 x float> %A, <8 x float> %B) nounwind { +; CHECK-LABEL: test_int_x86_vaddps256: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddps {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x78,0x78,0x58,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <8 x float> @llvm.x86.avx10.vaddps256(<8 x float> %A, <8 x float> %B, i32 11) + ret <8 x float> %ret +} + +define <8 x float> @test_int_x86_mask_vaddps256(<8 x float> %A, i8 %B, <8 x float> %C, <8 x float> %D) nounwind { +; X86-LABEL: test_int_x86_mask_vaddps256: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vaddps {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x70,0x59,0x58,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_mask_vaddps256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vaddps {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x70,0x59,0x58,0xc2] +; X64-NEXT: retq # encoding: [0xc3] + %ret0 = call <8 x float> @llvm.x86.avx10.vaddps256(<8 x float> %C, <8 x float> %D, i32 10) + %msk = bitcast i8 %B to <8 x i1> + %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> %A + ret <8 x float> %ret +} + +define <8 x float> @test_int_x86_maskz_vaddps256(i8 %A, <8 x float> %B, <8 x float> %C) nounwind { +; X86-LABEL: test_int_x86_maskz_vaddps256: +; X86: # %bb.0: +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vaddps {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0xb9,0x58,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_maskz_vaddps256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vaddps {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0xb9,0x58,0xc1] +; X64-NEXT: retq # 
encoding: [0xc3] + %ret0 = call <8 x float> @llvm.x86.avx10.vaddps256(<8 x float> %B, <8 x float> %C, i32 9) + %msk = bitcast i8 %A to <8 x i1> + %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> zeroinitializer + ret <8 x float> %ret +} diff --git a/llvm/test/MC/Disassembler/X86/avx10_2ni-32.txt b/llvm/test/MC/Disassembler/X86/avx10_2ni-32.txt new file mode 100644 index 0000000000000..59457e6eec293 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/avx10_2ni-32.txt @@ -0,0 +1,150 @@ +# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# VMPSADBW + +# ATT: vmpsadbw $123, %xmm4, %xmm3, %xmm2 +# INTEL: vmpsadbw xmm2, xmm3, xmm4, 123 +0xc4,0xe3,0x61,0x42,0xd4,0x7b + +# ATT: vmpsadbw $123, %xmm4, %xmm3, %xmm2 {%k7} +# INTEL: vmpsadbw xmm2 {k7}, xmm3, xmm4, 123 +0x62,0xf3,0x66,0x0f,0x42,0xd4,0x7b + +# ATT: vmpsadbw $123, %xmm4, %xmm3, %xmm2 {%k7} {z} +# INTEL: vmpsadbw xmm2 {k7} {z}, xmm3, xmm4, 123 +0x62,0xf3,0x66,0x8f,0x42,0xd4,0x7b + +# ATT: vmpsadbw $123, %ymm4, %ymm3, %ymm2 +# INTEL: vmpsadbw ymm2, ymm3, ymm4, 123 +0xc4,0xe3,0x65,0x42,0xd4,0x7b + +# ATT: vmpsadbw $123, %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vmpsadbw ymm2 {k7}, ymm3, ymm4, 123 +0x62,0xf3,0x66,0x2f,0x42,0xd4,0x7b + +# ATT: vmpsadbw $123, %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vmpsadbw ymm2 {k7} {z}, ymm3, ymm4, 123 +0x62,0xf3,0x66,0xaf,0x42,0xd4,0x7b + +# ATT: vmpsadbw $123, %zmm4, %zmm3, %zmm2 +# INTEL: vmpsadbw zmm2, zmm3, zmm4, 123 +0x62,0xf3,0x66,0x48,0x42,0xd4,0x7b + +# ATT: vmpsadbw $123, %zmm4, %zmm3, %zmm2 {%k7} +# INTEL: vmpsadbw zmm2 {k7}, zmm3, zmm4, 123 +0x62,0xf3,0x66,0x4f,0x42,0xd4,0x7b + +# ATT: vmpsadbw $123, %zmm4, %zmm3, %zmm2 {%k7} {z} +# INTEL: vmpsadbw zmm2 {k7} {z}, zmm3, zmm4, 123 +0x62,0xf3,0x66,0xcf,0x42,0xd4,0x7b + +# ATT: vmpsadbw $123, 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vmpsadbw xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123 +0xc4,0xe3,0x61,0x42,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b + +# ATT: vmpsadbw $123, 291(%edi,%eax,4), %xmm3, %xmm2 {%k7} +# INTEL: vmpsadbw xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123 +0x62,0xf3,0x66,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00,0x7b + +# ATT: vmpsadbw $123, (%eax), %xmm3, %xmm2 +# INTEL: vmpsadbw xmm2, xmm3, xmmword ptr [eax], 123 +0xc4,0xe3,0x61,0x42,0x10,0x7b + +# ATT: vmpsadbw $123, -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vmpsadbw xmm2, xmm3, xmmword ptr [2*ebp - 512], 123 +0xc4,0xe3,0x61,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b + +# ATT: vmpsadbw $123, 2032(%ecx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vmpsadbw xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032], 123 +0x62,0xf3,0x66,0x8f,0x42,0x51,0x7f,0x7b + +# ATT: vmpsadbw $123, -2048(%edx), %xmm3, %xmm2 {%k7} {z} +# INTEL: vmpsadbw xmm2 {k7} {z}, xmm3, xmmword ptr [edx - 2048], 123 +0x62,0xf3,0x66,0x8f,0x42,0x52,0x80,0x7b + +# ATT: vmpsadbw $123, 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vmpsadbw ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123 +0xc4,0xe3,0x65,0x42,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b + +# ATT: vmpsadbw $123, 291(%edi,%eax,4), %ymm3, %ymm2 {%k7} +# INTEL: vmpsadbw ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123 +0x62,0xf3,0x66,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00,0x7b + +# ATT: vmpsadbw $123, (%eax), %ymm3, %ymm2 +# INTEL: vmpsadbw ymm2, ymm3, ymmword ptr [eax], 123 +0xc4,0xe3,0x65,0x42,0x10,0x7b + +# ATT: vmpsadbw $123, -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vmpsadbw ymm2, ymm3, ymmword ptr 
[2*ebp - 1024], 123 +0xc4,0xe3,0x65,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b + +# ATT: vmpsadbw $123, 4064(%ecx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vmpsadbw ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064], 123 +0x62,0xf3,0x66,0xaf,0x42,0x51,0x7f,0x7b + +# ATT: vmpsadbw $123, -4096(%edx), %ymm3, %ymm2 {%k7} {z} +# INTEL: vmpsadbw ymm2 {k7} {z}, ymm3, ymmword ptr [edx - 4096], 123 +0x62,0xf3,0x66,0xaf,0x42,0x52,0x80,0x7b + +# ATT: vmpsadbw $123, 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vmpsadbw zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123 +0x62,0xf3,0x66,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b + +# ATT: vmpsadbw $123, 291(%edi,%eax,4), %zmm3, %zmm2 {%k7} +# INTEL: vmpsadbw zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123 +0x62,0xf3,0x66,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00,0x7b + +# ATT: vmpsadbw $123, (%eax), %zmm3, %zmm2 +# INTEL: vmpsadbw zmm2, zmm3, zmmword ptr [eax], 123 +0x62,0xf3,0x66,0x48,0x42,0x10,0x7b + +# ATT: vmpsadbw $123, -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vmpsadbw zmm2, zmm3, zmmword ptr [2*ebp - 2048], 123 +0x62,0xf3,0x66,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b + +# ATT: vmpsadbw $123, 8128(%ecx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vmpsadbw zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128], 123 +0x62,0xf3,0x66,0xcf,0x42,0x51,0x7f,0x7b + +# ATT: vmpsadbw $123, -8192(%edx), %zmm3, %zmm2 {%k7} {z} +# INTEL: vmpsadbw zmm2 {k7} {z}, zmm3, zmmword ptr [edx - 8192], 123 +0x62,0xf3,0x66,0xcf,0x42,0x52,0x80,0x7b + +# YMM Rounding + +# ATT: vaddpd {rn-sae}, %ymm4, %ymm3, %ymm2 +# INTEL: vaddpd ymm2, ymm3, ymm4, {rn-sae} +0x62,0xf1,0xe1,0x18,0x58,0xd4 + +# ATT: vaddpd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vaddpd ymm2 {k7}, ymm3, ymm4, {rd-sae} +0x62,0xf1,0xe1,0x3f,0x58,0xd4 + +# ATT: vaddpd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vaddpd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae} +0x62,0xf1,0xe1,0xff,0x58,0xd4 + +# ATT: vaddph {rn-sae}, %ymm4, %ymm3, %ymm2 +# INTEL: vaddph ymm2, ymm3, ymm4, {rn-sae} +0x62,0xf5,0x60,0x18,0x58,0xd4 + +# ATT: vaddph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vaddph ymm2 {k7}, ymm3, ymm4, {rd-sae} +0x62,0xf5,0x60,0x3f,0x58,0xd4 + +# ATT: vaddph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vaddph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae} +0x62,0xf5,0x60,0xff,0x58,0xd4 + +# ATT: vaddps {rn-sae}, %ymm4, %ymm3, %ymm2 +# INTEL: vaddps ymm2, ymm3, ymm4, {rn-sae} +0x62,0xf1,0x60,0x18,0x58,0xd4 + +# ATT: vaddps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7} +# INTEL: vaddps ymm2 {k7}, ymm3, ymm4, {rd-sae} +0x62,0xf1,0x60,0x3f,0x58,0xd4 + +# ATT: vaddps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z} +# INTEL: vaddps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae} +0x62,0xf1,0x60,0xff,0x58,0xd4 diff --git a/llvm/test/MC/Disassembler/X86/avx10_2ni-64.txt b/llvm/test/MC/Disassembler/X86/avx10_2ni-64.txt new file mode 100644 index 0000000000000..34f8851d04d6b --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/avx10_2ni-64.txt @@ -0,0 +1,150 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# VMPSADBW + +# ATT: vmpsadbw $123, %xmm24, %xmm23, %xmm22 +# INTEL: vmpsadbw xmm22, xmm23, xmm24, 123 +0x62,0x83,0x46,0x00,0x42,0xf0,0x7b + +# ATT: vmpsadbw $123, %xmm24, %xmm23, %xmm22 {%k7} +# INTEL: vmpsadbw xmm22 {k7}, xmm23, xmm24, 123 +0x62,0x83,0x46,0x07,0x42,0xf0,0x7b + +# ATT: vmpsadbw $123, %xmm24, %xmm23, %xmm22 {%k7} {z} +# INTEL: vmpsadbw xmm22 {k7} {z}, xmm23, xmm24, 123 
+0x62,0x83,0x46,0x87,0x42,0xf0,0x7b + +# ATT: vmpsadbw $123, %ymm24, %ymm23, %ymm22 +# INTEL: vmpsadbw ymm22, ymm23, ymm24, 123 +0x62,0x83,0x46,0x20,0x42,0xf0,0x7b + +# ATT: vmpsadbw $123, %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vmpsadbw ymm22 {k7}, ymm23, ymm24, 123 +0x62,0x83,0x46,0x27,0x42,0xf0,0x7b + +# ATT: vmpsadbw $123, %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vmpsadbw ymm22 {k7} {z}, ymm23, ymm24, 123 +0x62,0x83,0x46,0xa7,0x42,0xf0,0x7b + +# ATT: vmpsadbw $123, %zmm24, %zmm23, %zmm22 +# INTEL: vmpsadbw zmm22, zmm23, zmm24, 123 +0x62,0x83,0x46,0x40,0x42,0xf0,0x7b + +# ATT: vmpsadbw $123, %zmm24, %zmm23, %zmm22 {%k7} +# INTEL: vmpsadbw zmm22 {k7}, zmm23, zmm24, 123 +0x62,0x83,0x46,0x47,0x42,0xf0,0x7b + +# ATT: vmpsadbw $123, %zmm24, %zmm23, %zmm22 {%k7} {z} +# INTEL: vmpsadbw zmm22 {k7} {z}, zmm23, zmm24, 123 +0x62,0x83,0x46,0xc7,0x42,0xf0,0x7b + +# ATT: vmpsadbw $123, 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vmpsadbw xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456], 123 +0x62,0xa3,0x46,0x00,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b + +# ATT: vmpsadbw $123, 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +# INTEL: vmpsadbw xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291], 123 +0x62,0xc3,0x46,0x07,0x42,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b + +# ATT: vmpsadbw $123, (%rip), %xmm23, %xmm22 +# INTEL: vmpsadbw xmm22, xmm23, xmmword ptr [rip], 123 +0x62,0xe3,0x46,0x00,0x42,0x35,0x00,0x00,0x00,0x00,0x7b + +# ATT: vmpsadbw $123, -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vmpsadbw xmm22, xmm23, xmmword ptr [2*rbp - 512], 123 +0x62,0xe3,0x46,0x00,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b + +# ATT: vmpsadbw $123, 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vmpsadbw xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032], 123 +0x62,0xe3,0x46,0x87,0x42,0x71,0x7f,0x7b + +# ATT: vmpsadbw $123, -2048(%rdx), %xmm23, %xmm22 {%k7} {z} +# INTEL: vmpsadbw xmm22 {k7} {z}, xmm23, xmmword ptr [rdx - 2048], 123 +0x62,0xe3,0x46,0x87,0x42,0x72,0x80,0x7b + +# ATT: vmpsadbw $123, 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vmpsadbw ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456], 123 +0x62,0xa3,0x46,0x20,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b + +# ATT: vmpsadbw $123, 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +# INTEL: vmpsadbw ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291], 123 +0x62,0xc3,0x46,0x27,0x42,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b + +# ATT: vmpsadbw $123, (%rip), %ymm23, %ymm22 +# INTEL: vmpsadbw ymm22, ymm23, ymmword ptr [rip], 123 +0x62,0xe3,0x46,0x20,0x42,0x35,0x00,0x00,0x00,0x00,0x7b + +# ATT: vmpsadbw $123, -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vmpsadbw ymm22, ymm23, ymmword ptr [2*rbp - 1024], 123 +0x62,0xe3,0x46,0x20,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b + +# ATT: vmpsadbw $123, 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vmpsadbw ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064], 123 +0x62,0xe3,0x46,0xa7,0x42,0x71,0x7f,0x7b + +# ATT: vmpsadbw $123, -4096(%rdx), %ymm23, %ymm22 {%k7} {z} +# INTEL: vmpsadbw ymm22 {k7} {z}, ymm23, ymmword ptr [rdx - 4096], 123 +0x62,0xe3,0x46,0xa7,0x42,0x72,0x80,0x7b + +# ATT: vmpsadbw $123, 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vmpsadbw zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456], 123 +0x62,0xa3,0x46,0x40,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b + +# ATT: vmpsadbw $123, 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +# INTEL: vmpsadbw zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291], 123 +0x62,0xc3,0x46,0x47,0x42,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b + +# ATT: vmpsadbw $123, (%rip), %zmm23, %zmm22 +# INTEL: vmpsadbw zmm22, zmm23, zmmword ptr [rip], 123 
+0x62,0xe3,0x46,0x40,0x42,0x35,0x00,0x00,0x00,0x00,0x7b + +# ATT: vmpsadbw $123, -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vmpsadbw zmm22, zmm23, zmmword ptr [2*rbp - 2048], 123 +0x62,0xe3,0x46,0x40,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b + +# ATT: vmpsadbw $123, 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vmpsadbw zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128], 123 +0x62,0xe3,0x46,0xc7,0x42,0x71,0x7f,0x7b + +# ATT: vmpsadbw $123, -8192(%rdx), %zmm23, %zmm22 {%k7} {z} +# INTEL: vmpsadbw zmm22 {k7} {z}, zmm23, zmmword ptr [rdx - 8192], 123 +0x62,0xe3,0x46,0xc7,0x42,0x72,0x80,0x7b + +# YMM Rounding + +# ATT: vaddpd {rn-sae}, %ymm24, %ymm23, %ymm22 +# INTEL: vaddpd ymm22, ymm23, ymm24, {rn-sae} +0x62,0x81,0xc1,0x10,0x58,0xf0 + +# ATT: vaddpd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vaddpd ymm22 {k7}, ymm23, ymm24, {rd-sae} +0x62,0x81,0xc1,0x37,0x58,0xf0 + +# ATT: vaddpd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vaddpd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae} +0x62,0x81,0xc1,0xf7,0x58,0xf0 + +# ATT: vaddph {rn-sae}, %ymm24, %ymm23, %ymm22 +# INTEL: vaddph ymm22, ymm23, ymm24, {rn-sae} +0x62,0x85,0x40,0x10,0x58,0xf0 + +# ATT: vaddph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vaddph ymm22 {k7}, ymm23, ymm24, {rd-sae} +0x62,0x85,0x40,0x37,0x58,0xf0 + +# ATT: vaddph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vaddph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae} +0x62,0x85,0x40,0xf7,0x58,0xf0 + +# ATT: vaddps {rn-sae}, %ymm24, %ymm23, %ymm22 +# INTEL: vaddps ymm22, ymm23, ymm24, {rn-sae} +0x62,0x81,0x40,0x10,0x58,0xf0 + +# ATT: vaddps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7} +# INTEL: vaddps ymm22 {k7}, ymm23, ymm24, {rd-sae} +0x62,0x81,0x40,0x37,0x58,0xf0 + +# ATT: vaddps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z} +# INTEL: vaddps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae} +0x62,0x81,0x40,0xf7,0x58,0xf0 diff --git a/llvm/test/MC/X86/avx10_2ni-32-intel.s b/llvm/test/MC/X86/avx10_2ni-32-intel.s new file mode 100644 index 0000000000000..ea9a89f316cc3 --- /dev/null +++ b/llvm/test/MC/X86/avx10_2ni-32-intel.s @@ -0,0 +1,149 @@ +// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// VMPSADBW + +// CHECK: vmpsadbw xmm2, xmm3, xmm4, 123 +// CHECK: encoding: [0xc4,0xe3,0x61,0x42,0xd4,0x7b] + vmpsadbw xmm2, xmm3, xmm4, 123 + +// CHECK: vmpsadbw xmm2 {k7}, xmm3, xmm4, 123 +// CHECK: encoding: [0x62,0xf3,0x66,0x0f,0x42,0xd4,0x7b] + vmpsadbw xmm2 {k7}, xmm3, xmm4, 123 + +// CHECK: vmpsadbw xmm2 {k7} {z}, xmm3, xmm4, 123 +// CHECK: encoding: [0x62,0xf3,0x66,0x8f,0x42,0xd4,0x7b] + vmpsadbw xmm2 {k7} {z}, xmm3, xmm4, 123 + +// CHECK: vmpsadbw ymm2, ymm3, ymm4, 123 +// CHECK: encoding: [0xc4,0xe3,0x65,0x42,0xd4,0x7b] + vmpsadbw ymm2, ymm3, ymm4, 123 + +// CHECK: vmpsadbw ymm2 {k7}, ymm3, ymm4, 123 +// CHECK: encoding: [0x62,0xf3,0x66,0x2f,0x42,0xd4,0x7b] + vmpsadbw ymm2 {k7}, ymm3, ymm4, 123 + +// CHECK: vmpsadbw ymm2 {k7} {z}, ymm3, ymm4, 123 +// CHECK: encoding: [0x62,0xf3,0x66,0xaf,0x42,0xd4,0x7b] + vmpsadbw ymm2 {k7} {z}, ymm3, ymm4, 123 + +// CHECK: vmpsadbw zmm2, zmm3, zmm4, 123 +// CHECK: encoding: [0x62,0xf3,0x66,0x48,0x42,0xd4,0x7b] + vmpsadbw zmm2, zmm3, zmm4, 123 + +// CHECK: vmpsadbw zmm2 {k7}, zmm3, zmm4, 123 +// CHECK: encoding: [0x62,0xf3,0x66,0x4f,0x42,0xd4,0x7b] + vmpsadbw zmm2 {k7}, zmm3, zmm4, 123 + +// CHECK: vmpsadbw zmm2 {k7} {z}, zmm3, zmm4, 123 +// CHECK: encoding: [0x62,0xf3,0x66,0xcf,0x42,0xd4,0x7b] + vmpsadbw zmm2 {k7} {z}, zmm3, zmm4, 123 + +// CHECK: vmpsadbw xmm2, xmm3, xmmword ptr [esp + 8*esi + 
268435456], 123 +// CHECK: encoding: [0xc4,0xe3,0x61,0x42,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vmpsadbw xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123 + +// CHECK: vmpsadbw xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123 +// CHECK: encoding: [0x62,0xf3,0x66,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vmpsadbw xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123 + +// CHECK: vmpsadbw xmm2, xmm3, xmmword ptr [eax], 123 +// CHECK: encoding: [0xc4,0xe3,0x61,0x42,0x10,0x7b] + vmpsadbw xmm2, xmm3, xmmword ptr [eax], 123 + +// CHECK: vmpsadbw xmm2, xmm3, xmmword ptr [2*ebp - 512], 123 +// CHECK: encoding: [0xc4,0xe3,0x61,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vmpsadbw xmm2, xmm3, xmmword ptr [2*ebp - 512], 123 + +// CHECK: vmpsadbw xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032], 123 +// CHECK: encoding: [0x62,0xf3,0x66,0x8f,0x42,0x51,0x7f,0x7b] + vmpsadbw xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032], 123 + +// CHECK: vmpsadbw xmm2 {k7} {z}, xmm3, xmmword ptr [edx - 2048], 123 +// CHECK: encoding: [0x62,0xf3,0x66,0x8f,0x42,0x52,0x80,0x7b] + vmpsadbw xmm2 {k7} {z}, xmm3, xmmword ptr [edx - 2048], 123 + +// CHECK: vmpsadbw ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123 +// CHECK: encoding: [0xc4,0xe3,0x65,0x42,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vmpsadbw ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123 + +// CHECK: vmpsadbw ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123 +// CHECK: encoding: [0x62,0xf3,0x66,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vmpsadbw ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123 + +// CHECK: vmpsadbw ymm2, ymm3, ymmword ptr [eax], 123 +// CHECK: encoding: [0xc4,0xe3,0x65,0x42,0x10,0x7b] + vmpsadbw ymm2, ymm3, ymmword ptr [eax], 123 + +// CHECK: vmpsadbw ymm2, ymm3, ymmword ptr [2*ebp - 1024], 123 +// CHECK: encoding: [0xc4,0xe3,0x65,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vmpsadbw ymm2, ymm3, ymmword ptr [2*ebp - 1024], 123 + +// CHECK: vmpsadbw ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064], 123 +// CHECK: encoding: [0x62,0xf3,0x66,0xaf,0x42,0x51,0x7f,0x7b] + vmpsadbw ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064], 123 + +// CHECK: vmpsadbw ymm2 {k7} {z}, ymm3, ymmword ptr [edx - 4096], 123 +// CHECK: encoding: [0x62,0xf3,0x66,0xaf,0x42,0x52,0x80,0x7b] + vmpsadbw ymm2 {k7} {z}, ymm3, ymmword ptr [edx - 4096], 123 + +// CHECK: vmpsadbw zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123 +// CHECK: encoding: [0x62,0xf3,0x66,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b] + vmpsadbw zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123 + +// CHECK: vmpsadbw zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123 +// CHECK: encoding: [0x62,0xf3,0x66,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00,0x7b] + vmpsadbw zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123 + +// CHECK: vmpsadbw zmm2, zmm3, zmmword ptr [eax], 123 +// CHECK: encoding: [0x62,0xf3,0x66,0x48,0x42,0x10,0x7b] + vmpsadbw zmm2, zmm3, zmmword ptr [eax], 123 + +// CHECK: vmpsadbw zmm2, zmm3, zmmword ptr [2*ebp - 2048], 123 +// CHECK: encoding: [0x62,0xf3,0x66,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vmpsadbw zmm2, zmm3, zmmword ptr [2*ebp - 2048], 123 + +// CHECK: vmpsadbw zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128], 123 +// CHECK: encoding: [0x62,0xf3,0x66,0xcf,0x42,0x51,0x7f,0x7b] + vmpsadbw zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128], 123 + +// CHECK: vmpsadbw zmm2 {k7} {z}, zmm3, zmmword ptr [edx - 8192], 123 +// CHECK: encoding: [0x62,0xf3,0x66,0xcf,0x42,0x52,0x80,0x7b] + vmpsadbw zmm2 {k7} {z}, zmm3, zmmword ptr [edx - 8192], 123 + +// YMM 
Rounding + +// CHECK: vaddpd ymm2, ymm3, ymm4, {rn-sae} +// CHECK: encoding: [0x62,0xf1,0xe1,0x18,0x58,0xd4] + vaddpd ymm2, ymm3, ymm4, {rn-sae} + +// CHECK: vaddpd ymm2 {k7}, ymm3, ymm4, {rd-sae} +// CHECK: encoding: [0x62,0xf1,0xe1,0x3f,0x58,0xd4] + vaddpd ymm2 {k7}, ymm3, ymm4, {rd-sae} + +// CHECK: vaddpd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae} +// CHECK: encoding: [0x62,0xf1,0xe1,0xff,0x58,0xd4] + vaddpd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae} + +// CHECK: vaddph ymm2, ymm3, ymm4, {rn-sae} +// CHECK: encoding: [0x62,0xf5,0x60,0x18,0x58,0xd4] + vaddph ymm2, ymm3, ymm4, {rn-sae} + +// CHECK: vaddph ymm2 {k7}, ymm3, ymm4, {rd-sae} +// CHECK: encoding: [0x62,0xf5,0x60,0x3f,0x58,0xd4] + vaddph ymm2 {k7}, ymm3, ymm4, {rd-sae} + +// CHECK: vaddph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae} +// CHECK: encoding: [0x62,0xf5,0x60,0xff,0x58,0xd4] + vaddph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae} + +// CHECK: vaddps ymm2, ymm3, ymm4, {rn-sae} +// CHECK: encoding: [0x62,0xf1,0x60,0x18,0x58,0xd4] + vaddps ymm2, ymm3, ymm4, {rn-sae} + +// CHECK: vaddps ymm2 {k7}, ymm3, ymm4, {rd-sae} +// CHECK: encoding: [0x62,0xf1,0x60,0x3f,0x58,0xd4] + vaddps ymm2 {k7}, ymm3, ymm4, {rd-sae} + +// CHECK: vaddps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae} +// CHECK: encoding: [0x62,0xf1,0x60,0xff,0x58,0xd4] + vaddps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae} diff --git a/llvm/test/MC/X86/avx10_2ni-64-att.s b/llvm/test/MC/X86/avx10_2ni-64-att.s new file mode 100644 index 0000000000000..8ee4bc3f64127 --- /dev/null +++ b/llvm/test/MC/X86/avx10_2ni-64-att.s @@ -0,0 +1,149 @@ +// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s + +// VMPSADBW + +// CHECK: vmpsadbw $123, %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x83,0x46,0x00,0x42,0xf0,0x7b] + vmpsadbw $123, %xmm24, %xmm23, %xmm22 + +// CHECK: vmpsadbw $123, %xmm24, %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0x83,0x46,0x07,0x42,0xf0,0x7b] + vmpsadbw $123, %xmm24, %xmm23, %xmm22 {%k7} + +// CHECK: vmpsadbw $123, %xmm24, %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x83,0x46,0x87,0x42,0xf0,0x7b] + vmpsadbw $123, %xmm24, %xmm23, %xmm22 {%k7} {z} + +// CHECK: vmpsadbw $123, %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x83,0x46,0x20,0x42,0xf0,0x7b] + vmpsadbw $123, %ymm24, %ymm23, %ymm22 + +// CHECK: vmpsadbw $123, %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x83,0x46,0x27,0x42,0xf0,0x7b] + vmpsadbw $123, %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vmpsadbw $123, %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x83,0x46,0xa7,0x42,0xf0,0x7b] + vmpsadbw $123, %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vmpsadbw $123, %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x83,0x46,0x40,0x42,0xf0,0x7b] + vmpsadbw $123, %zmm24, %zmm23, %zmm22 + +// CHECK: vmpsadbw $123, %zmm24, %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0x83,0x46,0x47,0x42,0xf0,0x7b] + vmpsadbw $123, %zmm24, %zmm23, %zmm22 {%k7} + +// CHECK: vmpsadbw $123, %zmm24, %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0x83,0x46,0xc7,0x42,0xf0,0x7b] + vmpsadbw $123, %zmm24, %zmm23, %zmm22 {%k7} {z} + +// CHECK: vmpsadbw $123, 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa3,0x46,0x00,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vmpsadbw $123, 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vmpsadbw $123, 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xc3,0x46,0x07,0x42,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vmpsadbw $123, 291(%r8,%rax,4), %xmm23, %xmm22 {%k7} + +// CHECK: vmpsadbw $123, (%rip), %xmm23, %xmm22 +// CHECK: encoding: 
[0x62,0xe3,0x46,0x00,0x42,0x35,0x00,0x00,0x00,0x00,0x7b] + vmpsadbw $123, (%rip), %xmm23, %xmm22 + +// CHECK: vmpsadbw $123, -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe3,0x46,0x00,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b] + vmpsadbw $123, -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vmpsadbw $123, 2032(%rcx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x46,0x87,0x42,0x71,0x7f,0x7b] + vmpsadbw $123, 2032(%rcx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vmpsadbw $123, -2048(%rdx), %xmm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x46,0x87,0x42,0x72,0x80,0x7b] + vmpsadbw $123, -2048(%rdx), %xmm23, %xmm22 {%k7} {z} + +// CHECK: vmpsadbw $123, 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa3,0x46,0x20,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vmpsadbw $123, 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vmpsadbw $123, 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xc3,0x46,0x27,0x42,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vmpsadbw $123, 291(%r8,%rax,4), %ymm23, %ymm22 {%k7} + +// CHECK: vmpsadbw $123, (%rip), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe3,0x46,0x20,0x42,0x35,0x00,0x00,0x00,0x00,0x7b] + vmpsadbw $123, (%rip), %ymm23, %ymm22 + +// CHECK: vmpsadbw $123, -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe3,0x46,0x20,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b] + vmpsadbw $123, -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vmpsadbw $123, 4064(%rcx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x46,0xa7,0x42,0x71,0x7f,0x7b] + vmpsadbw $123, 4064(%rcx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vmpsadbw $123, -4096(%rdx), %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x46,0xa7,0x42,0x72,0x80,0x7b] + vmpsadbw $123, -4096(%rdx), %ymm23, %ymm22 {%k7} {z} + +// CHECK: vmpsadbw $123, 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa3,0x46,0x40,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b] + vmpsadbw $123, 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vmpsadbw $123, 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xc3,0x46,0x47,0x42,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b] + vmpsadbw $123, 291(%r8,%rax,4), %zmm23, %zmm22 {%k7} + +// CHECK: vmpsadbw $123, (%rip), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe3,0x46,0x40,0x42,0x35,0x00,0x00,0x00,0x00,0x7b] + vmpsadbw $123, (%rip), %zmm23, %zmm22 + +// CHECK: vmpsadbw $123, -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe3,0x46,0x40,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b] + vmpsadbw $123, -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vmpsadbw $123, 8128(%rcx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x46,0xc7,0x42,0x71,0x7f,0x7b] + vmpsadbw $123, 8128(%rcx), %zmm23, %zmm22 {%k7} {z} + +// CHECK: vmpsadbw $123, -8192(%rdx), %zmm23, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xe3,0x46,0xc7,0x42,0x72,0x80,0x7b] + vmpsadbw $123, -8192(%rdx), %zmm23, %zmm22 {%k7} {z} + +// YMM Rounding + +// CHECK: vaddpd {rn-sae}, %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x81,0xc1,0x10,0x58,0xf0] + vaddpd {rn-sae}, %ymm24, %ymm23, %ymm22 + +// CHECK: vaddpd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x81,0xc1,0x37,0x58,0xf0] + vaddpd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vaddpd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x81,0xc1,0xf7,0x58,0xf0] + vaddpd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vaddph {rn-sae}, %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x85,0x40,0x10,0x58,0xf0] + vaddph {rn-sae}, %ymm24, 
%ymm23, %ymm22 + +// CHECK: vaddph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x85,0x40,0x37,0x58,0xf0] + vaddph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vaddph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x85,0x40,0xf7,0x58,0xf0] + vaddph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z} + +// CHECK: vaddps {rn-sae}, %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x81,0x40,0x10,0x58,0xf0] + vaddps {rn-sae}, %ymm24, %ymm23, %ymm22 + +// CHECK: vaddps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7} +// CHECK: encoding: [0x62,0x81,0x40,0x37,0x58,0xf0] + vaddps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7} + +// CHECK: vaddps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0x81,0x40,0xf7,0x58,0xf0] + vaddps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z} diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index 4a52a58f2de1c..f31c4baada141 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -2889,6 +2889,9 @@ static const X86FoldTableEntry Table2[] = { {X86::VMOVUPSZ256rrkz, X86::VMOVUPSZ256rmkz, TB_NO_REVERSE}, {X86::VMOVUPSZrrkz, X86::VMOVUPSZrmkz, TB_NO_REVERSE}, {X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0}, + {X86::VMPSADBWZ128rri, X86::VMPSADBWZ128rmi, 0}, + {X86::VMPSADBWZ256rri, X86::VMPSADBWZ256rmi, 0}, + {X86::VMPSADBWZrri, X86::VMPSADBWZrmi, 0}, {X86::VMPSADBWrri, X86::VMPSADBWrmi, 0}, {X86::VMULPDYrr, X86::VMULPDYrm, 0}, {X86::VMULPDZ128rr, X86::VMULPDZ128rm, 0}, @@ -4709,6 +4712,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VMOVUPSZ128rrk, X86::VMOVUPSZ128rmk, TB_NO_REVERSE}, {X86::VMOVUPSZ256rrk, X86::VMOVUPSZ256rmk, TB_NO_REVERSE}, {X86::VMOVUPSZrrk, X86::VMOVUPSZrmk, TB_NO_REVERSE}, + {X86::VMPSADBWZ128rrikz, X86::VMPSADBWZ128rmikz, 0}, + {X86::VMPSADBWZ256rrikz, X86::VMPSADBWZ256rmikz, 0}, + {X86::VMPSADBWZrrikz, X86::VMPSADBWZrmikz, 0}, {X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0}, {X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0}, {X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0}, @@ -6097,6 +6103,9 @@ static const X86FoldTableEntry Table4[] = { {X86::VMINSDZrr_Intk, X86::VMINSDZrm_Intk, TB_NO_REVERSE}, {X86::VMINSHZrr_Intk, X86::VMINSHZrm_Intk, TB_NO_REVERSE}, {X86::VMINSSZrr_Intk, X86::VMINSSZrm_Intk, TB_NO_REVERSE}, + {X86::VMPSADBWZ128rrik, X86::VMPSADBWZ128rmik, 0}, + {X86::VMPSADBWZ256rrik, X86::VMPSADBWZ256rmik, 0}, + {X86::VMPSADBWZrrik, X86::VMPSADBWZrmik, 0}, {X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0}, {X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0}, {X86::VMULPDZrrk, X86::VMULPDZrmk, 0}, diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp index 7d28c48055c34..b0acd4ea4224a 100644 --- a/llvm/utils/TableGen/X86DisassemblerTables.cpp +++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp @@ -575,6 +575,31 @@ static inline bool inheritsFrom(InstructionContext child, case IC_EVEX_W_NF: case IC_EVEX_W_B_NF: return false; + case IC_EVEX_B_U: + case IC_EVEX_XS_B_U: + case IC_EVEX_XD_B_U: + case IC_EVEX_OPSIZE_B_U: + case IC_EVEX_W_B_U: + case IC_EVEX_W_XS_B_U: + case IC_EVEX_W_XD_B_U: + case IC_EVEX_W_OPSIZE_B_U: + case IC_EVEX_K_B_U: + case IC_EVEX_XS_K_B_U: + case IC_EVEX_XD_K_B_U: + case IC_EVEX_OPSIZE_K_B_U: + case IC_EVEX_W_K_B_U: + case IC_EVEX_W_XS_K_B_U: + case IC_EVEX_W_XD_K_B_U: + case IC_EVEX_W_OPSIZE_K_B_U: + case IC_EVEX_KZ_B_U: + case IC_EVEX_XS_KZ_B_U: + case IC_EVEX_XD_KZ_B_U: + case IC_EVEX_OPSIZE_KZ_B_U: + case IC_EVEX_W_KZ_B_U: + case IC_EVEX_W_XS_KZ_B_U: + case 
IC_EVEX_W_XD_KZ_B_U: + case IC_EVEX_W_OPSIZE_KZ_B_U: + return false; default: errs() << "Unknown instruction class: " << stringForContext((InstructionContext)parent) << "\n"; @@ -926,7 +951,9 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const { else o << "IC_VEX"; - if ((index & ATTR_EVEX) && (index & ATTR_EVEXL2)) + if ((index & ATTR_EVEXB) && (index & ATTR_EVEXU)) + ; // Ignore ATTR_VEXL and ATTR_EVEXL2 under YMM rounding. + else if ((index & ATTR_EVEX) && (index & ATTR_EVEXL2)) o << "_L2"; else if (index & ATTR_VEXL) o << "_L"; @@ -949,6 +976,9 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const { if (index & ATTR_EVEXB) o << "_B"; + + if ((index & ATTR_EVEXB) && (index & ATTR_EVEXU)) + o << "_U"; } } else if ((index & ATTR_64BIT) && (index & ATTR_REX2)) o << "IC_64BIT_REX2"; diff --git a/llvm/utils/TableGen/X86ManualInstrMapping.def b/llvm/utils/TableGen/X86ManualInstrMapping.def index 58f5449f3b27b..f0154b80a80db 100644 --- a/llvm/utils/TableGen/X86ManualInstrMapping.def +++ b/llvm/utils/TableGen/X86ManualInstrMapping.def @@ -77,6 +77,10 @@ ENTRY(VMOVDQU16Z256rr, VMOVDQUYrr) ENTRY(VMOVDQU8Z256mr, VMOVDQUYmr) ENTRY(VMOVDQU8Z256rm, VMOVDQUYrm) ENTRY(VMOVDQU8Z256rr, VMOVDQUYrr) +ENTRY(VMPSADBWZ128rmi, VMPSADBWrmi) +ENTRY(VMPSADBWZ128rri, VMPSADBWrri) +ENTRY(VMPSADBWZ256rmi, VMPSADBWYrmi) +ENTRY(VMPSADBWZ256rri, VMPSADBWYrri) ENTRY(VSHUFF32X4Z256rmi, VPERM2F128rm) ENTRY(VSHUFF32X4Z256rri, VPERM2F128rr) ENTRY(VSHUFF64X2Z256rmi, VPERM2F128rm) diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp index a2bc037b690c6..6aae57eca89d3 100644 --- a/llvm/utils/TableGen/X86RecognizableInstr.cpp +++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp @@ -126,6 +126,7 @@ RecognizableInstrBase::RecognizableInstrBase(const CodeGenInstruction &insn) { HasEVEX_K = Rec->getValueAsBit("hasEVEX_K"); HasEVEX_KZ = Rec->getValueAsBit("hasEVEX_Z"); HasEVEX_B = Rec->getValueAsBit("hasEVEX_B"); + HasEVEX_U = Rec->getValueAsBit("hasEVEX_U"); HasEVEX_NF = Rec->getValueAsBit("hasEVEX_NF"); HasTwoConditionalOps = Rec->getValueAsBit("hasTwoConditionalOps"); IsCodeGenOnly = Rec->getValueAsBit("isCodeGenOnly"); @@ -191,6 +192,8 @@ void RecognizableInstr::processInstr(DisassemblerTables &tables, #define EVEX_NF(n) (HasEVEX_NF ? n##_NF : n) #define EVEX_B_NF(n) (HasEVEX_B ? EVEX_NF(n##_B) : EVEX_NF(n)) #define EVEX_KB_ADSIZE(n) AdSize == X86Local::AdSize32 ? n##_ADSIZE : EVEX_KB(n) +#define EVEX_KB_U(n) \ + (HasEVEX_KZ ? n##_KZ_B_U : (HasEVEX_K ? 
n##_K_B_U : n##_B_U)) InstructionContext RecognizableInstr::insnContext() const { InstructionContext insnContext; @@ -200,7 +203,28 @@ InstructionContext RecognizableInstr::insnContext() const { errs() << "Don't support VEX.L if EVEX_L2 is enabled: " << Name << "\n"; llvm_unreachable("Don't support VEX.L if EVEX_L2 is enabled"); } - if (HasEVEX_NF) { + if (EncodeRC && HasEVEX_U) { + // EVEX_U + if (HasREX_W) { + if (OpPrefix == X86Local::PD) + insnContext = EVEX_KB_U(IC_EVEX_W_OPSIZE); + else if (OpPrefix == X86Local::XS) + insnContext = EVEX_KB_U(IC_EVEX_W_XS); + else if (OpPrefix == X86Local::XD) + insnContext = EVEX_KB_U(IC_EVEX_W_XD); + else if (OpPrefix == X86Local::PS) + insnContext = EVEX_KB_U(IC_EVEX_W); + } else { + if (OpPrefix == X86Local::PD) + insnContext = EVEX_KB_U(IC_EVEX_OPSIZE); + else if (OpPrefix == X86Local::XS) + insnContext = EVEX_KB_U(IC_EVEX_XS); + else if (OpPrefix == X86Local::XD) + insnContext = EVEX_KB_U(IC_EVEX_XD); + else if (OpPrefix == X86Local::PS) + insnContext = EVEX_KB_U(IC_EVEX); + } + } else if (HasEVEX_NF) { if (OpPrefix == X86Local::PD) insnContext = EVEX_B_NF(IC_EVEX_OPSIZE); else if (HasREX_W) diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h index 12fb41750cb3f..eb2cee7bbbf87 100644 --- a/llvm/utils/TableGen/X86RecognizableInstr.h +++ b/llvm/utils/TableGen/X86RecognizableInstr.h @@ -214,6 +214,8 @@ struct RecognizableInstrBase { bool HasEVEX_KZ; /// The hasEVEX_B field from the record bool HasEVEX_B; + /// The hasEVEX_U field from the record + bool HasEVEX_U; /// The hasEVEX_NF field from the record bool HasEVEX_NF; /// The hasTwoConditionalOps field from the record
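
A minimal stand-alone IR sketch, separate from the patch itself, of how the new 256-bit rounding intrinsic exercised in avx10_2ni-intrinsics.ll can be driven. The declaration mirrors the one in the test; the immediate 8 for {rn-sae} is an assumption, inferred from the 9/10/11 values the tests above use for {rd-sae}/{ru-sae}/{rz-sae}. Running this through llc -mattr=+avx10.2-256 should select vaddpd with embedded rounding, analogous to test_int_x86_vaddpd256 above.

; Stand-alone sketch; the rounding immediate 8 == {rn-sae} is assumed by
; analogy with the 9/10/11 values used in the tests above.
declare <4 x double> @llvm.x86.avx10.vaddpd256(<4 x double>, <4 x double>, i32)

define <4 x double> @sketch_vaddpd256_rn(<4 x double> %a, <4 x double> %b) nounwind {
  ; Add with round-to-nearest and suppress-all-exceptions semantics.
  %r = call <4 x double> @llvm.x86.avx10.vaddpd256(<4 x double> %a, <4 x double> %b, i32 8)
  ret <4 x double> %r
}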