llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-driver @llvm/pr-subscribers-clang Author: Freddy Ye (FreddyLeaf) <details> <summary>Changes</summary> Cont. patch after https://github.com/llvm/llvm-project/pull/75580 --- Patch is 163.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/92883.diff 47 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsX86.def (-21) - (modified) clang/include/clang/Driver/Options.td (-6) - (modified) clang/lib/Basic/Targets/X86.cpp (-21) - (modified) clang/lib/Basic/Targets/X86.h (-3) - (modified) clang/lib/Headers/CMakeLists.txt (-2) - (removed) clang/lib/Headers/avx512erintrin.h (-271) - (removed) clang/lib/Headers/avx512pfintrin.h (-92) - (modified) clang/lib/Headers/immintrin.h (-8) - (modified) clang/lib/Sema/SemaChecking.cpp (-30) - (removed) clang/test/CodeGen/X86/avx512er-builtins.c (-347) - (removed) clang/test/CodeGen/X86/avx512pf-builtins.c (-100) - (modified) clang/test/CodeGen/attr-cpuspecific.c (+5-5) - (modified) clang/test/CodeGen/attr-target-x86.c (+2-2) - (modified) clang/test/CodeGen/function-target-features.c (+2-2) - (modified) clang/test/CodeGen/target-builtin-noerror.c (-2) - (modified) clang/test/Driver/cl-x86-flags.c (+2-8) - (modified) clang/test/Driver/x86-target-features.c (+4-9) - (modified) clang/test/Frontend/x86-target-cpu.c (+2-8) - (modified) clang/test/Preprocessor/predefined-arch-macros.c (-12) - (modified) clang/test/Preprocessor/x86_target_features.c (-50) - (modified) clang/test/Sema/builtins-x86.c (-8) - (modified) llvm/include/llvm/IR/IntrinsicsX86.td (-84) - (modified) llvm/include/llvm/TargetParser/X86TargetParser.def (+3-6) - (modified) llvm/lib/Target/X86/X86.td (-12) - (modified) llvm/lib/Target/X86/X86Instr3DNow.td (+1-2) - (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+65-16) - (modified) llvm/lib/Target/X86/X86InstrFragments.td (+1-7) - (modified) llvm/lib/Target/X86/X86InstrPredicates.td (-3) - (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (-27) - (modified) llvm/lib/Target/X86/X86Subtarget.h (+3-5) - (modified) llvm/lib/TargetParser/Host.cpp (-9) - (modified) llvm/lib/TargetParser/X86TargetParser.cpp (+3-3) - (modified) llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll (+1-1) - (modified) llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll (-24) - (modified) llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll (-24) - (removed) llvm/test/CodeGen/X86/avx512er-intrinsics.ll (-306) - (modified) llvm/test/CodeGen/X86/crc32-target-feature.ll (+2-2) - (modified) llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll (-5) - (modified) llvm/test/CodeGen/X86/prefetch.ll (-17) - (modified) llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll (-22) - (modified) llvm/test/CodeGen/X86/unfoldMemoryOperand.mir (+1-1) - (modified) llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll (+1-1) - (modified) llvm/test/Transforms/LoopVectorize/X86/pr23997.ll (+1-1) - (modified) llvm/test/Transforms/LoopVectorize/X86/pr54634.ll (+1-1) - (modified) llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll (+1-1) - (modified) llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll (+1-1) - (removed) llvm/test/tools/llvm-mca/X86/Generic/resources-avx512er.s (-373) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index eafcc219c1096..7074479786b97 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -832,23 +832,11 @@ TARGET_BUILTIN(__builtin_ia32_rsqrt14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512") TARGET_BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512") -TARGET_BUILTIN(__builtin_ia32_rsqrt28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rsqrt28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512") -TARGET_BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512") - TARGET_BUILTIN(__builtin_ia32_rcp14sd_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_rcp14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512") TARGET_BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512") -TARGET_BUILTIN(__builtin_ia32_rcp28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rcp28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512") -TARGET_BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512") -TARGET_BUILTIN(__builtin_ia32_exp2pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512") -TARGET_BUILTIN(__builtin_ia32_exp2ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512") - TARGET_BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512") TARGET_BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512") TARGET_BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f,evex512") @@ -960,15 +948,6 @@ TARGET_BUILTIN(__builtin_ia32_scattersiv16si, "vv*UsV16iV16iIi", "nV:512:", "avx TARGET_BUILTIN(__builtin_ia32_scatterdiv8di, "vv*UcV8OiV8OiIi", "nV:512:", "avx512f,evex512") TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vv*UcV8OiV8iIi", "nV:512:", "avx512f,evex512") -TARGET_BUILTIN(__builtin_ia32_gatherpfdpd, "vUcV8ivC*IiIi", "nV:512:", "avx512pf,evex512") -TARGET_BUILTIN(__builtin_ia32_gatherpfdps, "vUsV16ivC*IiIi", "nV:512:", "avx512pf,evex512") -TARGET_BUILTIN(__builtin_ia32_gatherpfqpd, "vUcV8OivC*IiIi", "nV:512:", "avx512pf,evex512") -TARGET_BUILTIN(__builtin_ia32_gatherpfqps, "vUcV8OivC*IiIi", "nV:512:", "avx512pf,evex512") -TARGET_BUILTIN(__builtin_ia32_scatterpfdpd, "vUcV8iv*IiIi", "nV:512:", "avx512pf,evex512") -TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16iv*IiIi", "nV:512:", "avx512pf,evex512") -TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf,evex512") -TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf,evex512") - TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 7bb781667e926..515f632f503dd 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6111,14 +6111,10 @@ def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>; def mno_avx512cd : Flag<["-"], "mno-avx512cd">, Group<m_x86_Features_Group>; def mavx512dq : Flag<["-"], "mavx512dq">, Group<m_x86_Features_Group>; def mno_avx512dq : Flag<["-"], "mno-avx512dq">, Group<m_x86_Features_Group>; -def mavx512er : Flag<["-"], "mavx512er">, Group<m_x86_Features_Group>; -def mno_avx512er : Flag<["-"], "mno-avx512er">, Group<m_x86_Features_Group>; def mavx512fp16 : Flag<["-"], "mavx512fp16">, Group<m_x86_Features_Group>; def mno_avx512fp16 : Flag<["-"], "mno-avx512fp16">, Group<m_x86_Features_Group>; def mavx512ifma : Flag<["-"], "mavx512ifma">, Group<m_x86_Features_Group>; def mno_avx512ifma : Flag<["-"], "mno-avx512ifma">, Group<m_x86_Features_Group>; -def mavx512pf : Flag<["-"], "mavx512pf">, Group<m_x86_Features_Group>; -def mno_avx512pf : Flag<["-"], "mno-avx512pf">, Group<m_x86_Features_Group>; def mavx512vbmi : Flag<["-"], "mavx512vbmi">, Group<m_x86_Features_Group>; def mno_avx512vbmi : Flag<["-"], "mno-avx512vbmi">, Group<m_x86_Features_Group>; def mavx512vbmi2 : Flag<["-"], "mavx512vbmi2">, Group<m_x86_Features_Group>; @@ -6209,8 +6205,6 @@ def mpopcnt : Flag<["-"], "mpopcnt">, Group<m_x86_Features_Group>; def mno_popcnt : Flag<["-"], "mno-popcnt">, Group<m_x86_Features_Group>; def mprefetchi : Flag<["-"], "mprefetchi">, Group<m_x86_Features_Group>; def mno_prefetchi : Flag<["-"], "mno-prefetchi">, Group<m_x86_Features_Group>; -def mprefetchwt1 : Flag<["-"], "mprefetchwt1">, Group<m_x86_Features_Group>; -def mno_prefetchwt1 : Flag<["-"], "mno-prefetchwt1">, Group<m_x86_Features_Group>; def mprfchw : Flag<["-"], "mprfchw">, Group<m_x86_Features_Group>; def mno_prfchw : Flag<["-"], "mno-prfchw">, Group<m_x86_Features_Group>; def mptwrite : Flag<["-"], "mptwrite">, Group<m_x86_Features_Group>; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index b823eaf6ce336..3a30cff917bb4 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -310,15 +310,9 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasAVX512VNNI = true; } else if (Feature == "+avx512bf16") { HasAVX512BF16 = true; - } else if (Feature == "+avx512er") { - HasAVX512ER = true; - Diags.Report(diag::warn_knl_knm_isa_support_removed); } else if (Feature == "+avx512fp16") { HasAVX512FP16 = true; HasLegalHalfType = true; - } else if (Feature == "+avx512pf") { - HasAVX512PF = true; - Diags.Report(diag::warn_knl_knm_isa_support_removed); } else if (Feature == "+avx512dq") { HasAVX512DQ = true; } else if (Feature == "+avx512bitalg") { @@ -375,9 +369,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasWBNOINVD = true; } else if (Feature == "+prefetchi") { HasPREFETCHI = true; - } else if (Feature == "+prefetchwt1") { - HasPREFETCHWT1 = true; - Diags.Report(diag::warn_knl_knm_isa_support_removed); } else if (Feature == "+clzero") { HasCLZERO = true; } else if (Feature == "+cldemote") { @@ -840,12 +831,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__AVX512VNNI__"); if (HasAVX512BF16) Builder.defineMacro("__AVX512BF16__"); - if (HasAVX512ER) - Builder.defineMacro("__AVX512ER__"); if (HasAVX512FP16) Builder.defineMacro("__AVX512FP16__"); - if (HasAVX512PF) - Builder.defineMacro("__AVX512PF__"); if (HasAVX512DQ) Builder.defineMacro("__AVX512DQ__"); if (HasAVX512BITALG) @@ -897,8 +884,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__SM4__"); if (HasPREFETCHI) Builder.defineMacro("__PREFETCHI__"); - if (HasPREFETCHWT1) - Builder.defineMacro("__PREFETCHWT1__"); if (HasCLZERO) Builder.defineMacro("__CLZERO__"); if (HasKL) @@ -1084,9 +1069,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("avx512vpopcntdq", true) .Case("avx512vnni", true) .Case("avx512bf16", true) - .Case("avx512er", true) .Case("avx512fp16", true) - .Case("avx512pf", true) .Case("avx512dq", true) .Case("avx512bitalg", true) .Case("avx512bw", true) @@ -1134,7 +1117,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("pku", true) .Case("popcnt", true) .Case("prefetchi", true) - .Case("prefetchwt1", true) .Case("prfchw", true) .Case("ptwrite", true) .Case("raoint", true) @@ -1201,9 +1183,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ) .Case("avx512vnni", HasAVX512VNNI) .Case("avx512bf16", HasAVX512BF16) - .Case("avx512er", HasAVX512ER) .Case("avx512fp16", HasAVX512FP16) - .Case("avx512pf", HasAVX512PF) .Case("avx512dq", HasAVX512DQ) .Case("avx512bitalg", HasAVX512BITALG) .Case("avx512bw", HasAVX512BW) @@ -1253,7 +1233,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("pku", HasPKU) .Case("popcnt", HasPOPCNT) .Case("prefetchi", HasPREFETCHI) - .Case("prefetchwt1", HasPREFETCHWT1) .Case("prfchw", HasPRFCHW) .Case("ptwrite", HasPTWRITE) .Case("raoint", HasRAOINT) diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 6a0a6cb84203d..0633b7e0da96a 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -103,8 +103,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasAVX512VNNI = false; bool HasAVX512FP16 = false; bool HasAVX512BF16 = false; - bool HasAVX512ER = false; - bool HasAVX512PF = false; bool HasAVX512DQ = false; bool HasAVX512BITALG = false; bool HasAVX512BW = false; @@ -136,7 +134,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasCLWB = false; bool HasMOVBE = false; bool HasPREFETCHI = false; - bool HasPREFETCHWT1 = false; bool HasRDPID = false; bool HasRDPRU = false; bool HasRetpolineExternalThunk = false; diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 5f02c71f6ca51..dbff92b4e59b4 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -153,12 +153,10 @@ set(x86_files avx512bwintrin.h avx512cdintrin.h avx512dqintrin.h - avx512erintrin.h avx512fintrin.h avx512fp16intrin.h avx512ifmaintrin.h avx512ifmavlintrin.h - avx512pfintrin.h avx512vbmi2intrin.h avx512vbmiintrin.h avx512vbmivlintrin.h diff --git a/clang/lib/Headers/avx512erintrin.h b/clang/lib/Headers/avx512erintrin.h deleted file mode 100644 index 1c5a2d2d208ff..0000000000000 --- a/clang/lib/Headers/avx512erintrin.h +++ /dev/null @@ -1,271 +0,0 @@ -/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===-----------------------------------------------------------------------=== - */ -#ifndef __IMMINTRIN_H -#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead." -#endif - -#ifndef __AVX512ERINTRIN_H -#define __AVX512ERINTRIN_H - -/* exp2a23 */ -#define _mm512_exp2a23_round_pd(A, R) \ - ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R))) - -#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \ - ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(S), (__mmask8)(M), \ - (int)(R))) - -#define _mm512_maskz_exp2a23_round_pd(M, A, R) \ - ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(M), (int)(R))) - -#define _mm512_exp2a23_pd(A) \ - _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_mask_exp2a23_pd(S, M, A) \ - _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_maskz_exp2a23_pd(M, A) \ - _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_exp2a23_round_ps(A, R) \ - ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R))) - -#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \ - ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(S), (__mmask16)(M), \ - (int)(R))) - -#define _mm512_maskz_exp2a23_round_ps(M, A, R) \ - ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(M), (int)(R))) - -#define _mm512_exp2a23_ps(A) \ - _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_mask_exp2a23_ps(S, M, A) \ - _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_maskz_exp2a23_ps(M, A) \ - _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) - -/* rsqrt28 */ -#define _mm512_rsqrt28_round_pd(A, R) \ - ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R))) - -#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \ - ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(S), (__mmask8)(M), \ - (int)(R))) - -#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \ - ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(M), (int)(R))) - -#define _mm512_rsqrt28_pd(A) \ - _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_mask_rsqrt28_pd(S, M, A) \ - _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_maskz_rsqrt28_pd(M, A) \ - _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_rsqrt28_round_ps(A, R) \ - ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R))) - -#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \ - ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(S), (__mmask16)(M), \ - (int)(R))) - -#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \ - ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(M), (int)(R))) - -#define _mm512_rsqrt28_ps(A) \ - _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_mask_rsqrt28_ps(S, M, A) \ - _mm512_mask_rsqrt28_round_ps((S), (M), A, _MM_FROUND_CUR_DIRECTION) - -#define _mm512_maskz_rsqrt28_ps(M, A) \ - _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm_rsqrt28_round_ss(A, B, R) \ - ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R))) - -#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \ - ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(S), \ - (__mmask8)(M), (int)(R))) - -#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \ - ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(M), (int)(R))) - -#define _mm_rsqrt28_ss(A, B) \ - _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) - -#define _mm_mask_rsqrt28_ss(S, M, A, B) \ - _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) - -#define _mm_maskz_rsqrt28_ss(M, A, B) \ - _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) - -#define _mm_rsqrt28_round_sd(A, B, R) \ - ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R))) - -#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \ - ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(S), \ - (__mmask8)(M), (int)(R))) - -#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \ - ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(M), (int)(R))) - -#define _mm_rsqrt28_sd(A, B) \ - _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) - -#define _mm_mask_rsqrt28_sd(S, M, A, B) \ - _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) - -#define _mm_maskz_rsqrt28_sd(M, A, B) \ - _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) - -/* rcp28 */ -#define _mm512_rcp28_round_pd(A, R) \ - ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R))) - -#define _mm512_mask_rcp28_round_pd(S, M, A, R) \ - ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(S), (__mmask8)(M), \ - (int)(R))) - -#define _mm512_maskz_rcp28_round_pd(M, A, R) \ - ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(M), (int)(R))) - -#define _mm512_rcp28_pd(A) \ - _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_mask_rcp28_pd(S, M, A) \ - _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_maskz_rcp28_pd(M, A) \ - _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_rcp28_round_ps(A, R) \ - ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R))) - -#define _mm512_mask_rcp28_round_ps(S, M, A, R) \ - ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(S), (__mmask16)(M), \ - (int)(R))) - -#define _mm512_maskz_rcp28_round_ps(M, A, R) \ - ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(M), (int)(R))) - -#define _mm512_rcp28_ps(A) \ - _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_mask_rcp28_ps(S, M, A) \ - _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_maskz_rcp28_ps(M, A) \ - _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm_rcp28_round_ss(A, B, R) \ - ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/92883 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits