llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-ir Author: Freddy Ye (FreddyLeaf) <details> <summary>Changes</summary> Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965 --- Patch is 613.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101600.diff 23 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsX86.def (+47) - (modified) clang/lib/Headers/CMakeLists.txt (+2) - (added) clang/lib/Headers/avx10_2_512convertintrin.h (+304) - (added) clang/lib/Headers/avx10_2convertintrin.h (+560) - (modified) clang/lib/Headers/immintrin.h (+2) - (modified) clang/lib/Sema/SemaX86.cpp (+2) - (added) clang/test/CodeGen/X86/avx10_2_512convert-builtins.c (+274) - (added) clang/test/CodeGen/X86/avx10_2convert-builtins.c (+530) - (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+130) - (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+38) - (modified) llvm/lib/Target/X86/X86ISelLowering.h (+24) - (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+431) - (modified) llvm/lib/Target/X86/X86InstrFragmentsSIMD.td (+89) - (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+85) - (added) llvm/test/CodeGen/X86/avx10_2_512convert-intrinsics.ll (+578) - (added) llvm/test/CodeGen/X86/avx10_2convert-intrinsics.ll (+1147) - (added) llvm/test/MC/Disassembler/X86/avx10.2convert-32.txt (+1491) - (added) llvm/test/MC/Disassembler/X86/avx10.2convert-64.txt (+1491) - (added) llvm/test/MC/X86/avx10.2convert-32-att.s (+1490) - (added) llvm/test/MC/X86/avx10.2convert-32-intel.s (+1490) - (added) llvm/test/MC/X86/avx10.2convert-64-att.s (+1490) - (added) llvm/test/MC/X86/avx10.2convert-64-intel.s (+1490) - (modified) llvm/test/TableGen/x86-fold-tables.inc (+243) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index c49b5c36da4fc..1deac6247a361 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -2158,6 +2158,53 @@ TARGET_BUILTIN(__builtin_ia32_vminmaxps512_round_mask, "V16fV16fV16fIiV16fUsIi", TARGET_BUILTIN(__builtin_ia32_vminmaxsd_round_mask, "V2dV2dV2dIiV2dUcIi", "nV:128:", "avx10.2-256") TARGET_BUILTIN(__builtin_ia32_vminmaxsh_round_mask, "V8xV8xV8xIiV8xUcIi", "nV:128:", "avx10.2-256") TARGET_BUILTIN(__builtin_ia32_vminmaxss_round_mask, "V4fV4fV4fIiV4fUcIi", "nV:128:", "avx10.2-256") + +// AVX10.2 CONVERT +TARGET_BUILTIN(__builtin_ia32_vcvt2ps2phx128_mask, "V8xV4fV4fV8xUc", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvt2ps2phx256_mask, "V16xV8fV8fV16xUsIi", "ncV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvt2ps2phx512_mask, "V32xV16fV16fV32xUiIi", "ncV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8_128_mask, "V16cV16cV8xV16cUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8_256_mask, "V16cV32cV16xV16cUs", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8_512_mask, "V32cV64cV32xV32cUi", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8s_128_mask, "V16cV16cV8xV16cUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8s_256_mask, "V16cV32cV16xV16cUs", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8s_512_mask, "V32cV64cV32xV32cUi", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8_128_mask, "V16cV16cV8xV16cUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8_256_mask, "V16cV32cV16xV16cUs", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8_512_mask, "V32cV64cV32xV32cUi", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8s_128_mask, "V16cV16cV8xV16cUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8s_256_mask, "V16cV32cV16xV16cUs", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8s_512_mask, "V32cV64cV32xV32cUi", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8_128, "V16cV8xV8x", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8_256, "V32cV16xV16x", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8_512, "V64cV32xV32x", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8s_128, "V16cV8xV8x", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8s_256, "V32cV16xV16x", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8s_512, "V64cV32xV32x", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8_128, "V16cV8xV8x", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8_256, "V32cV16xV16x", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8_512, "V64cV32xV32x", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8s_128, "V16cV8xV8x", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8s_256, "V32cV16xV16x", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8s_512, "V64cV32xV32x", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvtnebf8_2ph128_mask, "V8xV16cV8xUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtnebf8_2ph256_mask, "V16xV16cV16xUs", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtnebf8_2ph512_mask, "V32xV32cV32xUi", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvtnehf8_2ph128_mask, "V8xV16cV8xUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtnehf8_2ph256_mask, "V16xV16cV16xUs", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtnehf8_2ph512_mask, "V32xV32cV32xUi", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8_128_mask, "V16cV8xV16cUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8_256_mask, "V16cV16xV16cUs", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8_512_mask, "V32cV32xV32cUi", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8s_128_mask, "V16cV8xV16cUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8s_256_mask, "V16cV16xV16cUs", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8s_512_mask, "V32cV32xV32cUi", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8_128_mask, "V16cV8xV16cUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8_256_mask, "V16cV16xV16cUs", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8_512_mask, "V32cV32xV32cUi", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8s_128_mask, "V16cV8xV16cUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8s_256_mask, "V16cV16xV16cUs", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8s_512_mask, "V32cV32xV32cUi", "nV:512:", "avx10.2-512") #undef BUILTIN #undef TARGET_BUILTIN #undef TARGET_HEADER_BUILTIN diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index f3d19e38f8f2b..2eb550d07afaa 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -147,8 +147,10 @@ set(x86_files amxcomplexintrin.h amxfp16intrin.h amxintrin.h + avx10_2_512convertintrin.h avx10_2_512minmaxintrin.h avx10_2_512niintrin.h + avx10_2convertintrin.h avx10_2minmaxintrin.h avx10_2niintrin.h avx2intrin.h diff --git a/clang/lib/Headers/avx10_2_512convertintrin.h b/clang/lib/Headers/avx10_2_512convertintrin.h new file mode 100644 index 0000000000000..c56c917772b32 --- /dev/null +++ b/clang/lib/Headers/avx10_2_512convertintrin.h @@ -0,0 +1,304 @@ +/*===--------- avx10_2_512convertintrin.h - AVX10_2_512CONVERT -------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use <avx10_2_512convertintrin.h> directly; include <immintrin.h> instead." +#endif // __IMMINTRIN_H + +#ifdef __SSE2__ + +#ifndef __AVX10_2_512CONVERTINTRIN_H +#define __AVX10_2_512CONVERTINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS512 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \ + __min_vector_width__(512))) + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtx2ps_ph(__m512 __A, + __m512 __B) { + return (__m512h)__builtin_ia32_vcvt2ps2phx512_mask( + (__v16sf)__A, (__v16sf)__B, (__v32hf)_mm512_setzero_ph(), (__mmask32)(-1), + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtx2ps_ph(__m512h __W, __mmask32 __U, __m512 __A, __m512 __B) { + return (__m512h)__builtin_ia32_vcvt2ps2phx512_mask( + (__v16sf)__A, (__v16sf)__B, (__v32hf)__W, (__mmask32)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvtx_round2ps_ph(A, B, R) \ + ((__m512h)__builtin_ia32_vcvt2ps2phx512_mask( \ + (__v16sf)(A), (__v16sf)(B), (__v32hf)_mm512_undefined_ph(), \ + (__mmask32)(-1), (const int)(R))) + +#define _mm512_mask_cvtx_round2ps_ph(W, U, A, B, R) \ + ((__m512h)__builtin_ia32_vcvt2ps2phx512_mask((__v16sf)(A), (__v16sf)(B), \ + (__v32hf)(W), (__mmask32)(U), \ + (const int)(R))) + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_cvtbiasph_pbf8(__m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2bf8_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(), + (__mmask32)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiasph_pbf8( + __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2bf8_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtbiasph_pbf8(__mmask32 __U, __m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2bf8_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(), + (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_cvtbiassph_pbf8(__m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(), + (__mmask32)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiassph_pbf8( + __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtbiassph_pbf8(__mmask32 __U, __m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(), + (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_cvtbiasph_phf8(__m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2hf8_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(), + (__mmask32)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiasph_phf8( + __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2hf8_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtbiasph_phf8(__mmask32 __U, __m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2hf8_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(), + (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_cvtbiassph_phf8(__m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(), + (__mmask32)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiassph_phf8( + __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtbiassph_phf8(__mmask32 __U, __m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(), + (__mmask32)__U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_cvtne2ph_pbf8(__m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_vcvtne2ph2bf8_512((__v32hf)(__A), + (__v32hf)(__B)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtne2ph_pbf8( + __m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qi)_mm512_cvtne2ph_pbf8(__A, __B), (__v64qi)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_cvtnes2ph_pbf8(__m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_vcvtne2ph2bf8s_512((__v32hf)(__A), + (__v32hf)(__B)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtnes2ph_pbf8( + __m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qi)_mm512_cvtnes2ph_pbf8(__A, __B), (__v64qi)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_cvtne2ph_phf8(__m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_vcvtne2ph2hf8_512((__v32hf)(__A), + (__v32hf)(__B)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtne2ph_phf8( + __m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qi)_mm512_cvtne2ph_phf8(__A, __B), (__v64qi)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_cvtne2ph2hf8s_phf8(__m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_vcvtne2ph2hf8s_512((__v32hf)(__A), + (__v32hf)(__B)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtne2ph2hf8s_phf8( + __m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qi)_mm512_cvtne2ph2hf8s_phf8(__A, __B), + (__v64qi)__W); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_cvtnebf8_ph(__m256i __A) { + return (__m512h)__builtin_ia32_vcvtnebf8_2ph512_mask( + (__v32qi)__A, (__v32hf)(__m512h)_mm512_undefined_ph(), (__mmask32)-1); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtnebf8_ph(__m512h __W, __mmask32 __U, __m256i __A) { + return (__m512h)__builtin_ia32_vcvtnebf8_2ph512_mask( + (__v32qi)__A, (__v32hf)(__m512h)__W, (__mmask32)__U); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtnebf8_ph(__mmask32 __U, __m256i __A) { + return (__m512h)__builtin_ia32_vcvtnebf8_2ph512_mask( + (__v32qi)__A, (__v32hf)(__m512h)_mm512_setzero_ph(), (__mmask32)__U); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_cvtnehf8_ph(__m256i __A) { + return (__m512h)__builtin_ia32_vcvtnehf8_2ph512_mask( + (__v32qi)__A, (__v32hf)(__m512h)_mm512_undefined_ph(), (__mmask32)-1); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtnehf8_ph(__m512h __W, __mmask32 __U, __m256i __A) { + return (__m512h)__builtin_ia32_vcvtnehf8_2ph512_mask( + (__v32qi)__A, (__v32hf)(__m512h)__W, (__mmask32)__U); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtnehf8_ph(__mmask32 __U, __m256i __A) { + return (__m512h)__builtin_ia32_vcvtnehf8_2ph512_mask( + (__v32qi)__A, (__v32hf)(__m512h)_mm512_setzero_ph(), (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_cvtneph_pbf8(__m512h __A) { + return (__m256i)__builtin_ia32_vcvtneph2bf8_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtneph_pbf8(__m256i __W, __mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtneph2bf8_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtneph_pbf8(__mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtneph2bf8_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_cvtnesph_pbf8(__m512h __A) { + return (__m256i)__builtin_ia32_vcvtneph2bf8s_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtnesph_pbf8(__m256i __W, __mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtneph2bf8s_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtnesph_pbf8(__mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtneph2bf8s_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_cvtneph_phf8(__m512h __A) { + return (__m256i)__builtin_ia32_vcvtneph2hf8_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtneph_phf8(__m256i __W, __mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtneph2hf8_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtneph_phf8(__mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtneph2hf8_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_cvtnesph_phf8(__m512h __A) { + return (__m256i)__builtin_ia32_vcvtneph2hf8s_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtnesph_phf8(__m256i __W, __mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtneph2hf8s_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtnesph_phf8(__mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtneph2hf8s_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); +} + +static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtpbf8_ph(__m256i __A) { + return _mm512_castsi512_ph(_mm512_slli_epi16(_mm512_cvtepi8_epi16(__A), 8)); +} + +static __inline __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtpbf8_ph(__m512h __S, __mmask16 __U, __m256i __A) { + return _mm512_castsi512_ph( + _mm512_mask_slli_epi16((__m512i)__S, __U, _mm512_cvtepi8_epi16(__A), 8)); +} + +static __inline __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtpbf8_ph(__mmask16 __U, __m256i __A) { + return _mm512_castsi512_ph( + _mm512_slli_epi16(_mm512_maskz_cvtepi8_epi16(__U, __A), 8)); +} + +#undef __DEFAULT_FN_ATTRS512 + +#endif // __AVX10_2_512CONVERTINTRIN_H +#endif // __SSE2__ diff --git a/clang/lib/Headers/avx10_2convertintrin.h b/clang/lib/Headers/avx10_2convertintrin.h new file mode 100644 index 0000000000000..9d7b6008cb6af --- /dev/null +++ b/clang/lib/Headers/avx10_2convertintrin.h @@ -0,0 +1,560 @@ +/*===---------- avx10_2convertintrin.h - AVX512NECONVERTFP8 ----------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use <avx10_2convertintrin.h> directly; include <immintrin.h> instead." +#endif // __IMMINTRIN_H + +#ifdef __SSE2__ + +#ifndef __AVX10_2CONVERTINTRIN_H +#define __AVX10_2CONVERTINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __min_vector_width__(256))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtx2ps_ph(__m128 __A, + __m128 __B) { + return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask( + (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)(-1)); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_mask_cvtx2ps_ph(__m128h __W, __mmask8 __U, __m128 __A, __m128 __B) { + return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask( + (__v4sf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtx2ps_ph(__m256 __A, + ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/101600 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits