================ @@ -0,0 +1,92 @@ +/*===------------- amxfp8intrin.h - AMX intrinsics -*- C++ -*----------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===------------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <amxfp8intrin.h> directly; include <immintrin.h> instead." +#endif /* __IMMINTRIN_H */ + +#ifndef __AMXFP8INTRIN_H +#define __AMXFP8INTRIN_H +#ifdef __x86_64__ + +/// These instructions compute dot product of brain-float8 (BF8) or +/// hybrid-float8 (HF8) accumulating into a single precision (FP32). The input +/// elements can be BF8 or HF8. These instructions have three tile operands, one +/// source/dest accumulator operand, and two source operands, \a a and \a b. The +/// \a a and \a b operands can be BF8 or HF8 independently, and the source/dest +/// operand, \a dst is always FP32. +/// +/// \headerfile <immintrin.h> +/// +/// \code +/// void _tile_dpbf8ps (__tile dst, __tile a, __tile b) +/// \endcode +/// +/// This intrinsic corresponds to the \c TDPBF8PS instruction, which is the dot +/// product of a BF8 value (\a a) by a BF8 value (\a b) accumulating into a +/// Single Precision (FP32) source/dest (\a dst). +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param a +/// The 1st source tile. Max size is 1024 Bytes. +/// \param b +/// The 2nd source tile. Max size is 1024 Bytes. +#define _tile_dpbf8ps(dst, a, b) __builtin_ia32_tdpbf8ps((dst), (a), (b)) + +/// \code +/// void _tile_dpbhf8ps (__tile dst, __tile a, __tile b) +/// \endcode +/// +/// This intrinsic corresponds to the \c TDPBHF8PS instruction, which is the dot +/// product of a BF8 value (\a a) by an HF8 value (\a b) accumulating into a +/// Single Precision (FP32) source/dest (\a dst). 
+/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param a +/// The 1st source tile. Max size is 1024 Bytes. +/// \param b +/// The 2nd source tile. Max size is 1024 Bytes. ---------------- phoebewang wrote:
The current rule of thumb is
```
Intrinsic description
Pseudo code if any
/// \headerfile <immintrin.h>
/// \code
/// void _tile_dpbhf8ps (__tile dst, __tile a, __tile b)
/// \endcode
/// This intrinsic corresponds to the \c TDPBHF8PS instruction.
/// \param dst
/// The destination tile. Max size is 1024 Bytes.
/// \param a
/// The 1st source tile. Max size is 1024 Bytes.
/// \param b
/// The 2nd source tile. Max size is 1024 Bytes.
```
See examples in other files.

https://github.com/llvm/llvm-project/pull/113850
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits