llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Momchil Velikov (momchil-velikov)

<details>
<summary>Changes</summary>

This patch adds the following intrinsics:

float16x8_t vmlalbq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t, fpm_t)
float16x8_t vmlaltq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t, fpm_t)
float32x4_t vmlallbbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t)
float32x4_t vmlallbtq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t)
float32x4_t vmlalltbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t)
float32x4_t vmlallttq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t)

Supersedes https://github.com/llvm/llvm-project/pull/120273

---

Patch is 160.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123614.diff

37 Files Affected:

- (modified) clang/include/clang/AST/Type.h (+5)
- (modified) clang/include/clang/Basic/AArch64SVEACLETypes.def (+17-20)
- (modified) clang/include/clang/Basic/TargetBuiltins.h (+3-1)
- (modified) clang/include/clang/Basic/arm_neon.td (+52)
- (modified) clang/include/clang/Basic/arm_neon_incl.td (+2)
- (modified) clang/lib/AST/ASTContext.cpp (+18-12)
- (modified) clang/lib/AST/ItaniumMangle.cpp (+6-1)
- (modified) clang/lib/AST/Type.cpp (+1-3)
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+147)
- (modified) clang/lib/CodeGen/CGExpr.cpp (+9-2)
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+11)
- (modified) clang/lib/CodeGen/CodeGenTypes.cpp (+13-5)
- (modified) clang/lib/CodeGen/Targets/AArch64.cpp (+6-8)
- (modified) clang/lib/Sema/SemaARM.cpp (+2)
- (modified) clang/lib/Sema/SemaExpr.cpp (+6-1)
- (modified) clang/lib/Sema/SemaType.cpp (+2-1)
- (added) clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c (+123)
- (added) clang/test/CodeGen/AArch64/fp8-cast.c (+193)
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c (+316)
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c (+254)
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c (+121)
- (modified) clang/test/CodeGen/arm-mfp8.c (+53-35)
- (modified) clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp (+7)
- (modified) clang/test/CodeGenCXX/mangle-neon-vectors.cpp (+11)
- (added) clang/test/Sema/aarch64-fp8-cast.c (+104)
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_cvt.c (+43)
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fdot.c (+54)
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fmla.c (+22)
- (modified) clang/test/Sema/arm-mfp8.cpp (+22-12)
- (modified) clang/utils/TableGen/NeonEmitter.cpp (+23-9)
- (modified) clang/utils/TableGen/SveEmitter.cpp (+2-2)
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+60)
- (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+88-49)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+21-21)
- (added) llvm/test/CodeGen/AArch64/fp8-neon-fdot.ll (+74)
- (added) llvm/test/CodeGen/AArch64/fp8-neon-fmla.ll (+56)
- (added) llvm/test/CodeGen/AArch64/neon-fp8-cvt.ll (+112)
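As a rough, editorial illustration of how the new FMLA intrinsics are intended to be used (a sketch only, not part of the patch: it assumes an AArch64 toolchain with the FP8 FMA extension enabled, `<arm_neon.h>` declaring the intrinsics listed above, and that the caller already has a suitable `fpm_t` mode value; the wrapper names here are made up):

```c
#include <arm_neon.h>

// Widening FP8 multiply-accumulate into an FP16 accumulator.
// The 'b'/'t' suffixes select which FP8 elements of the sources are used
// (bottom vs. top); the trailing fpm_t argument carries the FP8 mode state,
// which the caller is assumed to have set up already.
float16x8_t mla_f16(float16x8_t acc, mfloat8x16_t a, mfloat8x16_t b, fpm_t fpm) {
  acc = vmlalbq_f16_mf8_fpm(acc, a, b, fpm);
  acc = vmlaltq_f16_mf8_fpm(acc, a, b, fpm);
  return acc;
}

// Widening FP8 multiply-accumulate into an FP32 accumulator; the four
// variants cover the bottom/top combinations of the two sources.
float32x4_t mla_f32(float32x4_t acc, mfloat8x16_t a, mfloat8x16_t b, fpm_t fpm) {
  acc = vmlallbbq_f32_mf8_fpm(acc, a, b, fpm);
  acc = vmlallbtq_f32_mf8_fpm(acc, a, b, fpm);
  acc = vmlalltbq_f32_mf8_fpm(acc, a, b, fpm);
  acc = vmlallttq_f32_mf8_fpm(acc, a, b, fpm);
  return acc;
}
```

As the CGBuiltin.cpp changes in the diff below show, each of these builtins is lowered by first writing the `fpm_t` operand to FPMR via the `aarch64.set.fpmr` intrinsic and then emitting the corresponding NEON intrinsic call.
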
``````````diff
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 3457d524c63aaa..1d9743520654eb 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -2518,6 +2518,7 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase {
   bool isFloat32Type() const;
   bool isDoubleType() const;
   bool isBFloat16Type() const;
+  bool isMFloat8Type() const;
   bool isFloat128Type() const;
   bool isIbm128Type() const;
   bool isRealType() const; // C99 6.2.5p17 (real floating + integer)
@@ -8537,6 +8538,10 @@ inline bool Type::isBFloat16Type() const {
   return isSpecificBuiltinType(BuiltinType::BFloat16);
 }
 
+inline bool Type::isMFloat8Type() const {
+  return isSpecificBuiltinType(BuiltinType::MFloat8);
+}
+
 inline bool Type::isFloat128Type() const {
   return isSpecificBuiltinType(BuiltinType::Float128);
 }
diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def b/clang/include/clang/Basic/AArch64SVEACLETypes.def
index 063cac1f4a58ee..a408bb0c54057c 100644
--- a/clang/include/clang/Basic/AArch64SVEACLETypes.def
+++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def
@@ -57,6 +57,11 @@
 // - IsBF true for vector of brain float elements.
 //===----------------------------------------------------------------------===//
 
+#ifndef SVE_SCALAR_TYPE
+#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \
+  SVE_TYPE(Name, Id, SingletonId)
+#endif
+
 #ifndef SVE_VECTOR_TYPE
 #define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
   SVE_TYPE(Name, Id, SingletonId)
@@ -72,6 +77,11 @@
   SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, false, false, true)
 #endif
 
+#ifndef SVE_VECTOR_TYPE_MFLOAT
+#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF) \
+  SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, false, false, false)
+#endif
+
 #ifndef SVE_VECTOR_TYPE_FLOAT
 #define SVE_VECTOR_TYPE_FLOAT(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF) \
   SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, false, true, false)
@@ -97,16 +107,6 @@
   SVE_TYPE(Name, Id, SingletonId)
 #endif
 
-#ifndef AARCH64_VECTOR_TYPE
-#define AARCH64_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
-  SVE_TYPE(Name, Id, SingletonId)
-#endif
-
-#ifndef AARCH64_VECTOR_TYPE_MFLOAT
-#define AARCH64_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF) \
-  AARCH64_VECTOR_TYPE(Name, MangledName, Id, SingletonId)
-#endif
-
 //===- Vector point types -----------------------------------------------===//
 
 SVE_VECTOR_TYPE_INT("__SVInt8_t", "__SVInt8_t", SveInt8, SveInt8Ty, 16, 8, 1, true)
@@ -125,8 +125,7 @@ SVE_VECTOR_TYPE_FLOAT("__SVFloat64_t", "__SVFloat64_t", SveFloat64, SveFloat64Ty
 
 SVE_VECTOR_TYPE_BFLOAT("__SVBfloat16_t", "__SVBfloat16_t", SveBFloat16, SveBFloat16Ty, 8, 16, 1)
 
-// This is a 8 bits opaque type.
-SVE_VECTOR_TYPE_INT("__SVMfloat8_t", "__SVMfloat8_t", SveMFloat8, SveMFloat8Ty, 16, 8, 1, false)
+SVE_VECTOR_TYPE_MFLOAT("__SVMfloat8_t", "__SVMfloat8_t", SveMFloat8, SveMFloat8Ty, 16, 8, 1)
 
 //
 // x2
@@ -148,7 +147,7 @@ SVE_VECTOR_TYPE_FLOAT("__clang_svfloat64x2_t", "svfloat64x2_t", SveFloat64x2, Sv
 
 SVE_VECTOR_TYPE_BFLOAT("__clang_svbfloat16x2_t", "svbfloat16x2_t", SveBFloat16x2, SveBFloat16x2Ty, 8, 16, 2)
 
-SVE_VECTOR_TYPE_INT("__clang_svmfloat8x2_t", "svmfloat8x2_t", SveMFloat8x2, SveMFloat8x2Ty, 16, 8, 2, false)
+SVE_VECTOR_TYPE_MFLOAT("__clang_svmfloat8x2_t", "svmfloat8x2_t", SveMFloat8x2, SveMFloat8x2Ty, 16, 8, 2)
 
 //
 // x3
@@ -170,7 +169,7 @@ SVE_VECTOR_TYPE_FLOAT("__clang_svfloat64x3_t", "svfloat64x3_t", SveFloat64x3, Sv
 
 SVE_VECTOR_TYPE_BFLOAT("__clang_svbfloat16x3_t", "svbfloat16x3_t", SveBFloat16x3, SveBFloat16x3Ty, 8, 16, 3)
 
-SVE_VECTOR_TYPE_INT("__clang_svmfloat8x3_t", "svmfloat8x3_t", SveMFloat8x3, SveMFloat8x3Ty, 16, 8, 3, false)
+SVE_VECTOR_TYPE_MFLOAT("__clang_svmfloat8x3_t", "svmfloat8x3_t", SveMFloat8x3, SveMFloat8x3Ty, 16, 8, 3)
 
 //
 // x4
@@ -192,7 +191,7 @@ SVE_VECTOR_TYPE_FLOAT("__clang_svfloat64x4_t", "svfloat64x4_t", SveFloat64x4, Sv
 
 SVE_VECTOR_TYPE_BFLOAT("__clang_svbfloat16x4_t", "svbfloat16x4_t", SveBFloat16x4, SveBFloat16x4Ty, 8, 16, 4)
 
-SVE_VECTOR_TYPE_INT("__clang_svmfloat8x4_t", "svmfloat8x4_t", SveMFloat8x4, SveMFloat8x4Ty, 16, 8, 4, false)
+SVE_VECTOR_TYPE_MFLOAT("__clang_svmfloat8x4_t", "svmfloat8x4_t", SveMFloat8x4, SveMFloat8x4Ty, 16, 8, 4)
 
 SVE_PREDICATE_TYPE_ALL("__SVBool_t", "__SVBool_t", SveBool, SveBoolTy, 16, 1)
 SVE_PREDICATE_TYPE_ALL("__clang_svboolx2_t", "svboolx2_t", SveBoolx2, SveBoolx2Ty, 16, 2)
@@ -200,17 +199,15 @@ SVE_PREDICATE_TYPE_ALL("__clang_svboolx4_t", "svboolx4_t", SveBoolx4, SveBoolx4T
 
 SVE_OPAQUE_TYPE("__SVCount_t", "__SVCount_t", SveCount, SveCountTy)
 
-AARCH64_VECTOR_TYPE_MFLOAT("__mfp8", "__mfp8", MFloat8, MFloat8Ty, 1, 8, 1)
-AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x8_t", "__MFloat8x8_t", MFloat8x8, MFloat8x8Ty, 8, 8, 1)
-AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x16_t", "__MFloat8x16_t", MFloat8x16, MFloat8x16Ty, 16, 8, 1)
+SVE_SCALAR_TYPE("__mfp8", "__mfp8", MFloat8, MFloat8Ty, 8)
 
 #undef SVE_VECTOR_TYPE
+#undef SVE_VECTOR_TYPE_MFLOAT
 #undef SVE_VECTOR_TYPE_BFLOAT
 #undef SVE_VECTOR_TYPE_FLOAT
 #undef SVE_VECTOR_TYPE_INT
 #undef SVE_PREDICATE_TYPE
 #undef SVE_PREDICATE_TYPE_ALL
 #undef SVE_OPAQUE_TYPE
-#undef AARCH64_VECTOR_TYPE_MFLOAT
-#undef AARCH64_VECTOR_TYPE
+#undef SVE_SCALAR_TYPE
 #undef SVE_TYPE
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 4dc8b24ed8ae6c..83ef015018f1a1 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -208,7 +208,8 @@ namespace clang {
       Float16,
       Float32,
       Float64,
-      BFloat16
+      BFloat16,
+      MFloat8
     };
 
     NeonTypeFlags(unsigned F) : Flags(F) {}
@@ -230,6 +231,7 @@ namespace clang {
       switch (getEltType()) {
       case Int8:
       case Poly8:
+      case MFloat8:
        return 8;
      case Int16:
      case Float16:
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index ef89fa4358dfeb..e0e342a76a9cd7 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -2125,6 +2125,58 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "lut" in {
   }
 }
 
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8,neon" in {
+  def VBF1CVT_BF16_MF8 : VInst<"vcvt1_bf16_mf8_fpm", "(QB).V", "m">;
+  def VBF1CVT_LOW_BF16_MF8 : VInst<"vcvt1_low_bf16_mf8_fpm", "B.V", "Hm">;
+  def VBF2CVTL_BF16_MF8 : VInst<"vcvt2_bf16_mf8_fpm", "(QB).V", "m">;
+  def VBF2CVTL_LOW_BF16_MF8 : VInst<"vcvt2_low_bf16_mf8_fpm", "B.V", "Hm">;
+  def VBF1CVTL2_HIGH_BF16_MF8 : VInst<"vcvt1_high_bf16_mf8_fpm", "B.V", "Hm">;
+  def VBF2CVTL2_HIGH_BF16_MF8 : VInst<"vcvt2_high_bf16_mf8_fpm", "B.V", "Hm">;
+}
+
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8,neon" in {
+  def VF1CVT_F16_MF8 : VInst<"vcvt1_f16_mf8_fpm", "(>QF).V", "m">;
+  def VF1CVT_LOW_F16_MF8 : VInst<"vcvt1_low_f16_mf8_fpm", "(>F).V", "Hm">;
+  def VF2CVTL_F16_MF8 : VInst<"vcvt2_f16_mf8_fpm", "(>QF).V", "m">;
+  def VF2CVTL_LOW_F16_MF8 : VInst<"vcvt2_low_f16_mf8_fpm", "(>F).V", "Hm">;
+  def VF1CVTL2_HIGH_F16_MF8 : VInst<"vcvt1_high_f16_mf8_fpm", "(>F).V", "Hm">;
+  def VF2CVTL2_HIGH_F16_MF8 : VInst<"vcvt2_high_f16_mf8_fpm", "(>F).V", "Hm">;
+
+  def VCVTN_LOW_F8_F32 : VInst<"vcvt_mf8_f32_fpm", ".(>>QF)(>>QF)V", "m">;
+  def VCVTN_HIGH_F8_F32 : VInst<"vcvt_high_mf8_f32_fpm", ".(q)(>>F)(>>F)V", "Hm">;
+  def VCVTN_F8_F16 : VInst<"vcvt_mf8_f16_fpm", ".(>F)(>F)V", "mQm">;
+}
+
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8dot2,neon" in {
+  def VDOT_F16_MF8 : VInst<"vdot_f16_mf8_fpm", "(>F)(>F)..V", "mQm">;
+
+  def VDOT_LANE_F16_MF8 : VInst<"vdot_lane_f16_mf8_fpm", "(>F)(>F)..IV", "m", [ImmCheck<3, ImmCheck0_3, 0>]>;
+  def VDOT_LANEQ_F16_MF8 : VInst<"vdot_laneq_f16_mf8_fpm", "(>F)(>F).QIV", "m", [ImmCheck<3, ImmCheck0_7, 0>]>;
+
+  def VDOTQ_LANE_F16_MF8 : VInst<"vdot_lane_f16_mf8_fpm", "(>F)(>F).qIV", "Qm", [ImmCheck<3, ImmCheck0_3, 0>]>;
+  def VDOTQ_LANEQ_F16_MF8 : VInst<"vdot_laneq_f16_mf8_fpm", "(>F)(>F)..IV", "Qm", [ImmCheck<3, ImmCheck0_7, 0>]>;
+}
+
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8dot4,neon" in {
+  def VDOT_F32_MF8 : VInst<"vdot_f32_mf8_fpm", "(>>F)(>>F)..V", "mQm">;
+
+  def VDOT_LANE_F32_MF8 : VInst<"vdot_lane_f32_mf8_fpm", "(>>F)(>>F)..IV", "m", [ImmCheck<3, ImmCheck0_1, 0>]>;
+  def VDOT_LANEQ_F32_MF8 : VInst<"vdot_laneq_f32_mf8_fpm", "(>>F)(>>F).QIV", "m", [ImmCheck<3, ImmCheck0_3, 0>]>;
+
+  def VDOTQ_LANE_F32_MF8 : VInst<"vdot_lane_f32_mf8_fpm", "(>>F)(>>F).qIV", "Qm", [ImmCheck<3, ImmCheck0_1, 0>]>;
+  def VDOTQ_LANEQ_F32_MF8 : VInst<"vdot_laneq_f32_mf8_fpm", "(>>F)(>>F)..IV", "Qm", [ImmCheck<3, ImmCheck0_3, 0>]>;
+}
+
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8fma,neon" in {
+  def VMLALB_F16_F8 : VInst<"vmlalb_f16_mf8_fpm", "(>F)(>F)..V", "Qm">;
+  def VMLALT_F16_F8 : VInst<"vmlalt_f16_mf8_fpm", "(>F)(>F)..V", "Qm">;
+
+  def VMLALLBB_F32_F8 : VInst<"vmlallbb_f32_mf8_fpm", "(>>F)(>>F)..V", "Qm">;
+  def VMLALLBT_F32_F8 : VInst<"vmlallbt_f32_mf8_fpm", "(>>F)(>>F)..V", "Qm">;
+  def VMLALLTB_F32_F8 : VInst<"vmlalltb_f32_mf8_fpm", "(>>F)(>>F)..V", "Qm">;
+  def VMLALLTT_F32_F8 : VInst<"vmlalltt_f32_mf8_fpm", "(>>F)(>>F)..V", "Qm">;
+}
+
 let ArchGuard = "defined(__aarch64__)", TargetGuard = "neon,faminmax" in {
   def FAMIN : WInst<"vamin", "...", "fhQdQfQh">;
   def FAMAX : WInst<"vamax", "...", "fhQdQfQh">;
diff --git a/clang/include/clang/Basic/arm_neon_incl.td b/clang/include/clang/Basic/arm_neon_incl.td
index fd800e5a6278e4..b9b9d509c22512 100644
--- a/clang/include/clang/Basic/arm_neon_incl.td
+++ b/clang/include/clang/Basic/arm_neon_incl.td
@@ -243,6 +243,7 @@ def OP_UNAVAILABLE : Operation {
 // B: change to BFloat16
 // P: change to polynomial category.
 // p: change polynomial to equivalent integer category. Otherwise nop.
+// V: change to fpm_t
 //
 // >: double element width (vector size unchanged).
 // <: half element width (vector size unchanged).
@@ -301,6 +302,7 @@ class Inst <string n, string p, string t, Operation o, list<ImmCheck> ch = []>{
 class SInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
 class IInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
 class WInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
+class VInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
 
 // The following instruction classes are implemented via operators
 // instead of builtins. As such these declarations are only used for
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 155dbcfcaeed31..51764095a68980 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -2269,11 +2269,10 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
     Width = 0; \
     Align = 16; \
     break;
-#define AARCH64_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, \
-                                   ElBits, NF) \
+#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \
   case BuiltinType::Id: \
-    Width = NumEls * ElBits * NF; \
-    Align = NumEls * ElBits; \
+    Width = Bits; \
+    Align = Bits; \
     break;
 #include "clang/Basic/AArch64SVEACLETypes.def"
 #define PPC_VECTOR_TYPE(Name, Id, Size) \
@@ -4395,15 +4394,14 @@ ASTContext::getBuiltinVectorTypeInfo(const BuiltinType *Ty) const {
                                 ElBits, NF) \
   case BuiltinType::Id: \
     return {BFloat16Ty, llvm::ElementCount::getScalable(NumEls), NF};
+#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, \
+                               ElBits, NF) \
+  case BuiltinType::Id: \
+    return {MFloat8Ty, llvm::ElementCount::getScalable(NumEls), NF};
 #define SVE_PREDICATE_TYPE_ALL(Name, MangledName, Id, SingletonId, NumEls, NF) \
   case BuiltinType::Id: \
     return {BoolTy, llvm::ElementCount::getScalable(NumEls), NF};
-#define AARCH64_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, \
-                                   ElBits, NF) \
-  case BuiltinType::Id: \
-    return {getIntTypeForBitwidth(ElBits, false), \
-            llvm::ElementCount::getFixed(NumEls), NF};
-#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingletonId)
+#define SVE_TYPE(Name, Id, SingletonId)
 #include "clang/Basic/AArch64SVEACLETypes.def"
 
 #define RVV_VECTOR_TYPE_INT(Name, Id, SingletonId, NumEls, ElBits, NF, \
@@ -4465,11 +4463,16 @@ QualType ASTContext::getScalableVectorType(QualType EltTy, unsigned NumElts,
       EltTySize == ElBits && NumElts == (NumEls * NF) && NumFields == 1) { \
     return SingletonId; \
   }
+#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, \
+                               ElBits, NF) \
+  if (EltTy->isMFloat8Type() && EltTySize == ElBits && \
+      NumElts == (NumEls * NF) && NumFields == 1) { \
+    return SingletonId; \
+  }
 #define SVE_PREDICATE_TYPE_ALL(Name, MangledName, Id, SingletonId, NumEls, NF) \
   if (EltTy->isBooleanType() && NumElts == (NumEls * NF) && NumFields == 1) \
     return SingletonId;
-#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingletonId)
-#define AARCH64_VECTOR_TYPE(Name, MangledName, Id, SingletonId)
+#define SVE_TYPE(Name, Id, SingletonId)
 #include "clang/Basic/AArch64SVEACLETypes.def"
   } else if (Target->hasRISCVVTypes()) {
     uint64_t EltTySize = getTypeSize(EltTy);
@@ -12354,6 +12357,9 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
   case 'p':
     Type = Context.getProcessIDType();
     break;
+  case 'm':
+    Type = Context.MFloat8Ty;
+    break;
   }
 
   // If there are modifiers and if we're allowed to parse them, go for it.
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 1dd936cf4fb518..49089c0ea3c8ac 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -3433,7 +3433,7 @@ void CXXNameMangler::mangleType(const BuiltinType *T) {
     type_name = MangledName; \
     Out << (type_name == Name ? "u" : "") << type_name.size() << type_name; \
     break;
-#define AARCH64_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
+#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \
   case BuiltinType::Id: \
     type_name = MangledName; \
     Out << (type_name == Name ? "u" : "") << type_name.size() << type_name; \
@@ -3919,6 +3919,9 @@ void CXXNameMangler::mangleNeonVectorType(const VectorType *T) {
   case BuiltinType::Float: EltName = "float32_t"; break;
   case BuiltinType::Half: EltName = "float16_t"; break;
   case BuiltinType::BFloat16: EltName = "bfloat16_t"; break;
+  case BuiltinType::MFloat8:
+    EltName = "mfloat8_t";
+    break;
   default:
     llvm_unreachable("unexpected Neon vector element type");
   }
@@ -3972,6 +3975,8 @@ static StringRef mangleAArch64VectorBase(const BuiltinType *EltType) {
     return "Float64";
   case BuiltinType::BFloat16:
     return "Bfloat16";
+  case BuiltinType::MFloat8:
+    return "Mfloat8";
   default:
     llvm_unreachable("Unexpected vector element base type");
   }
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index caa0ac858a1bea..fde0746a175705 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -2527,9 +2527,7 @@ bool Type::isSVESizelessBuiltinType() const {
 #define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId) \
   case BuiltinType::Id: \
     return true;
-#define AARCH64_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
-  case BuiltinType::Id: \
-    return false;
+#define SVE_TYPE(Name, Id, SingletonId)
 #include "clang/Basic/AArch64SVEACLETypes.def"
   default:
     return false;
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b80833fd91884d..74b28cc23b7dd8 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6819,6 +6819,7 @@ static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
   switch (TypeFlags.getEltType()) {
   case NeonTypeFlags::Int8:
   case NeonTypeFlags::Poly8:
+  case NeonTypeFlags::MFloat8:
     return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
   case NeonTypeFlags::Int16:
   case NeonTypeFlags::Poly16:
@@ -6898,12 +6899,53 @@ Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
   return Builder.CreateCall(F, Ops, name);
 }
 
+Value *CodeGenFunction::EmitFP8NeonCall(unsigned IID,
+                                        ArrayRef<llvm::Type *> Tys,
+                                        SmallVectorImpl<Value *> &Ops,
+                                        const CallExpr *E, const char *name) {
+  llvm::Value *FPM =
+      EmitScalarOrConstFoldImmArg(/* ICEArguments */ 0, E->getNumArgs() - 1, E);
+  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), FPM);
+  return EmitNeonCall(CGM.getIntrinsic(IID, Tys), Ops, name);
+}
+
+llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
+    unsigned IID, bool ExtendLane, llvm::Type *RetTy,
+    SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
+
+  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
+                             RetTy->getPrimitiveSizeInBits();
+  llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
+                       Ops[1]->getType()};
+  if (ExtendLane) {
+    auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
+    Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
+                                        Builder.getInt64(0));
+  }
+  return EmitFP8NeonCall(IID, Tys, Ops, E, name);
+}
+
 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
                                             bool neg) {
   int SV = cast<ConstantInt>(V)->getSExtValue();
   return ConstantInt::get(Ty, neg ? -SV : SV);
 }
 
+Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
+                                           llvm::Type *Ty1, bool Extract,
+                                           SmallVectorImpl<llvm::Value *> &Ops,
+                                           const CallExpr *E,
+                                           const char *name) {
+  llvm::Type *Tys[] = {Ty0, Ty1};
+  if (Extract) {
+    // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
+    // the vector.
+    Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
+    Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
+  }
+  return EmitFP8NeonCall(IID, Tys, Ops, E, name);
+}
+
 // Right-shift a vector by a constant.
 Value *CodeGenFu... [truncated]
``````````

</details>

https://github.com/llvm/llvm-project/pull/123614

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits