https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/118969
* The FP8 scalar type (`__mfp8`) was described as a vector type * The FP8 vector types were described/assumed to have integer element type (the element type ought to be `__mfp8`), * Add support for `m` type specifier (denoting `__mfp8`) in `DecodeTypeFromStr` and create SVE builtin prototypes using the specifier, instead of `int8_t`. >From 7e2d60348850619fb7b0c8a88e92ab103f907d34 Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Fri, 6 Dec 2024 11:08:21 +0000 Subject: [PATCH 1/3] Handle scalable store size in MemCpyOptimizer The compiler crashes with an ICE when it tries to create a `memset` with scalable size. --- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 3 +- .../CodeGen/AArch64/memset-scalable-size.ll | 56 +++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/memset-scalable-size.ll diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 0cba5d077da62b..fc5f6ff2b7f377 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -800,8 +800,9 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // in subsequent passes. auto *T = V->getType(); if (T->isAggregateType()) { - uint64_t Size = DL.getTypeStoreSize(T); IRBuilder<> Builder(SI); + Value *Size = + Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T)); auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size, SI->getAlign()); M->copyMetadata(*SI, LLVMContext::MD_DIAssignID); diff --git a/llvm/test/CodeGen/AArch64/memset-scalable-size.ll b/llvm/test/CodeGen/AArch64/memset-scalable-size.ll new file mode 100644 index 00000000000000..8ea6330f235a69 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/memset-scalable-size.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=memcpyopt < %s | FileCheck %s +target triple = "aarch64-unknown-linux" + +define void @f0() { +; CHECK-LABEL: define void @f0() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[P:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[P]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: call void @g(ptr [[P]]) +; CHECK-NEXT: ret void +; +entry: + %p = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>}, align 2 + store { <vscale x 16 x i1>, <vscale x 16 x i1> } zeroinitializer, ptr %p, align 2 + call void @g(ptr %p) + ret void +} + +define void @f1() { +; CHECK-LABEL: define void @f1() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[P:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 48 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[P]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: call void @g(ptr [[P]]) +; CHECK-NEXT: ret void +; +entry: + %p = alloca {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16 + store {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } zeroinitializer, ptr %p, align 16 + call void @g(ptr %p) + ret void +} + +define void @f2() { +; CHECK-LABEL: define void @f2() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[P:%.*]] = alloca { 
<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double> }, align 16 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 192 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[P]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: call void @g(ptr [[P]]) +; CHECK-NEXT: ret void +; +entry: + %p = alloca {<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double> }, align 16 + store {<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double> } zeroinitializer, ptr %p, align 16 + call void @g(ptr %p) + ret void +} + +declare void @g(ptr) >From 83331bbf9d083ec8cba96acc32114ed1518e91f7 Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Fri, 6 Dec 2024 10:47:43 +0000 Subject: [PATCH 2/3] Fix SVE tuples --- clang/lib/CodeGen/Targets/AArch64.cpp | 68 +++++++---- .../test/CodeGen/AArch64/pure-scalable-args.c | 19 ++++ .../CodeGenCXX/aarch64-mangle-sve-vectors.cpp | 106 ++++++++---------- 3 files changed, 111 insertions(+), 82 deletions(-) diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index be33e26f047841..ad7f405cc72550 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -52,6 +52,7 @@ class AArch64ABIInfo : public ABIInfo { bool isIllegalVectorType(QualType Ty) const; + bool passAsAggregateType(QualType Ty) const; bool passAsPureScalableType(QualType Ty, unsigned &NV, unsigned &NP, SmallVectorImpl<llvm::Type *> &CoerceToSeq) const; @@ -337,6 +338,10 @@ ABIArgInfo AArch64ABIInfo::coerceAndExpandPureScalableAggregate( NSRN += NVec; NPRN += NPred; + // Handle SVE vector tuples. + if (Ty->isSVESizelessBuiltinType()) + return ABIArgInfo::getDirect(); + llvm::Type *UnpaddedCoerceToType = UnpaddedCoerceToSeq.size() == 1 ? UnpaddedCoerceToSeq[0] @@ -362,7 +367,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn, if (isIllegalVectorType(Ty)) return coerceIllegalVector(Ty, NSRN, NPRN); - if (!isAggregateTypeForABI(Ty)) { + if (!passAsAggregateType(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); @@ -417,7 +422,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn, // elsewhere for GNU compatibility. uint64_t Size = getContext().getTypeSize(Ty); bool IsEmpty = isEmptyRecord(getContext(), Ty, true); - if (IsEmpty || Size == 0) { + if (!Ty->isSVESizelessBuiltinType() && (IsEmpty || Size == 0)) { if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS()) return ABIArgInfo::getIgnore(); @@ -504,7 +509,7 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy, if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) return getNaturalAlignIndirect(RetTy); - if (!isAggregateTypeForABI(RetTy)) { + if (!passAsAggregateType(RetTy)) { // Treat an enum type as its underlying type. 
if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); @@ -519,7 +524,8 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy, } uint64_t Size = getContext().getTypeSize(RetTy); - if (isEmptyRecord(getContext(), RetTy, true) || Size == 0) + if (!RetTy->isSVESizelessBuiltinType() && + (isEmptyRecord(getContext(), RetTy, true) || Size == 0)) return ABIArgInfo::getIgnore(); const Type *Base = nullptr; @@ -654,6 +660,15 @@ bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() return true; } +bool AArch64ABIInfo::passAsAggregateType(QualType Ty) const { + if (Kind == AArch64ABIKind::AAPCS && Ty->isSVESizelessBuiltinType()) { + const auto *BT = Ty->getAs<BuiltinType>(); + return !BT->isSVECount() && + getContext().getBuiltinVectorTypeInfo(BT).NumVectors > 1; + } + return isAggregateTypeForABI(Ty); +} + // Check if a type needs to be passed in registers as a Pure Scalable Type (as // defined by AAPCS64). Return the number of data vectors and the number of // predicate vectors in the type, into `NVec` and `NPred`, respectively. Upon @@ -719,37 +734,38 @@ bool AArch64ABIInfo::passAsPureScalableType( return true; } - const auto *VT = Ty->getAs<VectorType>(); - if (!VT) - return false; + if (const auto *VT = Ty->getAs<VectorType>()) { + if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) { + ++NPred; + if (CoerceToSeq.size() + 1 > 12) + return false; + CoerceToSeq.push_back(convertFixedToScalableVectorType(VT)); + return true; + } - if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) { - ++NPred; - if (CoerceToSeq.size() + 1 > 12) - return false; - CoerceToSeq.push_back(convertFixedToScalableVectorType(VT)); - return true; - } + if (VT->getVectorKind() == VectorKind::SveFixedLengthData) { + ++NVec; + if (CoerceToSeq.size() + 1 > 12) + return false; + CoerceToSeq.push_back(convertFixedToScalableVectorType(VT)); + return true; + } - if (VT->getVectorKind() == VectorKind::SveFixedLengthData) { - ++NVec; - if (CoerceToSeq.size() + 1 > 12) - return false; - CoerceToSeq.push_back(convertFixedToScalableVectorType(VT)); - return true; + return false; } - if (!VT->isBuiltinType()) + if (!Ty->isBuiltinType()) return false; - switch (cast<BuiltinType>(VT)->getKind()) { + bool isPredicate; + switch (Ty->getAs<BuiltinType>()->getKind()) { #define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \ case BuiltinType::Id: \ - ++NVec; \ + isPredicate = false; \ break; #define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId) \ case BuiltinType::Id: \ - ++NPred; \ + isPredicate = true; \ break; #define SVE_TYPE(Name, Id, SingletonId) #include "clang/Basic/AArch64SVEACLETypes.def" @@ -761,6 +777,10 @@ bool AArch64ABIInfo::passAsPureScalableType( getContext().getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty)); assert(Info.NumVectors > 0 && Info.NumVectors <= 4 && "Expected 1, 2, 3 or 4 vectors!"); + if (isPredicate) + NPred += Info.NumVectors; + else + NVec += Info.NumVectors; auto VTy = llvm::ScalableVectorType::get(CGT.ConvertType(Info.ElementType), Info.EC.getKnownMinValue()); diff --git a/clang/test/CodeGen/AArch64/pure-scalable-args.c b/clang/test/CodeGen/AArch64/pure-scalable-args.c index f40c944335e4a4..e1dbf5f48ce0ce 100644 --- a/clang/test/CodeGen/AArch64/pure-scalable-args.c +++ b/clang/test/CodeGen/AArch64/pure-scalable-args.c @@ -459,3 +459,22 @@ void test_va_arg(int n, ...) 
{ // CHECK-DARWIN-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ap) // CHECK-DARWIN-NEXT: ret void // CHECK-DARWIN-NEXT: } + +// Regression test for incorrect passing of SVE vector tuples +// The whole `y` need to be passed indirectly. +void test_tuple_reg_count(svfloat32_t x, svfloat32x2_t y) { + void test_tuple_reg_count_callee(svfloat32_t, svfloat32_t, svfloat32_t, svfloat32_t, + svfloat32_t, svfloat32_t, svfloat32_t, svfloat32x2_t); + test_tuple_reg_count_callee(x, x, x, x, x, x, x, y); +} +// CHECK-AAPCS: declare void @test_tuple_reg_count_callee(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, ptr noundef) +// CHECK-DARWIN: declare void @test_tuple_reg_count_callee(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>) + +// Regression test for incorrect passing of SVE vector tuples +// The whole `y` need to be passed indirectly. +void test_tuple_reg_count_bool(svboolx4_t x, svboolx4_t y) { + void test_tuple_reg_count_bool_callee(svboolx4_t, svboolx4_t); + test_tuple_reg_count_bool_callee(x, y); +} +// CHECK-AAPCS: declare void @test_tuple_reg_count_bool_callee(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, ptr noundef) +// CHECK-DARWIN: declare void @test_tuple_reg_count_bool_callee(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>) diff --git a/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp b/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp index 9f481e1f0f0857..152be26948f281 100644 --- a/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp +++ b/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp @@ -141,13 +141,13 @@ void f(__clang_svmfloat8x4_t, __clang_svmfloat8x4_t); // CHECK-NEXT: [[COERCE72:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2 // CHECK-NEXT: [[COERCE73:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2 // CHECK-NEXT: [[COERCE74:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2 -// CHECK-NEXT: [[COERCE75:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2 +// CHECK-NEXT: [[BYVAL_TEMP:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2 +// CHECK-NEXT: [[COERCE75:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 // CHECK-NEXT: [[COERCE76:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 -// CHECK-NEXT: [[COERCE77:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 +// CHECK-NEXT: [[COERCE77:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 // CHECK-NEXT: [[COERCE78:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 -// CHECK-NEXT: [[COERCE79:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 +// CHECK-NEXT: [[COERCE79:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 // CHECK-NEXT: [[COERCE80:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 -// CHECK-NEXT: [[COERCE81:%.*]] = alloca { 
<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 // CHECK-NEXT: call void @_Z1fu10__SVInt8_tS_(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer) // CHECK-NEXT: call void @_Z1fu11__SVInt16_tS_(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer) // CHECK-NEXT: call void @_Z1fu11__SVInt16_tS_(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer) @@ -575,46 +575,41 @@ void f(__clang_svmfloat8x4_t, __clang_svmfloat8x4_t); // CHECK-NEXT: [[COERCE74_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE74_TUPLE]], 1 // CHECK-NEXT: [[COERCE74_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE74_TUPLE]], 2 // CHECK-NEXT: [[COERCE74_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE74_TUPLE]], 3 -// CHECK-NEXT: store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } zeroinitializer, ptr [[COERCE75]], align 2 -// CHECK-NEXT: [[COERCE75_TUPLE:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[COERCE75]], align 2 -// CHECK-NEXT: [[COERCE75_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE75_TUPLE]], 0 -// CHECK-NEXT: [[COERCE75_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE75_TUPLE]], 1 -// CHECK-NEXT: [[COERCE75_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE75_TUPLE]], 2 -// CHECK-NEXT: [[COERCE75_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE75_TUPLE]], 3 -// CHECK-NEXT: call void @_Z1f10svboolx4_tS_(<vscale x 16 x i1> [[COERCE74_EXTRACT0]], <vscale x 16 x i1> [[COERCE74_EXTRACT1]], <vscale x 16 x i1> [[COERCE74_EXTRACT2]], <vscale x 16 x i1> [[COERCE74_EXTRACT3]], <vscale x 16 x i1> [[COERCE75_EXTRACT0]], <vscale x 16 x i1> [[COERCE75_EXTRACT1]], <vscale x 16 x i1> [[COERCE75_EXTRACT2]], <vscale x 16 x i1> [[COERCE75_EXTRACT3]]) +// CHECK-NEXT: store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } zeroinitializer, ptr [[BYVAL_TEMP]], align 2 +// CHECK-NEXT: call void @_Z1f10svboolx4_tS_(<vscale x 16 x i1> [[COERCE74_EXTRACT0]], <vscale x 16 x i1> [[COERCE74_EXTRACT1]], <vscale x 16 x i1> [[COERCE74_EXTRACT2]], <vscale x 16 x i1> [[COERCE74_EXTRACT3]], ptr noundef [[BYVAL_TEMP]]) +// CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE75]], align 16 +// CHECK-NEXT: [[COERCE75_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE75]], align 16 +// CHECK-NEXT: [[COERCE75_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE75_TUPLE]], 0 +// CHECK-NEXT: [[COERCE75_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE75_TUPLE]], 1 // CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE76]], align 16 // CHECK-NEXT: [[COERCE76_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE76]], align 16 // CHECK-NEXT: [[COERCE76_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE76_TUPLE]], 0 // 
CHECK-NEXT: [[COERCE76_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE76_TUPLE]], 1 -// CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE77]], align 16 -// CHECK-NEXT: [[COERCE77_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE77]], align 16 -// CHECK-NEXT: [[COERCE77_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 0 -// CHECK-NEXT: [[COERCE77_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 1 -// CHECK-NEXT: call void @_Z1f13svmfloat8x2_tS_(<vscale x 16 x i8> [[COERCE76_EXTRACT0]], <vscale x 16 x i8> [[COERCE76_EXTRACT1]], <vscale x 16 x i8> [[COERCE77_EXTRACT0]], <vscale x 16 x i8> [[COERCE77_EXTRACT1]]) +// CHECK-NEXT: call void @_Z1f13svmfloat8x2_tS_(<vscale x 16 x i8> [[COERCE75_EXTRACT0]], <vscale x 16 x i8> [[COERCE75_EXTRACT1]], <vscale x 16 x i8> [[COERCE76_EXTRACT0]], <vscale x 16 x i8> [[COERCE76_EXTRACT1]]) +// CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE77]], align 16 +// CHECK-NEXT: [[COERCE77_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE77]], align 16 +// CHECK-NEXT: [[COERCE77_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 0 +// CHECK-NEXT: [[COERCE77_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 1 +// CHECK-NEXT: [[COERCE77_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 2 // CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE78]], align 16 // CHECK-NEXT: [[COERCE78_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE78]], align 16 // CHECK-NEXT: [[COERCE78_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE78_TUPLE]], 0 // CHECK-NEXT: [[COERCE78_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE78_TUPLE]], 1 // CHECK-NEXT: [[COERCE78_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE78_TUPLE]], 2 -// CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE79]], align 16 -// CHECK-NEXT: [[COERCE79_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE79]], align 16 -// CHECK-NEXT: [[COERCE79_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 0 -// CHECK-NEXT: [[COERCE79_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 1 -// CHECK-NEXT: [[COERCE79_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 2 -// CHECK-NEXT: call void @_Z1f13svmfloat8x3_tS_(<vscale x 16 x i8> [[COERCE78_EXTRACT0]], <vscale x 16 x i8> [[COERCE78_EXTRACT1]], <vscale x 16 x i8> [[COERCE78_EXTRACT2]], <vscale x 16 x i8> [[COERCE79_EXTRACT0]], <vscale x 16 x i8> [[COERCE79_EXTRACT1]], <vscale x 16 x i8> [[COERCE79_EXTRACT2]]) +// CHECK-NEXT: call void @_Z1f13svmfloat8x3_tS_(<vscale x 16 x i8> [[COERCE77_EXTRACT0]], <vscale x 16 x i8> [[COERCE77_EXTRACT1]], <vscale x 16 x i8> 
[[COERCE77_EXTRACT2]], <vscale x 16 x i8> [[COERCE78_EXTRACT0]], <vscale x 16 x i8> [[COERCE78_EXTRACT1]], <vscale x 16 x i8> [[COERCE78_EXTRACT2]]) +// CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE79]], align 16 +// CHECK-NEXT: [[COERCE79_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE79]], align 16 +// CHECK-NEXT: [[COERCE79_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 0 +// CHECK-NEXT: [[COERCE79_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 1 +// CHECK-NEXT: [[COERCE79_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 2 +// CHECK-NEXT: [[COERCE79_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 3 // CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE80]], align 16 // CHECK-NEXT: [[COERCE80_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE80]], align 16 // CHECK-NEXT: [[COERCE80_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE80_TUPLE]], 0 // CHECK-NEXT: [[COERCE80_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE80_TUPLE]], 1 // CHECK-NEXT: [[COERCE80_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE80_TUPLE]], 2 // CHECK-NEXT: [[COERCE80_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE80_TUPLE]], 3 -// CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE81]], align 16 -// CHECK-NEXT: [[COERCE81_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE81]], align 16 -// CHECK-NEXT: [[COERCE81_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE81_TUPLE]], 0 -// CHECK-NEXT: [[COERCE81_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE81_TUPLE]], 1 -// CHECK-NEXT: [[COERCE81_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE81_TUPLE]], 2 -// CHECK-NEXT: [[COERCE81_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE81_TUPLE]], 3 -// CHECK-NEXT: call void @_Z1f13svmfloat8x4_tS_(<vscale x 16 x i8> [[COERCE80_EXTRACT0]], <vscale x 16 x i8> [[COERCE80_EXTRACT1]], <vscale x 16 x i8> [[COERCE80_EXTRACT2]], <vscale x 16 x i8> [[COERCE80_EXTRACT3]], <vscale x 16 x i8> [[COERCE81_EXTRACT0]], <vscale x 16 x i8> [[COERCE81_EXTRACT1]], <vscale x 16 x i8> [[COERCE81_EXTRACT2]], <vscale x 16 x i8> [[COERCE81_EXTRACT3]]) +// CHECK-NEXT: call void @_Z1f13svmfloat8x4_tS_(<vscale x 16 x i8> [[COERCE79_EXTRACT0]], <vscale x 16 x i8> [[COERCE79_EXTRACT1]], <vscale x 16 x i8> 
[[COERCE79_EXTRACT2]], <vscale x 16 x i8> [[COERCE79_EXTRACT3]], <vscale x 16 x i8> [[COERCE80_EXTRACT0]], <vscale x 16 x i8> [[COERCE80_EXTRACT1]], <vscale x 16 x i8> [[COERCE80_EXTRACT2]], <vscale x 16 x i8> [[COERCE80_EXTRACT3]]) // CHECK-NEXT: ret void // // COMPAT_17-LABEL: define dso_local void @_Z3foov( @@ -695,13 +690,13 @@ void f(__clang_svmfloat8x4_t, __clang_svmfloat8x4_t); // COMPAT_17-NEXT: [[COERCE72:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2 // COMPAT_17-NEXT: [[COERCE73:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2 // COMPAT_17-NEXT: [[COERCE74:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2 -// COMPAT_17-NEXT: [[COERCE75:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2 +// COMPAT_17-NEXT: [[BYVAL_TEMP:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2 +// COMPAT_17-NEXT: [[COERCE75:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 // COMPAT_17-NEXT: [[COERCE76:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 -// COMPAT_17-NEXT: [[COERCE77:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 +// COMPAT_17-NEXT: [[COERCE77:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 // COMPAT_17-NEXT: [[COERCE78:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 -// COMPAT_17-NEXT: [[COERCE79:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 +// COMPAT_17-NEXT: [[COERCE79:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 // COMPAT_17-NEXT: [[COERCE80:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 -// COMPAT_17-NEXT: [[COERCE81:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 // COMPAT_17-NEXT: call void @_Z1fu10__SVInt8_tu10__SVInt8_t(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer) // COMPAT_17-NEXT: call void @_Z1fu11__SVInt16_tu11__SVInt16_t(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer) // COMPAT_17-NEXT: call void @_Z1fu11__SVInt16_tu11__SVInt16_t(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer) @@ -1129,46 +1124,41 @@ void f(__clang_svmfloat8x4_t, __clang_svmfloat8x4_t); // COMPAT_17-NEXT: [[COERCE74_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE74_TUPLE]], 1 // COMPAT_17-NEXT: [[COERCE74_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE74_TUPLE]], 2 // COMPAT_17-NEXT: [[COERCE74_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE74_TUPLE]], 3 -// COMPAT_17-NEXT: store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } zeroinitializer, ptr [[COERCE75]], align 2 -// COMPAT_17-NEXT: [[COERCE75_TUPLE:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[COERCE75]], align 2 -// COMPAT_17-NEXT: [[COERCE75_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE75_TUPLE]], 0 -// COMPAT_17-NEXT: 
[[COERCE75_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE75_TUPLE]], 1 -// COMPAT_17-NEXT: [[COERCE75_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE75_TUPLE]], 2 -// COMPAT_17-NEXT: [[COERCE75_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE75_TUPLE]], 3 -// COMPAT_17-NEXT: call void @_Z1f10svboolx4_t10svboolx4_t(<vscale x 16 x i1> [[COERCE74_EXTRACT0]], <vscale x 16 x i1> [[COERCE74_EXTRACT1]], <vscale x 16 x i1> [[COERCE74_EXTRACT2]], <vscale x 16 x i1> [[COERCE74_EXTRACT3]], <vscale x 16 x i1> [[COERCE75_EXTRACT0]], <vscale x 16 x i1> [[COERCE75_EXTRACT1]], <vscale x 16 x i1> [[COERCE75_EXTRACT2]], <vscale x 16 x i1> [[COERCE75_EXTRACT3]]) +// COMPAT_17-NEXT: store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } zeroinitializer, ptr [[BYVAL_TEMP]], align 2 +// COMPAT_17-NEXT: call void @_Z1f10svboolx4_t10svboolx4_t(<vscale x 16 x i1> [[COERCE74_EXTRACT0]], <vscale x 16 x i1> [[COERCE74_EXTRACT1]], <vscale x 16 x i1> [[COERCE74_EXTRACT2]], <vscale x 16 x i1> [[COERCE74_EXTRACT3]], ptr noundef [[BYVAL_TEMP]]) +// COMPAT_17-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE75]], align 16 +// COMPAT_17-NEXT: [[COERCE75_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE75]], align 16 +// COMPAT_17-NEXT: [[COERCE75_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE75_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE75_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE75_TUPLE]], 1 // COMPAT_17-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE76]], align 16 // COMPAT_17-NEXT: [[COERCE76_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE76]], align 16 // COMPAT_17-NEXT: [[COERCE76_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE76_TUPLE]], 0 // COMPAT_17-NEXT: [[COERCE76_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE76_TUPLE]], 1 -// COMPAT_17-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE77]], align 16 -// COMPAT_17-NEXT: [[COERCE77_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE77]], align 16 -// COMPAT_17-NEXT: [[COERCE77_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 0 -// COMPAT_17-NEXT: [[COERCE77_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 1 -// COMPAT_17-NEXT: call void @_Z1f13svmfloat8x2_t13svmfloat8x2_t(<vscale x 16 x i8> [[COERCE76_EXTRACT0]], <vscale x 16 x i8> [[COERCE76_EXTRACT1]], <vscale x 16 x i8> [[COERCE77_EXTRACT0]], <vscale x 16 x i8> [[COERCE77_EXTRACT1]]) +// COMPAT_17-NEXT: call void @_Z1f13svmfloat8x2_t13svmfloat8x2_t(<vscale x 16 x i8> [[COERCE75_EXTRACT0]], <vscale x 16 x i8> [[COERCE75_EXTRACT1]], <vscale x 16 x i8> [[COERCE76_EXTRACT0]], <vscale x 16 x i8> [[COERCE76_EXTRACT1]]) +// COMPAT_17-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE77]], align 16 +// COMPAT_17-NEXT: [[COERCE77_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE77]], align 16 +// COMPAT_17-NEXT: [[COERCE77_EXTRACT0:%.*]] = extractvalue { <vscale x 
16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE77_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE77_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 2 // COMPAT_17-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE78]], align 16 // COMPAT_17-NEXT: [[COERCE78_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE78]], align 16 // COMPAT_17-NEXT: [[COERCE78_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE78_TUPLE]], 0 // COMPAT_17-NEXT: [[COERCE78_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE78_TUPLE]], 1 // COMPAT_17-NEXT: [[COERCE78_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE78_TUPLE]], 2 -// COMPAT_17-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE79]], align 16 -// COMPAT_17-NEXT: [[COERCE79_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE79]], align 16 -// COMPAT_17-NEXT: [[COERCE79_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 0 -// COMPAT_17-NEXT: [[COERCE79_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 1 -// COMPAT_17-NEXT: [[COERCE79_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 2 -// COMPAT_17-NEXT: call void @_Z1f13svmfloat8x3_t13svmfloat8x3_t(<vscale x 16 x i8> [[COERCE78_EXTRACT0]], <vscale x 16 x i8> [[COERCE78_EXTRACT1]], <vscale x 16 x i8> [[COERCE78_EXTRACT2]], <vscale x 16 x i8> [[COERCE79_EXTRACT0]], <vscale x 16 x i8> [[COERCE79_EXTRACT1]], <vscale x 16 x i8> [[COERCE79_EXTRACT2]]) +// COMPAT_17-NEXT: call void @_Z1f13svmfloat8x3_t13svmfloat8x3_t(<vscale x 16 x i8> [[COERCE77_EXTRACT0]], <vscale x 16 x i8> [[COERCE77_EXTRACT1]], <vscale x 16 x i8> [[COERCE77_EXTRACT2]], <vscale x 16 x i8> [[COERCE78_EXTRACT0]], <vscale x 16 x i8> [[COERCE78_EXTRACT1]], <vscale x 16 x i8> [[COERCE78_EXTRACT2]]) +// COMPAT_17-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE79]], align 16 +// COMPAT_17-NEXT: [[COERCE79_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE79]], align 16 +// COMPAT_17-NEXT: [[COERCE79_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE79_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE79_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE79_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 3 // COMPAT_17-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE80]], align 16 // 
COMPAT_17-NEXT: [[COERCE80_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE80]], align 16 // COMPAT_17-NEXT: [[COERCE80_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE80_TUPLE]], 0 // COMPAT_17-NEXT: [[COERCE80_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE80_TUPLE]], 1 // COMPAT_17-NEXT: [[COERCE80_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE80_TUPLE]], 2 // COMPAT_17-NEXT: [[COERCE80_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE80_TUPLE]], 3 -// COMPAT_17-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE81]], align 16 -// COMPAT_17-NEXT: [[COERCE81_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE81]], align 16 -// COMPAT_17-NEXT: [[COERCE81_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE81_TUPLE]], 0 -// COMPAT_17-NEXT: [[COERCE81_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE81_TUPLE]], 1 -// COMPAT_17-NEXT: [[COERCE81_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE81_TUPLE]], 2 -// COMPAT_17-NEXT: [[COERCE81_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE81_TUPLE]], 3 -// COMPAT_17-NEXT: call void @_Z1f13svmfloat8x4_t13svmfloat8x4_t(<vscale x 16 x i8> [[COERCE80_EXTRACT0]], <vscale x 16 x i8> [[COERCE80_EXTRACT1]], <vscale x 16 x i8> [[COERCE80_EXTRACT2]], <vscale x 16 x i8> [[COERCE80_EXTRACT3]], <vscale x 16 x i8> [[COERCE81_EXTRACT0]], <vscale x 16 x i8> [[COERCE81_EXTRACT1]], <vscale x 16 x i8> [[COERCE81_EXTRACT2]], <vscale x 16 x i8> [[COERCE81_EXTRACT3]]) +// COMPAT_17-NEXT: call void @_Z1f13svmfloat8x4_t13svmfloat8x4_t(<vscale x 16 x i8> [[COERCE79_EXTRACT0]], <vscale x 16 x i8> [[COERCE79_EXTRACT1]], <vscale x 16 x i8> [[COERCE79_EXTRACT2]], <vscale x 16 x i8> [[COERCE79_EXTRACT3]], <vscale x 16 x i8> [[COERCE80_EXTRACT0]], <vscale x 16 x i8> [[COERCE80_EXTRACT1]], <vscale x 16 x i8> [[COERCE80_EXTRACT2]], <vscale x 16 x i8> [[COERCE80_EXTRACT3]]) // COMPAT_17-NEXT: ret void // void foo() { >From 89133b279afb5c556b197eaaa3268a1d50e68b66 Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Fri, 6 Dec 2024 13:09:23 +0000 Subject: [PATCH 3/3] [AArch64] Refactor implementation of FP8 types (NFC) * The FP8 scalar type (`__mfp8`) was described as a vector type * The FP8 vector types were described/assumed to have integer element type (the element type ought to be `__mfp8`), * Add support for `m` type specifier (denoting `__mfp8`) in `DecodeTypeFromStr` and create SVE builtin prototypes using the specifier, instead of `int8_t`. 
--- clang/include/clang/AST/Type.h | 5 +++ .../clang/Basic/AArch64SVEACLETypes.def | 24 +++++++++--- clang/lib/AST/ASTContext.cpp | 37 +++++++++++++++---- clang/lib/AST/ItaniumMangle.cpp | 5 +++ clang/lib/AST/Type.cpp | 4 +- clang/lib/CodeGen/CodeGenTypes.cpp | 13 +++++-- clang/lib/CodeGen/Targets/AArch64.cpp | 7 +++- clang/utils/TableGen/SveEmitter.cpp | 4 +- 8 files changed, 76 insertions(+), 23 deletions(-) diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 6fd6c73a516f08..626cfea56a47db 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2518,6 +2518,7 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { bool isFloat32Type() const; bool isDoubleType() const; bool isBFloat16Type() const; + bool isMFloat8Type() const; bool isFloat128Type() const; bool isIbm128Type() const; bool isRealType() const; // C99 6.2.5p17 (real floating + integer) @@ -8532,6 +8533,10 @@ inline bool Type::isBFloat16Type() const { return isSpecificBuiltinType(BuiltinType::BFloat16); } +inline bool Type::isMFloat8Type() const { + return isSpecificBuiltinType(BuiltinType::MFloat8); +} + inline bool Type::isFloat128Type() const { return isSpecificBuiltinType(BuiltinType::Float128); } diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def b/clang/include/clang/Basic/AArch64SVEACLETypes.def index 063cac1f4a58ee..6b704b386536c9 100644 --- a/clang/include/clang/Basic/AArch64SVEACLETypes.def +++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def @@ -57,6 +57,11 @@ // - IsBF true for vector of brain float elements. //===----------------------------------------------------------------------===// +#ifndef SVE_SCALAR_TYPE +#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \ + SVE_TYPE(Name, Id, SingletonId) +#endif + #ifndef SVE_VECTOR_TYPE #define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \ SVE_TYPE(Name, Id, SingletonId) @@ -72,6 +77,11 @@ SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, false, false, true) #endif +#ifndef SVE_VECTOR_TYPE_MFLOAT +#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF) \ + SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, false, false, false) +#endif + #ifndef SVE_VECTOR_TYPE_FLOAT #define SVE_VECTOR_TYPE_FLOAT(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF) \ SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, false, true, false) @@ -125,8 +135,7 @@ SVE_VECTOR_TYPE_FLOAT("__SVFloat64_t", "__SVFloat64_t", SveFloat64, SveFloat64Ty SVE_VECTOR_TYPE_BFLOAT("__SVBfloat16_t", "__SVBfloat16_t", SveBFloat16, SveBFloat16Ty, 8, 16, 1) -// This is a 8 bits opaque type. 
-SVE_VECTOR_TYPE_INT("__SVMfloat8_t", "__SVMfloat8_t", SveMFloat8, SveMFloat8Ty, 16, 8, 1, false) +SVE_VECTOR_TYPE_MFLOAT("__SVMfloat8_t", "__SVMfloat8_t", SveMFloat8, SveMFloat8Ty, 16, 8, 1) // // x2 @@ -148,7 +157,7 @@ SVE_VECTOR_TYPE_FLOAT("__clang_svfloat64x2_t", "svfloat64x2_t", SveFloat64x2, Sv SVE_VECTOR_TYPE_BFLOAT("__clang_svbfloat16x2_t", "svbfloat16x2_t", SveBFloat16x2, SveBFloat16x2Ty, 8, 16, 2) -SVE_VECTOR_TYPE_INT("__clang_svmfloat8x2_t", "svmfloat8x2_t", SveMFloat8x2, SveMFloat8x2Ty, 16, 8, 2, false) +SVE_VECTOR_TYPE_MFLOAT("__clang_svmfloat8x2_t", "svmfloat8x2_t", SveMFloat8x2, SveMFloat8x2Ty, 16, 8, 2) // // x3 @@ -170,7 +179,7 @@ SVE_VECTOR_TYPE_FLOAT("__clang_svfloat64x3_t", "svfloat64x3_t", SveFloat64x3, Sv SVE_VECTOR_TYPE_BFLOAT("__clang_svbfloat16x3_t", "svbfloat16x3_t", SveBFloat16x3, SveBFloat16x3Ty, 8, 16, 3) -SVE_VECTOR_TYPE_INT("__clang_svmfloat8x3_t", "svmfloat8x3_t", SveMFloat8x3, SveMFloat8x3Ty, 16, 8, 3, false) +SVE_VECTOR_TYPE_MFLOAT("__clang_svmfloat8x3_t", "svmfloat8x3_t", SveMFloat8x3, SveMFloat8x3Ty, 16, 8, 3) // // x4 @@ -192,7 +201,7 @@ SVE_VECTOR_TYPE_FLOAT("__clang_svfloat64x4_t", "svfloat64x4_t", SveFloat64x4, Sv SVE_VECTOR_TYPE_BFLOAT("__clang_svbfloat16x4_t", "svbfloat16x4_t", SveBFloat16x4, SveBFloat16x4Ty, 8, 16, 4) -SVE_VECTOR_TYPE_INT("__clang_svmfloat8x4_t", "svmfloat8x4_t", SveMFloat8x4, SveMFloat8x4Ty, 16, 8, 4, false) +SVE_VECTOR_TYPE_MFLOAT("__clang_svmfloat8x4_t", "svmfloat8x4_t", SveMFloat8x4, SveMFloat8x4Ty, 16, 8, 4) SVE_PREDICATE_TYPE_ALL("__SVBool_t", "__SVBool_t", SveBool, SveBoolTy, 16, 1) SVE_PREDICATE_TYPE_ALL("__clang_svboolx2_t", "svboolx2_t", SveBoolx2, SveBoolx2Ty, 16, 2) @@ -200,11 +209,13 @@ SVE_PREDICATE_TYPE_ALL("__clang_svboolx4_t", "svboolx4_t", SveBoolx4, SveBoolx4T SVE_OPAQUE_TYPE("__SVCount_t", "__SVCount_t", SveCount, SveCountTy) -AARCH64_VECTOR_TYPE_MFLOAT("__mfp8", "__mfp8", MFloat8, MFloat8Ty, 1, 8, 1) +SVE_SCALAR_TYPE("__mfp8", "__mfp8", MFloat8, MFloat8Ty, 8) + AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x8_t", "__MFloat8x8_t", MFloat8x8, MFloat8x8Ty, 8, 8, 1) AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x16_t", "__MFloat8x16_t", MFloat8x16, MFloat8x16Ty, 16, 8, 1) #undef SVE_VECTOR_TYPE +#undef SVE_VECTOR_TYPE_MFLOAT #undef SVE_VECTOR_TYPE_BFLOAT #undef SVE_VECTOR_TYPE_FLOAT #undef SVE_VECTOR_TYPE_INT @@ -213,4 +224,5 @@ AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x16_t", "__MFloat8x16_t", MFloat8x16, MFloa #undef SVE_OPAQUE_TYPE #undef AARCH64_VECTOR_TYPE_MFLOAT #undef AARCH64_VECTOR_TYPE +#undef SVE_SCALAR_TYPE #undef SVE_TYPE diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 80e8c5b9df58e7..f1dbc3b9233929 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -2254,6 +2254,11 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { Width = NumEls * ElBits * NF; \ Align = NumEls * ElBits; \ break; +#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \ + case BuiltinType::Id: \ + Width = Bits; \ + Align = Bits; \ + break; #include "clang/Basic/AArch64SVEACLETypes.def" #define PPC_VECTOR_TYPE(Name, Id, Size) \ case BuiltinType::Id: \ @@ -4374,15 +4379,18 @@ ASTContext::getBuiltinVectorTypeInfo(const BuiltinType *Ty) const { ElBits, NF) \ case BuiltinType::Id: \ return {BFloat16Ty, llvm::ElementCount::getScalable(NumEls), NF}; +#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, \ + ElBits, NF) \ + case BuiltinType::Id: \ + return {MFloat8Ty, llvm::ElementCount::getScalable(NumEls), NF}; #define SVE_PREDICATE_TYPE_ALL(Name, MangledName, 
Id, SingletonId, NumEls, NF) \ case BuiltinType::Id: \ return {BoolTy, llvm::ElementCount::getScalable(NumEls), NF}; #define AARCH64_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, \ ElBits, NF) \ case BuiltinType::Id: \ - return {getIntTypeForBitwidth(ElBits, false), \ - llvm::ElementCount::getFixed(NumEls), NF}; -#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingletonId) + return {MFloat8Ty, llvm::ElementCount::getFixed(NumEls), NF}; +#define SVE_TYPE(Name, Id, SingletonId) #include "clang/Basic/AArch64SVEACLETypes.def" #define RVV_VECTOR_TYPE_INT(Name, Id, SingletonId, NumEls, ElBits, NF, \ @@ -4444,11 +4452,16 @@ QualType ASTContext::getScalableVectorType(QualType EltTy, unsigned NumElts, EltTySize == ElBits && NumElts == (NumEls * NF) && NumFields == 1) { \ return SingletonId; \ } +#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, \ + ElBits, NF) \ + if (EltTy->isMFloat8Type() && EltTySize == ElBits && \ + NumElts == (NumEls * NF) && NumFields == 1) { \ + return SingletonId; \ + } #define SVE_PREDICATE_TYPE_ALL(Name, MangledName, Id, SingletonId, NumEls, NF) \ if (EltTy->isBooleanType() && NumElts == (NumEls * NF) && NumFields == 1) \ return SingletonId; -#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingletonId) -#define AARCH64_VECTOR_TYPE(Name, MangledName, Id, SingletonId) +#define SVE_TYPE(Name, Id, SingletonId) #include "clang/Basic/AArch64SVEACLETypes.def" } else if (Target->hasRISCVVTypes()) { uint64_t EltTySize = getTypeSize(EltTy); @@ -12153,8 +12166,15 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context, RequiresICE, false); assert(!RequiresICE && "Can't require vector ICE"); - // TODO: No way to make AltiVec vectors in builtins yet. - Type = Context.getVectorType(ElementType, NumElements, VectorKind::Generic); + if (ElementType == Context.MFloat8Ty) { + assert((NumElements == 8 || NumElements == 16) && + "Invalid number of elements"); + Type = NumElements == 8 ? Context.MFloat8x8Ty : Context.MFloat8x16Ty; + } else { + // TODO: No way to make AltiVec vectors in builtins yet. + Type = + Context.getVectorType(ElementType, NumElements, VectorKind::Generic); + } break; } case 'E': { @@ -12210,6 +12230,9 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context, case 'p': Type = Context.getProcessIDType(); break; + case 'm': + Type = Context.MFloat8Ty; + break; } // If there are modifiers and if we're allowed to parse them, go for it. diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 47aa9b40dab845..9404f9fd9b151d 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -3438,6 +3438,11 @@ void CXXNameMangler::mangleType(const BuiltinType *T) { type_name = MangledName; \ Out << (type_name == Name ? "u" : "") << type_name.size() << type_name; \ break; +#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \ + case BuiltinType::Id: \ + type_name = MangledName; \ + Out << (type_name == Name ? 
"u" : "") << type_name.size() << type_name; \ + break; #include "clang/Basic/AArch64SVEACLETypes.def" #define PPC_VECTOR_TYPE(Name, Id, Size) \ case BuiltinType::Id: \ diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 976361d07b68bf..dc9df9524457c2 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -2527,9 +2527,7 @@ bool Type::isSVESizelessBuiltinType() const { #define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId) \ case BuiltinType::Id: \ return true; -#define AARCH64_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \ - case BuiltinType::Id: \ - return false; +#define SVE_TYPE(Name, Id, SingletonId) #include "clang/Basic/AArch64SVEACLETypes.def" default: return false; diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 09191a4901f493..fd3327cf9acd89 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -507,13 +507,15 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case BuiltinType::Id: #define AARCH64_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \ case BuiltinType::Id: -#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingletonId) +#define SVE_TYPE(Name, Id, SingletonId) #include "clang/Basic/AArch64SVEACLETypes.def" { ASTContext::BuiltinVectorTypeInfo Info = Context.getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty)); - auto VTy = - llvm::VectorType::get(ConvertType(Info.ElementType), Info.EC); + auto *EltTy = Info.ElementType->isMFloat8Type() + ? llvm::Type::getInt8Ty(getLLVMContext()) + : ConvertType(Info.ElementType); + auto *VTy = llvm::VectorType::get(EltTy, Info.EC); switch (Info.NumVectors) { default: llvm_unreachable("Expected 1, 2, 3 or 4 vectors!"); @@ -529,6 +531,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { } case BuiltinType::SveCount: return llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount"); + case BuiltinType::MFloat8: + return llvm::VectorType::get(llvm::Type::getInt8Ty(getLLVMContext()), 1, + false); #define PPC_VECTOR_TYPE(Name, Id, Size) \ case BuiltinType::Id: \ ResultType = \ @@ -650,6 +655,8 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { // An ext_vector_type of Bool is really a vector of bits. llvm::Type *IRElemTy = VT->isExtVectorBoolType() ? llvm::Type::getInt1Ty(getLLVMContext()) + : VT->getElementType()->isMFloat8Type() + ? llvm::Type::getInt8Ty(getLLVMContext()) : ConvertType(VT->getElementType()); ResultType = llvm::FixedVectorType::get(IRElemTy, VT->getNumElements()); break; diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index ad7f405cc72550..9e714d7e64df86 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -244,6 +244,7 @@ AArch64ABIInfo::convertFixedToScalableVectorType(const VectorType *VT) const { case BuiltinType::SChar: case BuiltinType::UChar: + case BuiltinType::MFloat8: return llvm::ScalableVectorType::get( llvm::Type::getInt8Ty(getVMContext()), 16); @@ -781,8 +782,10 @@ bool AArch64ABIInfo::passAsPureScalableType( NPred += Info.NumVectors; else NVec += Info.NumVectors; - auto VTy = llvm::ScalableVectorType::get(CGT.ConvertType(Info.ElementType), - Info.EC.getKnownMinValue()); + llvm::Type *EltTy = Info.ElementType->isMFloat8Type() + ? 
llvm::Type::getInt8Ty(getVMContext()) : CGT.ConvertType(Info.ElementType); auto *VTy = llvm::ScalableVectorType::get(EltTy, Info.EC.getKnownMinValue()); if (CoerceToSeq.size() + Info.NumVectors > 12) return false; diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index 2d9f5c3381018a..735970d946fb45 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -448,7 +448,7 @@ std::string SVEType::builtinBaseType() const { case TypeKind::PredicatePattern: return "i"; case TypeKind::Fpm: - return "Wi"; + return "UWi"; case TypeKind::Predicate: return "b"; case TypeKind::BFloat16: return "y"; case TypeKind::MFloat8: assert(ElementBitwidth == 8 && "Invalid MFloat8!"); - return "c"; + return "m"; case TypeKind::Float: switch (ElementBitwidth) { case 16:
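
For reference, a minimal C-level sketch of the kind of source that can lead to the scalable-sized aggregate store handled by the first patch. This is illustrative only and not taken from the PR: `g` is a hypothetical consumer, and it assumes an SVE-enabled build (e.g. `-march=armv8-a+sve -O2`) in which zero-initialising an SVE tuple lowers to the aggregate store that MemCpyOpt now rewrites into a `memset` whose size is a multiple of `vscale`, mirroring the IR in `memset-scalable-size.ll`.

```c
// Illustrative sketch, not from the PR; mirrors memset-scalable-size.ll.
// Assumes an SVE-enabled target; g() is a hypothetical consumer of the tuple.
#include <arm_sve.h>

void g(svfloat32x3_t *p);

void f(void) {
  // Zero-initialise a three-vector SVE tuple. After optimisation this can
  // become a single aggregate store of scalable vectors, which MemCpyOpt may
  // turn into a memset whose size is only known as a multiple of vscale.
  svfloat32x3_t t = svcreate3_f32(svdup_f32(0.0f), svdup_f32(0.0f),
                                  svdup_f32(0.0f));
  g(&t);
}
```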