llvmbot wrote:

@llvm/pr-subscribers-clang-codegen

Author: Kerry McLaughlin (kmclaughlin-arm)

Changes

Adds the following SME2 builtins:

- sv(add|sub)
- sv(add|sub)_za32/za64
- sv(add|sub)_write_za32/za64

Other changes in this patch:

- CGBuiltin.cpp: Adds a GetAArch64SMEProcessedOperands function so that the operand-processing code is not duplicated between EmitAArch64SMEBuiltinExpr and EmitAArch64SVEBuiltinExpr.
- arm_sve.td: The add/sub SME2 builtins which do not operate on ZA have been added to arm_sve.td, matching the corresponding LLVM IR intrinsic names, which for this reason start with @llvm.aarch64.sve.
- SveEmitter.cpp: Adds a createCoreHeaderIntrinsics function to remove code duplicated between createHeader and createSMEHeader, and uses a new enum (ACLEKind) to choose either the "__builtin_sme_" or "__builtin_sve_" prefix when emitting the intrinsics.

See https://github.com/ARM-software/acle/pull/217/files

---

Patch is 405.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69725.diff

14 Files Affected:

- (modified) clang/include/clang/Basic/arm_sme.td (+35)
- (modified) clang/include/clang/Basic/arm_sve.td (+9)
- (modified) clang/include/clang/Basic/arm_sve_sme_incl.td (+1-1)
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+68-62)
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+5)
- (added) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c (+1226)
- (added) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c (+418)
- (modified) clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c (+18-18)
- (modified) clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c (+178-178)
- (modified) clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c (+22-22)
- (modified) clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c (+218-218)
- (modified) clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c (+26-26)
- (modified) clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c (+258-258)
- (modified) clang/utils/TableGen/SveEmitter.cpp (+43-62)

``````````diff
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 8d85327a86b1aaf..b5655afdf419ecf 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -263,3 +263,38 @@ multiclass ZAFPOuterProd<string n_suffix> {
 
 defm SVMOPA : ZAFPOuterProd<"mopa">;
 defm SVMOPS : ZAFPOuterProd<"mops">;
+
+////////////////////////////////////////////////////////////////////////////////
+// SME2 - ADD, SUB
+
+multiclass ZAAddSub<string n_suffix> {
+  let TargetGuard = "sme2" in {
+    def NAME # _WRITE_SINGLE_ZA32_VG1X2_I32 : Inst<"sv" # n_suffix # "_write[_single]_za32[_{d}]_vg1x2", "vm2d", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x2", [IsStreaming, IsSharedZA], []>;
+    def NAME # _WRITE_SINGLE_ZA32_VG1X4_I32 : Inst<"sv" # n_suffix # "_write[_single]_za32[_{d}]_vg1x4", "vm4d", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+    def NAME # _WRITE_ZA32_VG1X2_I32 : Inst<"sv" # n_suffix # "_write_za32[_{d}]_vg1x2", "vm22", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x2", [IsStreaming, IsSharedZA], []>;
+    def NAME # _WRITE_ZA32_VG1X4_I32 : Inst<"sv" # n_suffix # "_write_za32[_{d}]_vg1x4", "vm44", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+    def NAME # _ZA32_VG1x2_I32 : Inst<"sv" # n_suffix # "_za32[_{d}]_vg1x2", "vm2", "iUif", MergeNone, "aarch64_sme_" # n_suffix # "_za32_vg1x2", [IsStreaming, IsSharedZA], []>;
+    def NAME # _ZA32_VG1X4_I32 : Inst<"sv" # n_suffix # "_za32[_{d}]_vg1x4", "vm4", "iUif", MergeNone, "aarch64_sme_" # n_suffix # "_za32_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+    let TargetGuard = "sme-i16i64" in {
+      def NAME # _WRITE_SINGLE_ZA64_VG1X2_I64 : Inst<"sv" # n_suffix # "_write[_single]_za64[_{d}]_vg1x2", "vm2d", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x2", [IsStreaming, IsSharedZA], []>;
+      def NAME # _WRITE_SINGLE_ZA64_VG1X4_I64 : Inst<"sv" # n_suffix # "_write[_single]_za64[_{d}]_vg1x4", "vm4d", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+      def NAME # _WRITE_ZA64_VG1x2_I64 : Inst<"sv" # n_suffix # "_write_za64[_{d}]_vg1x2", "vm22", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x2", [IsStreaming, IsSharedZA], []>;
+      def NAME # _WRITE_ZA64_VG1x4_I64 : Inst<"sv" # n_suffix # "_write_za64[_{d}]_vg1x4", "vm44", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+      def NAME # _ZA64_VG1X2_I64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsSharedZA], []>;
+      def NAME # _ZA64_VG1X4_I64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsSharedZA], []>;
+    }
+
+    let TargetGuard = "sme-f64f64" in {
+      def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsSharedZA], []>;
+      def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsSharedZA], []>;
+    }
+  }
+}
+
+defm SVADD : ZAAddSub<"add">;
+defm SVSUB : ZAAddSub<"sub">;
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 8f9bdd18829ff6b..a0b0ba4f2976052 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1893,3 +1893,12 @@ def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [], []>;
 
 def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
 }
+
+////////////////////////////////////////////////////////////////////////////////
+// SME2
+
+let TargetGuard = "sme2" in {
+// == ADD (vectors) ==
+  def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
+  def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
+}
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
index c3a6dc4e4d44abe..a484ed19efc7e0b 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -256,7 +256,7 @@ class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
 }
 
 class Inst<string n, string p, string t, MergeType mt, string i,
-           list<FlagType> ft, list<ImmCheck> ch, MemEltType met> {
+           list<FlagType> ft, list<ImmCheck> ch, MemEltType met = MemEltTyDefault> {
   string Name = n;
   string Prototype = p;
   string Types = t;
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2b341b8090fad7d..59a31505b8993fa 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9571,22 +9571,17 @@ Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
   Value *BasePtr = Ops[1];
 
   // Does the store have an offset?
-  if (Ops.size() > 3)
+  if (Ops.size() > (2 + N))
     BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
 
-  Value *Val = Ops.back();
-
   // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
   // need to break up the tuple vector.
   SmallVector<llvm::Value*, 5> Operands;
-  unsigned MinElts = VTy->getElementCount().getKnownMinValue();
-  for (unsigned I = 0; I < N; ++I) {
-    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
-    Operands.push_back(Builder.CreateExtractVector(VTy, Val, Idx));
-  }
+  for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
+    Operands.push_back(Ops[I]);
   Operands.append({Predicate, BasePtr});
-
   Function *F = CGM.getIntrinsic(IntID, { VTy });
+
   return Builder.CreateCall(F, Operands);
 }
 
@@ -9893,24 +9888,37 @@ Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
   return Call;
 }
 
-Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
-                                                  const CallExpr *E) {
+void CodeGenFunction::GetAArch64SMEProcessedOperands(
+    unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
+    SVETypeFlags TypeFlags) {
   // Find out if any arguments are required to be integer constant expressions.
   unsigned ICEArguments = 0;
   ASTContext::GetBuiltinTypeError Error;
   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
   assert(Error == ASTContext::GE_None && "Should not codegen an error");
 
-  llvm::Type *Ty = ConvertType(E->getType());
-  if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
-      BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
-    Value *Val = EmitScalarExpr(E->getArg(0));
-    return EmitSVEReinterpret(Val, Ty);
-  }
+  bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
 
-  llvm::SmallVector<Value *, 4> Ops;
   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
-    if ((ICEArguments & (1 << i)) == 0)
+    if (!IsTupleGetOrSet && (ICEArguments & (1 << i)) == 0) {
+      Value *Arg = EmitScalarExpr(E->getArg(i));
+      if (auto *VTy = dyn_cast<ScalableVectorType>(Arg->getType())) {
+        unsigned MinElts = VTy->getMinNumElements();
+        bool IsPred = VTy->getElementType()->isIntegerTy(1);
+        unsigned N =
+            (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
+        for (unsigned I = 0; I < N; ++I) {
+          Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
+          auto *NewVTy =
+              ScalableVectorType::get(VTy->getElementType(), MinElts / N);
+          if (N == 1 && VTy == NewVTy)
+            Ops.push_back(Arg);
+          else
+            Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
+        }
+      } else
+        Ops.push_back(Arg);
+    } else if ((ICEArguments & (1 << i)) == 0)
       Ops.push_back(EmitScalarExpr(E->getArg(i)));
     else {
      // If this is required to be a constant, constant fold it so that we know
@@ -9927,9 +9935,25 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
     }
   }
 
+  return;
+}
+
+Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
+                                                  const CallExpr *E) {
+  llvm::Type *Ty = ConvertType(E->getType());
+  if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
+      BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
+    Value *Val = EmitScalarExpr(E->getArg(0));
+    return EmitSVEReinterpret(Val, Ty);
+  }
+
   auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
                                               AArch64SVEIntrinsicsProvenSorted);
+
+  llvm::SmallVector<Value *, 4> Ops;
   SVETypeFlags TypeFlags(Builtin->TypeModifier);
+  GetAArch64SMEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
+
   if (TypeFlags.isLoad())
     return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
                              TypeFlags.isZExtReturn());
@@ -9943,10 +9967,10 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
     return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isGatherPrefetch())
     return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
-  else if (TypeFlags.isStructLoad())
-    return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
-  else if (TypeFlags.isStructStore())
-    return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+  else if (TypeFlags.isStructLoad())
+    return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+  else if (TypeFlags.isStructStore())
+    return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
     return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
   else if (TypeFlags.isTupleCreate())
@@ -10202,13 +10226,8 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
   case SVE::BI__builtin_sve_svtbl2_f64: {
     SVETypeFlags TF(Builtin->TypeModifier);
     auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
-    Value *V0 = Builder.CreateExtractVector(VTy, Ops[0],
-                                            ConstantInt::get(CGM.Int64Ty, 0));
-    unsigned MinElts = VTy->getMinNumElements();
-    Value *V1 = Builder.CreateExtractVector(
-        VTy, Ops[0], ConstantInt::get(CGM.Int64Ty, MinElts));
     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
-    return Builder.CreateCall(F, {V0, V1, Ops[1]});
+    return Builder.CreateCall(F, Ops);
   }
 
   case SVE::BI__builtin_sve_svset_neonq_s8:
@@ -10272,29 +10291,13 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
   assert(Error == ASTContext::GE_None && "Should not codegen an error");
 
-  llvm::Type *Ty = ConvertType(E->getType());
-  llvm::SmallVector<Value *, 4> Ops;
-  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
-    if ((ICEArguments & (1 << i)) == 0)
-      Ops.push_back(EmitScalarExpr(E->getArg(i)));
-    else {
-      // If this is required to be a constant, constant fold it so that we know
-      // that the generated intrinsic gets a ConstantInt.
-      std::optional<llvm::APSInt> Result =
-          E->getArg(i)->getIntegerConstantExpr(getContext());
-      assert(Result && "Expected argument to be a constant");
-
-      // Immediates for SVE llvm intrinsics are always 32bit.  We can safely
-      // truncate because the immediate has been range checked and no valid
-      // immediate requires more than a handful of bits.
-      *Result = Result->extOrTrunc(32);
-      Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
-    }
-  }
-
   auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
                                               AArch64SMEIntrinsicsProvenSorted);
+
+  llvm::SmallVector<Value *, 4> Ops;
   SVETypeFlags TypeFlags(Builtin->TypeModifier);
+  GetAArch64SMEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
+
   if (TypeFlags.isLoad() || TypeFlags.isStore())
     return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
@@ -10307,21 +10310,24 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
            BuiltinID == SME::BI__builtin_sme_svldr_za ||
            BuiltinID == SME::BI__builtin_sme_svstr_za)
     return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
-  else if (Builtin->LLVMIntrinsic != 0) {
-    // Predicates must match the main datatype.
-    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-      if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
-        if (PredTy->getElementType()->isIntegerTy(1))
-          Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
-    Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
-                                   getSVEOverloadTypes(TypeFlags, Ty, Ops));
-    Value *Call = Builder.CreateCall(F, Ops);
-    return Call;
-  }
+  // Should not happen!
+  if (Builtin->LLVMIntrinsic == 0)
+    return nullptr;
 
-  /// Should not happen
-  return nullptr;
+  // Predicates must match the main datatype.
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+    if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
+      if (PredTy->getElementType()->isIntegerTy(1))
+        Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
+
+  Function *F =
+      TypeFlags.isOverloadNone()
+          ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
+          : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
+  Value *Call = Builder.CreateCall(F, Ops);
+
+  return FormSVEBuiltinResult(Call);
 }
 
 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index e82115e2d706cf1..ed50b051e77f96e 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4297,6 +4297,11 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// the wider vector. This avoids the error when allocating space in llvm
   /// for struct of scalable vectors if a function returns struct.
   llvm::Value *FormSVEBuiltinResult(llvm::Value *Call);
+
+  void GetAArch64SMEProcessedOperands(unsigned BuiltinID, const CallExpr *E,
+                                      SmallVectorImpl<llvm::Value *> &Ops,
+                                      SVETypeFlags TypeFlags);
+
   llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
 
   llvm::Value *EmitSMELd1St1(const SVETypeFlags &TypeFlags,
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c
new file mode 100644
index 000000000000000..18d7c66896d8b84
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c
@@ -0,0 +1,1226 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5
+#endif
+
+//
+// Single-Multi
+//
+
+// x2
+// CHECK-LABEL: @test_svadd_write_single2_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[ADD]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[ZM:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z28test_svadd_write_single2_s32j11svint32x2_tu11__SVInt32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[ADD]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svadd_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t zm) {
+  SVE_ACLE_FUNC(svadd_write,_single,_za32,_s32,_vg1x2)(slice_base + 7, zn, zm);
+}
+
+// CHECK-LABEL: @test_svadd_write_single2_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[ADD]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[ZM:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z28test_svadd_write_single2_u32j12svuint32x2_tu12__SVUint32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[ADD]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svadd_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32_t zm) {
+  SVE_ACLE_FUNC(svadd_write,_single,_za32,_u32,_vg1x2)(slice_base + 7, zn, zm);
+}
+
+// CHECK-LABEL: @test_svadd_write_single2_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
+// CHECK-NEX...
[truncated]
``````````

https://github.com/llvm/llvm-project/pull/69725
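For context, here is what calling one of the new builtins looks like in user code. This is a minimal sketch mirroring test_svadd_write_single2_s32 from the new test file above; like the tests, it uses the draft ACLE header and omits the streaming/shared-ZA function attributes, and the wrapper name is invented for illustration:

```c
#include <arm_sme_draft_spec_subject_to_change.h>

// Hypothetical wrapper: adds the single vector zm to each of the two
// vectors in the tuple zn, writing the two results to consecutive 32-bit
// ZA single-vector groups selected by slice_base + 7.
void add_single_to_za32(uint32_t slice_base, svint32x2_t zn, svint32_t zm) {
  svadd_write_single_za32_s32_vg1x2(slice_base + 7, zn, zm);
}
```

With SVE_OVERLOADED_FORMS defined, the same call is spelled through the overloaded name svadd_write_za32_vg1x2, per the `[_single]` and `[_{d}]` brackets in the arm_sme.td prototype.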
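The ACLEKind part of the SveEmitter.cpp change falls outside the truncated diff. As a rough sketch only (the enum name comes from the summary above; the enumerator and helper names here are guesses, not the patch's actual code), the idea is a shared header-emission path parameterised by the ACLE flavour:

```cpp
#include <string>

// Hypothetical sketch of the ACLEKind idea: one enum selects which builtin
// prefix the shared header-emission code prints for each intrinsic.
enum class ACLEKind { SVE, SME };

static std::string builtinPrefix(ACLEKind Kind) {
  switch (Kind) {
  case ACLEKind::SVE:
    return "__builtin_sve_"; // SVE header path (createHeader)
  case ACLEKind::SME:
    return "__builtin_sme_"; // SME header path (createSMEHeader)
  }
  return ""; // unreachable with a valid Kind
}
```

A helper like this would let createCoreHeaderIntrinsics hold the emission loop shared by createHeader and createSMEHeader, with only the prefix differing between the two headers.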