https://github.com/SamTebbs33 created https://github.com/llvm/llvm-project/pull/68565
The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently require the vnum argument to be an immediate, since the instructions take an immediate vector number. However, we always emit 0 as the instruction's immediate, no matter what, and modify the base register instead. This patch removes that restriction on the argument, so that it can be a non-immediate. If a suitable immediate is passed to the builtin, CGBuiltin forwards it directly to the LLVM intrinsic; otherwise it modifies the base register, as is the existing behaviour.
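As an illustration of the new behaviour, a minimal usage sketch follows (hypothetical function and variable names, modelled on the test_svldr_vnum_za_var test added in the patch below; it assumes an SME-enabled Clang and omits the streaming/shared-ZA attributes and -march flags a real translation unit would need):

  // Sketch only: this is the draft SME ACLE header name Clang ships at the
  // time of this patch.
  #include <arm_sme_draft_spec_subject_to_change.h>

  void example(uint32_t slice_base, const void *src, uint64_t vnum) {
    // Immediate vnum in [0, 15]: CGBuiltin now forwards it directly as the
    // new immediate operand of the @llvm.aarch64.sme.ldr intrinsic.
    svldr_vnum_za(slice_base, src, 15);

    // Non-immediate vnum: previously rejected by Sema, now accepted. The
    // offset (vnum * SVL-in-bytes) is folded into the base pointer and the
    // slice index, and the intrinsic immediate stays 0, as before.
    svldr_vnum_za(slice_base, src, vnum);
  }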
From f57f952989ee64d419dc51e9ecf9786720ece3ff Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.te...@arm.com>
Date: Fri, 6 Oct 2023 17:09:36 +0100
Subject: [PATCH] [AArch64][SME] Remove immediate argument restriction for
 svldr and svstr

The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently
require the vnum argument to be an immediate, since the instructions
take an immediate vector number. However, we always emit 0 as the
instruction's immediate, no matter what, and modify the base register
instead. This patch removes that restriction on the argument, so that
it can be a non-immediate. If a suitable immediate is passed to the
builtin, CGBuiltin forwards it directly to the LLVM intrinsic;
otherwise it modifies the base register, as is the existing behaviour.
---
 clang/include/clang/Basic/arm_sme.td          |  5 +-
 clang/lib/CodeGen/CGBuiltin.cpp               | 42 +++++++++----
 .../aarch64-sme-intrinsics/acle_sme_ldr.c     | 26 +++++---
 .../aarch64-sme-intrinsics/acle_sme_str.c     | 19 +++---
 .../aarch64-sme-intrinsics/acle_sme_imm.cpp   |  8 ---
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |  4 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td    | 10 ++--
 .../CostModel/ARM/unaligned_double_load.ll    | 59 +++++++++++++++++++
 .../CodeGen/AArch64/sme-intrinsics-loads.ll   | 33 ++++++++---
 9 files changed, 150 insertions(+), 56 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll

diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index d014900d719c338..49ef6b6b3fc4359 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -44,10 +44,9 @@ defm SVLD1_ZA32 : ZALoad<"za32", "i", "aarch64_sme_ld1w", [ImmCheck<0, ImmCheck0
 defm SVLD1_ZA64 : ZALoad<"za64", "l", "aarch64_sme_ld1d", [ImmCheck<0, ImmCheck0_7>]>;
 defm SVLD1_ZA128 : ZALoad<"za128", "q", "aarch64_sme_ld1q", [ImmCheck<0, ImmCheck0_15>]>;
 
-def SVLDR_VNUM_ZA : MInst<"svldr_vnum_za", "vmQi", "",
+def SVLDR_VNUM_ZA : MInst<"svldr_vnum_za", "vmQn", "",
                           [IsOverloadNone, IsStreamingCompatible, IsSharedZA],
-                          MemEltTyDefault, "aarch64_sme_ldr",
-                          [ImmCheck<2, ImmCheck0_15>]>;
+                          MemEltTyDefault, "aarch64_sme_ldr">;
 
 def SVLDR_ZA : MInst<"svldr_za", "vmQ", "",
                      [IsOverloadNone, IsStreamingCompatible, IsSharedZA],
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d14cf0dccb09982..ca4bf498cab9535 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9606,7 +9606,7 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
 }
 
 Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) {
-  llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int32Ty, false);
+  llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false);
   return Builder.CreateAdd(Base, CastOffset, "tileslice");
 }
 
@@ -9665,18 +9665,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
                                       SmallVectorImpl<Value *> &Ops,
                                       unsigned IntID) {
-  if (Ops.size() == 3) {
-    Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
-    llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
-    llvm::Value *MulVL = Builder.CreateMul(
-        CntsbCall,
-        Builder.getInt64(cast<llvm::ConstantInt>(Ops[2])->getZExtValue()),
-        "mulvl");
-
-    Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
-    Ops[0] = EmitTileslice(Ops[0], Ops[2]);
-    Ops.erase(&Ops[2]);
-  }
+  if (Ops.size() == 2) {
+    // Intrinsics without a vecnum also use this function, so just provide 0
+    Ops.push_back(Ops[1]);
+    Ops[1] = Builder.getInt32(0);
+  } else {
+    int Imm = -1;
+    if (ConstantInt* C = dyn_cast<ConstantInt>(Ops[2]))
+      if (C->getZExtValue() <= 15)
+        Imm = C->getZExtValue();
+
+    if (Imm != -1) {
+      Ops[2] = Ops[1];
+      Ops[1] = Builder.getInt32(Imm);
+    } else {
+      Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
+      llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
+
+      llvm::Value *VecNum = Ops[2];
+      llvm::Value *MulVL = Builder.CreateMul(
+          CntsbCall,
+          VecNum,
+          "mulvl");
+
+      Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
+      Ops[1] = Builder.getInt32(0);
+      Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, false);
+    }
+  }
   Function *F = CGM.getIntrinsic(IntID, {});
   return Builder.CreateCall(F, Ops);
 }
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
index acddc2ef50a3ddf..df7ff4ca995b544 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -8,7 +8,7 @@
 // CHECK-C-LABEL: @test_svldr_vnum_za(
 // CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zajPKv(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], i32 0, ptr [[PTR]])
 // CHECK-NEXT:    ret void
 //
 void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) {
@@ -18,22 +18,34 @@ void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) {
 // CHECK-C-LABEL: @test_svldr_vnum_za_1(
 // CHECK-CXX-LABEL: @_Z20test_svldr_vnum_za_1jPKv(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
-// CHECK-NEXT:    [[MULVL:%.*]] = mul i64 [[SVLB]], 15
-// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
-// CHECK-NEXT:    [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[TILESLICE]], ptr [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], i32 15, ptr [[PTR]])
 // CHECK-NEXT:    ret void
 //
 void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) {
   svldr_vnum_za(slice_base, ptr, 15);
 }
 
+// CHECK-C-LABEL: @test_svldr_vnum_za_var(
+// CHECK-CXX-LABEL: @_Z22test_svldr_vnum_za_varjPKvm(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT:    [[MULVL:%.*]] = mul i64 [[SVLB]], [[VNUM]]
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[VNUM]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], [[SLICE_BASE]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[TMP2]], i32 0, ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, uint64_t vnum) {
+  svldr_vnum_za(slice_base, ptr, vnum);
+}
+
 // CHECK-C-LABEL: @test_svldr_za(
 // CHECK-CXX-LABEL: @_Z13test_svldr_zajPKv(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], i32 0, ptr [[PTR]])
 // CHECK-NEXT:    ret void
+//
 void test_svldr_za(uint32_t slice_base, const void *ptr) {
   svldr_za(slice_base, ptr);
 }
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
index 2728f9ac0cd12d3..f384bd76899b0fd 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
@@ -8,31 +8,28 @@
 // CHECK-C-LABEL: @test_svstr_vnum_za(
 // CHECK-CXX-LABEL: @_Z18test_svstr_vnum_zajPv(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], i32 0, ptr [[PTR]])
 // CHECK-NEXT:    ret void
 //
 void test_svstr_vnum_za(uint32_t slice_base, void *ptr) {
   svstr_vnum_za(slice_base, ptr, 0);
 }
 
-// CHECK-C-LABEL: @test_svstr_vnum_za_1(
-// CHECK-CXX-LABEL: @_Z20test_svstr_vnum_za_1jPv(
+// CHECK-C-LABEL: define dso_local void @test_svstr_vnum_za_1(
+// CHECK-CXX-LABEL: define dso_local void @_Z20test_svstr_vnum_za_1jPv(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
-// CHECK-NEXT:    [[MULVL:%.*]] = mul i64 [[SVLB]], 15
-// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
-// CHECK-NEXT:    [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[TILESLICE]], ptr [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], i32 15, ptr [[PTR]])
 // CHECK-NEXT:    ret void
 //
 void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) {
   svstr_vnum_za(slice_base, ptr, 15);
 }
 
-// CHECK-C-LABEL: @test_svstr_za(
-// CHECK-CXX-LABEL: @_Z13test_svstr_zajPv(
+// CHECK-C-LABEL: define dso_local void @test_svstr_za(
+// CHECK-CXX-LABEL: define dso_local void @_Z13test_svstr_zajPv(
+// CHECK-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], i32 0, ptr [[PTR]])
 // CHECK-NEXT:    ret void
 //
 void test_svstr_za(uint32_t slice_base, void *ptr) {
diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp
index 7475fd53b80ba2b..1faa5638c801c2d 100644
--- a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp
+++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp
@@ -143,11 +143,6 @@ void test_range_0_15(uint32_t slice, svbool_t pg, void *ptr) {
   // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}}
   SVE_ACLE_FUNC(svst1_ver_vnum_za128,,,)(16, slice, pg, ptr, 1);
-  // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}}
-  SVE_ACLE_FUNC(svldr_vnum_za,,,)(-1, ptr, 16);
-  // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
-  SVE_ACLE_FUNC(svstr_vnum_za,,,)(-1, ptr, -1);
-  // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
   SVE_ACLE_FUNC(svread_hor_za128, _s8, _m,)(svundef_s8(), pg, -1, slice);
   // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}}
@@ -171,9 +166,6 @@ void test_constant(uint64_t u64, svbool_t pg, void *ptr) {
   SVE_ACLE_FUNC(svld1_hor_vnum_za8,,,)(u64, 0, pg, ptr, u64);  // expected-error {{argument to 'svld1_hor_vnum_za8' must be a constant integer}}
   SVE_ACLE_FUNC(svst1_hor_vnum_za32,,,)(u64, 0, pg, ptr, u64); // expected-error {{argument to 'svst1_hor_vnum_za32' must be a constant integer}}
-  SVE_ACLE_FUNC(svldr_vnum_za,,,)(u64, ptr, u64); // expected-error {{argument to 'svldr_vnum_za' must be a constant integer}}
-  SVE_ACLE_FUNC(svstr_vnum_za,,,)(u64, ptr, u64); // expected-error {{argument to 'svstr_vnum_za' must be a constant integer}}
-
   SVE_ACLE_FUNC(svread_ver_za16, _s16, _m,)(svundef_s16(), pg, u64, 0); // expected-error-re {{argument to 'svread_ver_za16{{.*}}_m' must be a constant integer}}
   SVE_ACLE_FUNC(svwrite_ver_za64, _s64, _m,)(u64, 0, pg, svundef_s64()); // expected-error-re {{argument to 'svwrite_ver_za64{{.*}}_m' must be a constant integer}}
 }
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 557063c8813268e..26827cf6110d497 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2680,9 +2680,9 @@ let TargetPrefix = "aarch64" in {
 
   // Spill + fill
   def int_aarch64_sme_ldr : DefaultAttrsIntrinsic<
-    [], [llvm_i32_ty, llvm_ptr_ty]>;
+    [], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<1>>]>;
   def int_aarch64_sme_str : DefaultAttrsIntrinsic<
-    [], [llvm_i32_ty, llvm_ptr_ty]>;
+    [], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<1>>]>;
 
   class SME_TileToVector_Intrinsic
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index edd24b4a849b547..5b5b6a31705df33 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -794,8 +794,8 @@ multiclass sme_spill<string opcodestr> {
                   (!cast<Instruction>(NAME) MatrixOp:$ZAt,
                    MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
   // base
-  def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base),
-            (!cast<Instruction>(NAME) ZA, $idx, 0, $base, 0)>;
+  def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm, GPR64sp:$base),
+            (!cast<Instruction>(NAME) ZA, $idx, $imm, $base, 0)>;
 }
 
 multiclass sme_fill<string opcodestr> {
@@ -805,7 +805,7 @@ multiclass sme_fill<string opcodestr> {
                   MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
   def NAME # _PSEUDO : Pseudo<(outs),
-                              (ins MatrixIndexGPR32Op12_15:$idx, imm0_15:$imm4,
+                              (ins MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm4,
                                GPR64sp:$base), []>,
                        Sched<[]> {
     // Translated to actual instruction in AArch64ISelLowering.cpp
@@ -813,8 +813,8 @@ multiclass sme_fill<string opcodestr> {
     let mayLoad = 1;
   }
   // base
-  def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base),
-            (!cast<Instruction>(NAME # _PSEUDO) $idx, 0, $base)>;
+  def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm, GPR64sp:$base),
+            (!cast<Instruction>(NAME # _PSEUDO) $idx, $imm, $base)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll b/llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll
new file mode 100644
index 000000000000000..8d457220ea9c5ae
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefix=CHECK-NOVEC
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7m-none-eabi -mcpu=cortex-m4 < %s | FileCheck %s --check-prefix=CHECK-FP
+
+define float @f(ptr %x) {
+; CHECK-NOVEC-LABEL: 'f'
+; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a.0.copyload = load float, ptr %x, align 1
+; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %a.0.copyload
+;
+; CHECK-FP-LABEL: 'f'
+; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a.0.copyload = load float, ptr %x, align 1
+; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %a.0.copyload
+;
+entry:
+  %a.0.copyload = load float, ptr %x, align 1
+  ret float %a.0.copyload
+}
+
+define float @ff(ptr %x, float %f) {
+; CHECK-NOVEC-LABEL: 'ff'
+; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float %f, ptr %x, align 1
+; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
+;
+; CHECK-FP-LABEL: 'ff'
+; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store float %f, ptr %x, align 1
+; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
+;
+entry:
+  store float %f, ptr %x, align 1
+  ret float undef
+}
+
+define double @d(ptr %x) {
+; CHECK-NOVEC-LABEL: 'd'
+; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a.0.copyload = load double, ptr %x, align 1
+; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %a.0.copyload
+;
+; CHECK-FP-LABEL: 'd'
+; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a.0.copyload = load double, ptr %x, align 1
+; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %a.0.copyload
+;
+entry:
+  %a.0.copyload = load double, ptr %x, align 1
+  ret double %a.0.copyload
+}
+
+define double @dd(ptr %x, double %f) {
+; CHECK-NOVEC-LABEL: 'dd'
+; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store double %f, ptr %x, align 1
+; CHECK-NOVEC-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double undef
+;
+; CHECK-FP-LABEL: 'dd'
+; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store double %f, ptr %x, align 1
+; CHECK-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double undef
+;
+entry:
+  store double %f, ptr %x, align 1
+  ret double undef
+}
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
index c96aca366ed43f2..f5d25a3229a7f82 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
@@ -252,10 +252,28 @@ define void @ldr(ptr %ptr) {
 ; CHECK-NEXT:    mov w12, wzr
 ; CHECK-NEXT:    ldr za[w12, 0], [x0]
 ; CHECK-NEXT:    ret
-  call void @llvm.aarch64.sme.ldr(i32 0, ptr %ptr)
+  call void @llvm.aarch64.sme.ldr(i32 0, i32 0, ptr %ptr)
   ret void;
 }
 
+define void @ldr_vnum(i32 %tile_slice, ptr %ptr, i64 %vnum) {
+; CHECK-LABEL: ldr_vnum:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    rdsvl x8, #1
+; CHECK-NEXT:    add w12, w2, w0
+; CHECK-NEXT:    madd x8, x8, x2, x1
+; CHECK-NEXT:    ldr za[w12, 0], [x8]
+; CHECK-NEXT:    ret
+entry:
+  %svlb = tail call i64 @llvm.aarch64.sme.cntsb()
+  %mulvl = mul i64 %svlb, %vnum
+  %0 = getelementptr i8, ptr %ptr, i64 %mulvl
+  %1 = trunc i64 %vnum to i32
+  %2 = add i32 %1, %tile_slice
+  tail call void @llvm.aarch64.sme.ldr(i32 %2, i32 0, ptr %0)
+  ret void
+}
+
 define void @ldr_with_off_15(ptr %ptr) {
 ; CHECK-LABEL: ldr_with_off_15:
 ; CHECK:       // %bb.0:
@@ -264,7 +282,7 @@ define void @ldr_with_off_15(ptr %ptr) {
 ; CHECK-NEXT:    ldr za[w12, 0], [x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr i8, ptr %ptr, i64 15
-  call void @llvm.aarch64.sme.ldr(i32 15, ptr %base)
+  call void @llvm.aarch64.sme.ldr(i32 15, i32 0, ptr %base)
   ret void;
 }
 
@@ -278,7 +296,7 @@ define void @ldr_with_off_15mulvl(ptr %ptr) {
   %vscale = call i64 @llvm.vscale.i64()
   %mulvl = mul i64 %vscale, 240
   %base = getelementptr i8, ptr %ptr, i64 %mulvl
-  call void @llvm.aarch64.sme.ldr(i32 15, ptr %base)
+  call void @llvm.aarch64.sme.ldr(i32 15, i32 0, ptr %base)
   ret void;
 }
 
@@ -292,7 +310,7 @@ define void @ldr_with_off_16mulvl(ptr %ptr) {
   %vscale = call i64 @llvm.vscale.i64()
   %mulvl = mul i64 %vscale, 256
   %base = getelementptr i8, ptr %ptr, i64 %mulvl
-  call void @llvm.aarch64.sme.ldr(i32 16, ptr %base)
+  call void @llvm.aarch64.sme.ldr(i32 16, i32 0, ptr %base)
   ret void;
 }
 
@@ -302,13 +320,13 @@ define void @test_ld1_sink_tile0_offset_operand(<vscale x 4 x i1> %pg, ptr %src,
 ; CHECK-LABEL: test_ld1_sink_tile0_offset_operand:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov w12, w1
-; CHECK-NEXT:  .LBB14_1: // %for.body
+; CHECK-NEXT:  .LBB15_1: // %for.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ld1w {za0h.s[w12, 0]}, p0/z, [x0]
 ; CHECK-NEXT:    subs w2, w2, #1
 ; CHECK-NEXT:    ld1w {za0h.s[w12, 1]}, p0/z, [x0]
 ; CHECK-NEXT:    ld1w {za0h.s[w12, 2]}, p0/z, [x0]
-; CHECK-NEXT:    b.ne .LBB14_1
+; CHECK-NEXT:    b.ne .LBB15_1
 ; CHECK-NEXT:    // %bb.2: // %exit
 ; CHECK-NEXT:    ret
 entry:
@@ -341,5 +359,6 @@ declare void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1>, ptr, i32, i32)
 declare void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1>, ptr, i32, i32)
 declare void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1>, ptr, i32, i32)
 
-declare void @llvm.aarch64.sme.ldr(i32, ptr)
+declare void @llvm.aarch64.sme.ldr(i32, i32, ptr)
 declare i64 @llvm.vscale.i64()
+declare i64 @llvm.aarch64.sme.cntsb()

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits