llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-ir Author: Matthew Devereau (MDevereau) <details> <summary>Changes</summary> Adds the builtins: void svldr_zt(uint64_t zt, const void *rn) void svstr_zt(uint64_t zt, void *rn) And the intrinsics: call void @<!-- -->llvm.aarch64.sme.ldr.zt(i32, ptr) tail call void @<!-- -->llvm.aarch64.sme.str.zt(i32, ptr) Patch by: Kerry McLaughlin <kerry.mclaughlin@<!-- -->arm.com> --- Full diff: https://github.com/llvm/llvm-project/pull/71795.diff 11 Files Affected: - (modified) clang/include/clang/Basic/arm_sme.td (+5) - (modified) clang/include/clang/Basic/arm_sve.td (+9) - (added) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c (+51) - (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+7-4) - (modified) llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (+5-2) - (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+21) - (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+2) - (modified) llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp (+6) - (modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+2-2) - (modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+18-5) - (added) llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll (+27) ``````````diff diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..fe3de56ce3298c5 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,8 @@ multiclass ZAAddSub<string n_suffix> { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +let TargetGuard = "sme2" in { + def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; + def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +} diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 3d4c2129565903d..f0b3747898d4145 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1813,6 +1813,15 @@ def SVWHILERW_H_BF16 : SInst<"svwhilerw[_{1}]", "Pcc", "b", MergeNone, "aarch64_ def SVWHILEWR_H_BF16 : SInst<"svwhilewr[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW]>; } +// // +// // Spill and fill of ZT0 +// // + +// let TargetGuard = "sme2" in { +// def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +// def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +// } + //////////////////////////////////////////////////////////////////////////////// // SVE2 - Extended table lookup/permute let TargetGuard = "sve2" in { diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c new file mode 100644 index 000000000000000..3d70ded6b469ba1 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c @@ -0,0 +1,51 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include <arm_sme_draft_spec_subject_to_change.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// LDR ZT0 + +// CHECK-LABEL: @test_svldr_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z13test_svldr_ztPKv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svldr_zt(0, base); +} ; + + +// STR ZT0 + +// CHECK-LABEL: @test_svstr_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z13test_svstr_ztPv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstr_zt(void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svstr_zt(0, base); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index a42e2c49cb477ba..9164604f7d78cbc 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2679,10 +2679,10 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_st1q_vert : SME_Load_Store_Intrinsic<llvm_nxv1i1_ty>; // Spill + fill - def int_aarch64_sme_ldr : DefaultAttrsIntrinsic< - [], [llvm_i32_ty, llvm_ptr_ty]>; - def int_aarch64_sme_str : DefaultAttrsIntrinsic< - [], [llvm_i32_ty, llvm_ptr_ty]>; + class SME_LDR_STR_Intrinsic + : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_ptr_ty]>; + def int_aarch64_sme_ldr : SME_LDR_STR_Intrinsic; + def int_aarch64_sme_str : SME_LDR_STR_Intrinsic; class SME_TileToVector_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], @@ -3454,4 +3454,7 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_sel_x2 : SVE2_VG2_Sel_Intrinsic; def int_aarch64_sve_sel_x4 : SVE2_VG4_Sel_Intrinsic; + def int_aarch64_sme_ldr_zt : SME_LDR_STR_Intrinsic; + def int_aarch64_sme_str_zt : SME_LDR_STR_Intrinsic; + } diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 7617dccdeee397f..c011a46cf02ab8b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -326,15 +326,18 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { return false; } - template <unsigned BaseReg> bool ImmToTile(SDValue N, SDValue &Imm) { + template <unsigned BaseReg, unsigned Max> bool ImmToTile(SDValue N, SDValue &Imm) { if (auto *CI = dyn_cast<ConstantSDNode>(N)) { uint64_t C = CI->getZExtValue(); + + if (C > Max) + return false; + Imm = CurDAG->getRegister(BaseReg + C, MVT::Other); return true; } return false; } - /// Form sequences of consecutive 64/128-bit registers for use in NEON /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have /// between 1 and 4 elements. If it contains a single element that is returned diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index b9d6578ee33f2a0..c6ff3f1ce6a33b6 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2746,6 +2746,23 @@ AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const { return BB; } +MachineBasicBlock *AArch64TargetLowering::EmitZTSpillFill(MachineInstr &MI, + MachineBasicBlock *BB, + bool IsSpill) const { + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineInstrBuilder MIB; + if (IsSpill) { + MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STR_TX)); + MIB.addReg(MI.getOperand(0).getReg()); + } + else + MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::LDR_TX), + MI.getOperand(0).getReg()); + MIB.add(MI.getOperand(1)); // Base + MI.eraseFromParent(); // The pseudo is gone now. + return BB; +} + MachineBasicBlock * AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg, MachineInstr &MI, @@ -2862,6 +2879,10 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB); case AArch64::LDR_ZA_PSEUDO: return EmitFill(MI, BB); + case AArch64::LDR_TX_PSEUDO: + return EmitZTSpillFill(MI, BB, /*IsSpill=*/false); + case AArch64::STR_TX_PSEUDO: + return EmitZTSpillFill(MI, BB, /*IsSpill=*/true); case AArch64::ZERO_M_PSEUDO: return EmitZero(MI, BB); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 7332a95615a4da5..d96e5977de0f4c7 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -608,6 +608,8 @@ class AArch64TargetLowering : public TargetLowering { MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg, MachineInstr &MI, MachineBasicBlock *BB, bool HasTile) const; + MachineBasicBlock *EmitZTSpillFill(MachineInstr &MI, MachineBasicBlock *BB, + bool IsSpill) const; MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const; MachineBasicBlock * diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 1a730a0b955d3d1..af2181c0791bca8 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -440,6 +440,12 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const { Reserved.set(SubReg); } + if (MF.getSubtarget<AArch64Subtarget>().hasSME2()) { + for (MCSubRegIterator SubReg(AArch64::ZT0, this, /*self=*/true); + SubReg.isValid(); ++SubReg) + Reserved.set(*SubReg); + } + markSuperRegs(Reserved, AArch64::FPCR); assert(checkAllSuperRegsMarked(Reserved)); diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index bb9464a8d2e1cf2..fcfa5f82a3809c2 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -541,8 +541,8 @@ defm UMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"umops", 0b101, int_aarch64_sme_umops def ZERO_T : sme2_zero_zt<"zero", 0b0001>; -def LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100>; -def STR_TX : sme2_spill_fill_vector<"str", 0b11111100>; +defm LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100, int_aarch64_sme_ldr_zt>; +defm STR_TX : sme2_spill_fill_vector<"str", 0b11111100, int_aarch64_sme_str_zt>; def MOVT_XTI : sme2_movt_zt_to_scalar<"movt", 0b0011111>; def MOVT_TIX : sme2_movt_scalar_to_zt<"movt", 0b0011111>; diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 4f40fa538b0c3c7..ee7ca31aea3c91e 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -10,11 +10,12 @@ // //===----------------------------------------------------------------------===// -def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0>", []>; -def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0>", []>; -def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0>", []>; -def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0>", []>; -def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0>", []>; +def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0, 0>", []>; +def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0, 1>", []>; +def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0, 3>", []>; +def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0, 7>", []>; +def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0, 15>", []>; +def imm_to_tile_zt : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZT0, 0>", []>; def tileslice8 : ComplexPattern<i32 , 2, "SelectSMETileSlice<15, 1>", []>; def tileslice16 : ComplexPattern<i32 , 2, "SelectSMETileSlice<7, 1>", []>; @@ -3132,6 +3133,18 @@ class sme2_spill_fill_vector<string mnemonic, bits<8> opc> let mayStore = opc{7}; } + +multiclass sme2_spill_fill_vector<string mnemonic, bits<8> opc, SDPatternOperator op> { + def NAME : sme2_spill_fill_vector<mnemonic, opc>; + def NAME # _PSEUDO + : Pseudo<(outs), (ins MatrixOp:$ZTt, GPR64sp:$base), []>, Sched<[]> { + // Translated to actual instruction in AArch64ISelLowering.cpp + let usesCustomInserter = 1; + } + def : Pat<(op (imm_to_tile_zt untyped:$tile), GPR64sp:$base), + (!cast<Instruction>(NAME # _PSEUDO) $tile, $base)>; +} + //===----------------------------------------------------------------------===/// // SME2 move to/from lookup table class sme2_movt_zt_to_scalar<string mnemonic, bits<7> opc> diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll new file mode 100644 index 000000000000000..30205d86f2fb203 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s + +; LDR + +define void @ldr_zt0(ptr %ptr) { +; CHECK-LABEL: ldr_zt0: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr zt0, [x0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr %ptr) + ret void; +} + +; STR + +define void @str_zt0(ptr %ptr) { +; CHECK-LABEL: str_zt0: +; CHECK: // %bb.0: +; CHECK-NEXT: str zt0, [x0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.str.zt(i32 0, ptr %ptr) + ret void; +} + +declare void @llvm.aarch64.sme.ldr.zt(i32, ptr) +declare void @llvm.aarch64.sme.str.zt(i32, ptr) `````````` </details> https://github.com/llvm/llvm-project/pull/71795 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits