llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-aarch64 Author: Tomer Shafir (tomershafir) <details> <summary>Changes</summary> Adds 2 subtarget hooks `canLowerToZeroCycleRegMove` and `canLowerToZeroCycleRegZeroing` to enable querying whether an instruction can be lowered to a zero cycle instruction. The logic depends on the microarchitecture. This patch also provides an implementation for AArch64 based on `AArch64InstrInfo::copyPhysReg` which supports both physical and virtual registers. It prepares for a register coalescer optimization to prevent rematerialization of moves where the target supports ZCM. --- Full diff: https://github.com/llvm/llvm-project/pull/148428.diff 3 Files Affected: - (modified) llvm/include/llvm/CodeGen/TargetSubtargetInfo.h (+42) - (modified) llvm/lib/Target/AArch64/AArch64Subtarget.cpp (+81) - (modified) llvm/lib/Target/AArch64/AArch64Subtarget.h (+13) ``````````diff diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h index 45e67d80629cb..c5a7ed19d54dd 100644 --- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -185,6 +185,48 @@ class LLVM_ABI TargetSubtargetInfo : public MCSubtargetInfo { return false; } + /// Returns true if CopyMI can be lowered to a zero cycle register move. + /// Otherwise, returns false. + /// + /// Lowering to zero cycle register moves depends on the microarchitecture + /// for the specific architectural registers and instructions supported. + /// Thus, currently it is applied after register allocation, + /// when `ExpandPostRAPseudos` pass calls `TargetInstrInfo::lowerCopy` + /// which in turn calls `TargetInstrInfo::copyPhysReg`. + /// + /// Subtargets can override this method to classify lowering candidates. + /// Note that this cannot be defined in tablegen because it operates at + /// a higher level. 
+ /// + /// NOTE: Subtargets must maintain consistency between the logic here and + /// on lowering. + virtual bool canLowerToZeroCycleRegMove(const MachineInstr *CopyMI, + const Register &DestReg, + const Register &SrcReg) const { + return false; + } + + /// Returns true if CopyMI can be lowered to a zero cycle register zeroing. + /// Otherwise, returns false. + /// + /// Lowering to zero cycle register zeroing depends on the microarchitecture + /// for the specific architectural registers and instructions supported. + /// Thus, currently it takes place after register allocation, + /// when `ExpandPostRAPseudos` pass calls `TargetInstrInfo::lowerCopy` + /// which in turn calls `TargetInstrInfo::copyPhysReg`. + /// + /// Subtargets can override this method to classify lowering candidates. + /// Note that this cannot be defined in tablegen because it operates at + /// a higher level. + /// + /// NOTE: Subtargets must maintain consistency between the logic here and + /// on lowering. + virtual bool canLowerToZeroCycleRegZeroing(const MachineInstr *CopyMI, + const Register &DestReg, + const Register &SrcReg) const { + return false; + } + /// True if the subtarget should run MachineScheduler after aggressive /// coalescing. 
/// diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 0956823346795..d87f91d0fcc91 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -673,3 +673,84 @@ bool AArch64Subtarget::isX16X17Safer() const { bool AArch64Subtarget::enableMachinePipeliner() const { return getSchedModel().hasInstrSchedModel(); } + +bool AArch64Subtarget::isRegInClass(const MachineInstr *MI, const Register &Reg, + const TargetRegisterClass *TRC) const { + if (Reg.isPhysical()) { + return TRC->contains(Reg); + } else { + const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); + return TRC->hasSubClassEq(MRI.getRegClass(Reg)); + } +} + +/// NOTE: must maintain consistency with `AArch64InstrInfo::copyPhysReg`. +bool AArch64Subtarget::canLowerToZeroCycleRegMove( + const MachineInstr *CopyMI, const Register &DestReg, + const Register &SrcReg) const { + if (isRegInClass(CopyMI, DestReg, &AArch64::GPR32allRegClass) && + isRegInClass(CopyMI, SrcReg, &AArch64::GPR32allRegClass) && + DestReg != AArch64::WZR) { + if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP || + SrcReg != AArch64::WZR || !hasZeroCycleZeroingGP()) { + return hasZeroCycleRegMoveGPR64() || hasZeroCycleRegMoveGPR32(); + } + return false; + } + + if (isRegInClass(CopyMI, DestReg, &AArch64::GPR64allRegClass) && + isRegInClass(CopyMI, SrcReg, &AArch64::GPR64allRegClass) && + DestReg != AArch64::XZR) { + if (DestReg == AArch64::SP || SrcReg == AArch64::SP || + SrcReg != AArch64::XZR || !hasZeroCycleZeroingGP()) { + return hasZeroCycleRegMoveGPR64(); + } + return false; + } + + if (isRegInClass(CopyMI, DestReg, &AArch64::FPR128RegClass) && + isRegInClass(CopyMI, SrcReg, &AArch64::FPR128RegClass)) { + return isNeonAvailable() && hasZeroCycleRegMoveFPR128(); + } + + if (isRegInClass(CopyMI, DestReg, &AArch64::FPR64RegClass) && + isRegInClass(CopyMI, SrcReg, &AArch64::FPR64RegClass)) { + return 
hasZeroCycleRegMoveFPR64(); + } + + if (isRegInClass(CopyMI, DestReg, &AArch64::FPR32RegClass) && + isRegInClass(CopyMI, SrcReg, &AArch64::FPR32RegClass)) { + return hasZeroCycleRegMoveFPR32() || hasZeroCycleRegMoveFPR64(); + } + + if (isRegInClass(CopyMI, DestReg, &AArch64::FPR16RegClass) && + isRegInClass(CopyMI, SrcReg, &AArch64::FPR16RegClass)) { + return hasZeroCycleRegMoveFPR32() || hasZeroCycleRegMoveFPR64(); + } + + if (isRegInClass(CopyMI, DestReg, &AArch64::FPR8RegClass) && + isRegInClass(CopyMI, SrcReg, &AArch64::FPR8RegClass)) { + return hasZeroCycleRegMoveFPR32() || hasZeroCycleRegMoveFPR64(); + } + + return false; +} + +/// NOTE: must maintain consistency with `AArch64InstrInfo::copyPhysReg`. +bool AArch64Subtarget::canLowerToZeroCycleRegZeroing( + const MachineInstr *CopyMI, const Register &DestReg, + const Register &SrcReg) const { + if (isRegInClass(CopyMI, DestReg, &AArch64::GPR32allRegClass) && + isRegInClass(CopyMI, SrcReg, &AArch64::GPR32allRegClass) && + DestReg != AArch64::WZR) { + return AArch64::WZR == SrcReg && hasZeroCycleZeroingGP(); + } + + if (isRegInClass(CopyMI, DestReg, &AArch64::GPR64allRegClass) && + isRegInClass(CopyMI, SrcReg, &AArch64::GPR64allRegClass) && + DestReg != AArch64::XZR) { + return AArch64::XZR == SrcReg && hasZeroCycleZeroingGP(); + } + + return false; +} diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 154db3c074f71..638febd1cd3d1 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -120,6 +120,12 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { /// Initialize properties based on the selected processor family. void initializeProperties(bool HasMinSize); + /// Returns true if Reg is virtual and is assigned to, + /// or is physical and is a member of, the TRC register class. + /// Otherwise, returns false. 
+ bool isRegInClass(const MachineInstr *MI, const Register &Reg, + const TargetRegisterClass *TRC) const; + public: /// This constructor initializes the data members to match that /// of the specified triple. @@ -163,6 +169,13 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool enableMachinePipeliner() const override; bool useDFAforSMS() const override { return false; } + bool canLowerToZeroCycleRegMove(const MachineInstr *CopyMI, + const Register &DestReg, + const Register &SrcReg) const override; + bool canLowerToZeroCycleRegZeroing(const MachineInstr *CopyMI, + const Register &DestReg, + const Register &SrcReg) const override; + /// Returns ARM processor family. /// Avoid this function! CPU specifics should be kept local to this class /// and preferably modeled with SubtargetFeatures or properties in `````````` </details> https://github.com/llvm/llvm-project/pull/148428 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits