void created this revision.
void added reviewers: nickdesaulniers, danielkiss, MaskRay.
Herald added subscribers: StephenFan, pengfei, hiraditya, kristof.beyls.
Herald added a project: All.
void requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.
Support the "-fzero-call-used-regs" option on AArch64. This involves much less specialized code than the X86 version. Most of the checks can be done with TableGen. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D124836 Files: clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/Clang.cpp llvm/lib/Target/AArch64/AArch64FrameLowering.cpp llvm/lib/Target/AArch64/AArch64FrameLowering.h llvm/lib/Target/AArch64/AArch64RegisterInfo.td llvm/lib/Target/X86/X86RegisterInfo.cpp llvm/test/CodeGen/AArch64/zero-call-used-regs.ll llvm/utils/TableGen/RegisterInfoEmitter.cpp
Index: llvm/utils/TableGen/RegisterInfoEmitter.cpp
===================================================================
--- llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -1188,6 +1188,8 @@
      << "MCRegister) const override;\n"
      << "  bool isFixedRegister(const MachineFunction &, "
      << "MCRegister) const override;\n"
+     << "  bool isArgumentRegister(const MachineFunction &, "
+     << "MCRegister) const override;\n"
      << "  /// Devirtualized TargetFrameLowering.\n"
      << "  static const " << TargetName << "FrameLowering *getFrameLowering(\n"
      << "      const MachineFunction &MF);\n"
@@ -1662,6 +1664,20 @@
   OS << "      false;\n";
   OS << "}\n\n";

+  OS << "bool " << ClassName << "::\n"
+     << "isArgumentRegister(const MachineFunction &MF, "
+     << "MCRegister PhysReg) const {\n"
+     << "  return\n";
+  for (const CodeGenRegisterCategory &Category : RegCategories)
+    if (Category.getName() == "ArgumentRegisters") {
+      for (const CodeGenRegisterClass *RC : Category.getClasses())
+        OS << "      " << RC->getQualifiedName()
+           << "RegClass.contains(PhysReg) ||\n";
+      break;
+    }
+  OS << "      false;\n";
+  OS << "}\n\n";
+
   OS << "ArrayRef<const char *> " << ClassName
      << "::getRegMaskNames() const {\n";
   if (!CSRSets.empty()) {
Index: llvm/test/CodeGen/AArch64/zero-call-used-regs.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/zero-call-used-regs.ll
@@ -0,0 +1,233 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
+
+@result = dso_local global i32 0, align 4
+
+define dso_local i32 @skip(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 "zero-call-used-regs"="skip" {
+; CHECK-LABEL: skip:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul w8, w1, w0
+; CHECK-NEXT:    orr w0, w8, w2
+; CHECK-NEXT:    ret
+
+entry:
+  %mul = mul nsw i32 %b, %a
+  %or = or i32 %mul, %c
+  ret i32 %or
+}
+
+define dso_local i32 @used_gpr_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used-gpr-arg" {
+; CHECK-LABEL: used_gpr_arg:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul w8, w1, w0
+; CHECK-NEXT:    orr w0, w8, w2
+; CHECK-NEXT:    mov x1, #0
+; CHECK-NEXT:    mov x2, #0
+; CHECK-NEXT:    ret
+
+entry:
+  %mul = mul nsw i32 %b, %a
+  %or = or i32 %mul, %c
+  ret i32 %or
+}
+
+define dso_local i32 @used_gpr(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used-gpr" {
+; CHECK-LABEL: used_gpr:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul w8, w1, w0
+; CHECK-NEXT:    orr w0, w8, w2
+; CHECK-NEXT:    mov x1, #0
+; CHECK-NEXT:    mov x2, #0
+; CHECK-NEXT:    mov x8, #0
+; CHECK-NEXT:    ret
+
+entry:
+  %mul = mul nsw i32 %b, %a
+  %or = or i32 %mul, %c
+  ret i32 %or
+}
+
+define dso_local i32 @used_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used-arg" {
+; CHECK-LABEL: used_arg:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul w8, w1, w0
+; CHECK-NEXT:    orr w0, w8, w2
+; CHECK-NEXT:    mov x1, #0
+; CHECK-NEXT:    mov x2, #0
+; CHECK-NEXT:    ret
+
+entry:
+  %mul = mul nsw i32 %b, %a
+  %or = or i32 %mul, %c
+  ret i32 %or
+}
+
+define dso_local i32 @used(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used" {
+; CHECK-LABEL: used:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul w8, w1, w0
+; CHECK-NEXT:    orr w0, w8, w2
+; CHECK-NEXT:    mov x1, #0
+; CHECK-NEXT:    mov x2, #0
+; CHECK-NEXT:    mov x8, #0
+; CHECK-NEXT:    ret
+
+entry:
+  %mul = mul nsw i32 %b, %a
+  %or = or i32 %mul, %c
+  ret i32 %or
+}
+
+define dso_local i32 @all_gpr_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 "zero-call-used-regs"="all-gpr-arg" {
+; CHECK-LABEL: all_gpr_arg:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul w8, w1, w0
+; CHECK-NEXT:    mov x1, #0
+; CHECK-NEXT:    mov x3, #0
+; CHECK-NEXT:    mov x4, #0
+; CHECK-NEXT:    orr w0, w8, w2
+; CHECK-NEXT:    mov x2, #0
+; CHECK-NEXT:    mov x5, #0
+; CHECK-NEXT:    mov x6, #0
+; CHECK-NEXT:    mov x7, #0
+; CHECK-NEXT:    ret
+
+entry:
+  %mul = mul nsw i32 %b, %a
+  %or = or i32 %mul, %c
+  ret i32 %or
+}
+
+define dso_local i32 @all_gpr(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 "zero-call-used-regs"="all-gpr" {
+; CHECK-LABEL: all_gpr:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul w8, w1, w0
+; CHECK-NEXT:    mov x1, #0
+; CHECK-NEXT:    mov x3, #0
+; CHECK-NEXT:    mov x4, #0
+; CHECK-NEXT:    orr w0, w8, w2
+; CHECK-NEXT:    mov x2, #0
+; CHECK-NEXT:    mov x5, #0
+; CHECK-NEXT:    mov x6, #0
+; CHECK-NEXT:    mov x7, #0
+; CHECK-NEXT:    mov x8, #0
+; CHECK-NEXT:    mov x9, #0
+; CHECK-NEXT:    mov x10, #0
+; CHECK-NEXT:    mov x11, #0
+; CHECK-NEXT:    mov x12, #0
+; CHECK-NEXT:    mov x13, #0
+; CHECK-NEXT:    mov x14, #0
+; CHECK-NEXT:    mov x15, #0
+; CHECK-NEXT:    mov x16, #0
+; CHECK-NEXT:    mov x17, #0
+; CHECK-NEXT:    mov x18, #0
+; CHECK-NEXT:    ret
+
+entry:
+  %mul = mul nsw i32 %b, %a
+  %or = or i32 %mul, %c
+  ret i32 %or
+}
+
+define dso_local i32 @all_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 "zero-call-used-regs"="all-arg" {
+; CHECK-LABEL: all_arg:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul w8, w1, w0
+; CHECK-NEXT:    mov x1, #0
+; CHECK-NEXT:    mov x3, #0
+; CHECK-NEXT:    mov x4, #0
+; CHECK-NEXT:    orr w0, w8, w2
+; CHECK-NEXT:    mov x2, #0
+; CHECK-NEXT:    mov x5, #0
+; CHECK-NEXT:    mov x6, #0
+; CHECK-NEXT:    mov x7, #0
+; CHECK-NEXT:    movi q0, #0000000000000000
+; CHECK-NEXT:    movi q1, #0000000000000000
+; CHECK-NEXT:    movi q2, #0000000000000000
+; CHECK-NEXT:    movi q3, #0000000000000000
+; CHECK-NEXT:    movi q4, #0000000000000000
+; CHECK-NEXT:    movi q5, #0000000000000000
+; CHECK-NEXT:    movi q6, #0000000000000000
+; CHECK-NEXT:    ret
+
+entry:
+  %mul = mul nsw i32 %b, %a
+  %or = or i32 %mul, %c
+  ret i32 %or
+}
+
+define dso_local i32 @all(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 "zero-call-used-regs"="all" {
+; CHECK-LABEL: all:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul w8, w1, w0
+; CHECK-NEXT:    mov x1, #0
+; CHECK-NEXT:    mov x3, #0
+; CHECK-NEXT:    mov x4, #0
+; CHECK-NEXT:    orr w0, w8, w2
+; CHECK-NEXT:    mov x2, #0
+; CHECK-NEXT:    mov x5, #0
+; CHECK-NEXT:    mov x6, #0
+; CHECK-NEXT:    mov x7, #0
+; CHECK-NEXT:    mov x8, #0
+; CHECK-NEXT:    mov x9, #0
+; CHECK-NEXT:    mov x10, #0
+; CHECK-NEXT:    mov x11, #0
+; CHECK-NEXT:    mov x12, #0
+; CHECK-NEXT:    mov x13, #0
+; CHECK-NEXT:    mov x14, #0
+; CHECK-NEXT:    mov x15, #0
+; CHECK-NEXT:    mov x16, #0
+; CHECK-NEXT:    mov x17, #0
+; CHECK-NEXT:    mov x18, #0
+; CHECK-NEXT:    movi q0, #0000000000000000
+; CHECK-NEXT:    movi q1, #0000000000000000
+; CHECK-NEXT:    movi q2, #0000000000000000
+; CHECK-NEXT:    movi q3, #0000000000000000
+; CHECK-NEXT:    movi q4, #0000000000000000
+; CHECK-NEXT:    movi q5, #0000000000000000
+; CHECK-NEXT:    movi q6, #0000000000000000
+; CHECK-NEXT:    movi q7, #0000000000000000
+; CHECK-NEXT:    movi q8, #0000000000000000
+; CHECK-NEXT:    movi q9, #0000000000000000
+; CHECK-NEXT:    movi q10, #0000000000000000
+; CHECK-NEXT:    movi q11, #0000000000000000
+; CHECK-NEXT:    movi q12, #0000000000000000
+; CHECK-NEXT:    movi q13, #0000000000000000
+; CHECK-NEXT:    movi q14, #0000000000000000
+; CHECK-NEXT:    movi q15, #0000000000000000
+; CHECK-NEXT:    movi q16, #0000000000000000
+; CHECK-NEXT:    movi q17, #0000000000000000
+; CHECK-NEXT:    movi q18, #0000000000000000
+; CHECK-NEXT:    movi q19, #0000000000000000
+; CHECK-NEXT:    movi q20, #0000000000000000
+; CHECK-NEXT:    movi q21, #0000000000000000
+; CHECK-NEXT:    movi q22, #0000000000000000
+; CHECK-NEXT:    movi q23, #0000000000000000
+; CHECK-NEXT:    movi q24, #0000000000000000
+; CHECK-NEXT:    movi q25, #0000000000000000
+; CHECK-NEXT:    movi q26, #0000000000000000
+; CHECK-NEXT:    movi q27, #0000000000000000
+; CHECK-NEXT:    movi q28, #0000000000000000
+; CHECK-NEXT:    movi q29, #0000000000000000
+; CHECK-NEXT:    movi q30, #0000000000000000
+; CHECK-NEXT:    movi q31, #0000000000000000
+; CHECK-NEXT:    ret
+
+entry:
+  %mul = mul nsw i32 %b, %a
+  %or = or i32 %mul, %c
+  ret i32 %or
+}
+
+; Don't emit zeroing registers in "main" function.
+define dso_local i32 @main() local_unnamed_addr #0 {
+; CHECK-LABEL: main:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+
+entry:
+  ret i32 0
+}
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn uwtable "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon,+v8a" }
Index: llvm/lib/Target/X86/X86RegisterInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -656,7 +656,7 @@
              [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
     return true;

-  return false;
+  return X86GenRegisterInfo::isArgumentRegister(MF, Reg);
 }

 bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
Index: llvm/lib/Target/AArch64/AArch64RegisterInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -1385,3 +1385,33 @@
     return AArch64SVCR::lookupSVCRByEncoding(MCOp.getImm()) != nullptr;
   }];
 }
+
+//===----------------------------------------------------------------------===//
+// Register categories.
+//
+
+def GeneralPurposeRegisters : RegisterCategory<[GPR64, GPR32]>;
+
+def GPR_ARG : RegisterClass<"AArch64", [i64, i32], 64,
+                            (add (sequence "X%u", 0, 7),
+                                 (sequence "W%u", 0, 7))>;
+def FPR8_ARG : RegisterClass<"AArch64", [untyped], 8, (trunc FPR8, 7)> {
+  let Size = 8;
+}
+def FPR16_ARG : RegisterClass<"AArch64", [f16, bf16], 16, (trunc FPR16, 7)> {
+  let Size = 16;
+}
+def FPR32_ARG : RegisterClass<"AArch64", [f32, i32], 32, (trunc FPR32, 7)>;
+def FPR64_ARG : RegisterClass<"AArch64",
+                              [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32,
+                               v1i64, v4f16, v4bf16], 64,
+                              (trunc FPR64, 7)>;
+def FPR128_ARG : RegisterClass<"AArch64",
+                               [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64,
+                                f128, v8f16, v8bf16], 128,
+                               (trunc FPR128, 7)>;
+def ArgumentRegisters : RegisterCategory<[GPR_ARG, FPR8_ARG, FPR16_ARG,
+                                          FPR32_ARG, FPR64_ARG, FPR128_ARG]>;
+
+def FIXED_REGS : RegisterClass<"AArch64", [i64], 64, (add FP, SP, VG, FFR)>;
+def FixedRegisters : RegisterCategory<[CCR, FIXED_REGS]>;
Index: llvm/lib/Target/AArch64/AArch64FrameLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -153,6 +153,10 @@
                                  MachineBasicBlock::iterator MBBI) const;
   void emitCalleeSavedSVERestores(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI) const;
+
+  /// Emit target zero call-used regs.
+  void emitZeroCallUsedRegs(BitVector RegsToZero,
+                            MachineBasicBlock &MBB) const override;
 };

 } // End llvm namespace
Index: llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -675,6 +675,123 @@
   emitCalleeSavedRestores(MBB, MBBI, true);
 }

+static MCRegister getRegisterOrZero(MCRegister Reg) {
+  switch (Reg.id()) {
+  default:
+    // The callee is expected to preserve r19-r28; r29 and r30 are used as the
+    // frame pointer and link register, respectively.
+    return 0;
+
+    // GPRs
+#define CASE(n) \
+  case AArch64::W##n: \
+  case AArch64::X##n: \
+    return AArch64::X##n
+  CASE(0);
+  CASE(1);
+  CASE(2);
+  CASE(3);
+  CASE(4);
+  CASE(5);
+  CASE(6);
+  CASE(7);
+  CASE(8);
+  CASE(9);
+  CASE(10);
+  CASE(11);
+  CASE(12);
+  CASE(13);
+  CASE(14);
+  CASE(15);
+  CASE(16);
+  CASE(17);
+  CASE(18);
+#undef CASE
+
+    // FPRs
+#define CASE(n) \
+  case AArch64::B##n: \
+  case AArch64::H##n: \
+  case AArch64::S##n: \
+  case AArch64::D##n: \
+  case AArch64::Q##n: \
+    return AArch64::Q##n
+  CASE(0);
+  CASE(1);
+  CASE(2);
+  CASE(3);
+  CASE(4);
+  CASE(5);
+  CASE(6);
+  CASE(7);
+  CASE(8);
+  CASE(9);
+  CASE(10);
+  CASE(11);
+  CASE(12);
+  CASE(13);
+  CASE(14);
+  CASE(15);
+  CASE(16);
+  CASE(17);
+  CASE(18);
+  CASE(19);
+  CASE(20);
+  CASE(21);
+  CASE(22);
+  CASE(23);
+  CASE(24);
+  CASE(25);
+  CASE(26);
+  CASE(27);
+  CASE(28);
+  CASE(29);
+  CASE(30);
+  CASE(31);
+#undef CASE
+  }
+}
+
+void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
+                                                MachineBasicBlock &MBB) const {
+  const MachineFunction &MF = *MBB.getParent();
+
+  // Insertion point.
+  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+
+  // Fake a debug loc.
+  DebugLoc DL;
+  if (MBBI != MBB.end())
+    DL = MBBI->getDebugLoc();
+
+  const TargetSubtargetInfo &STI = MF.getSubtarget();
+  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+  const AArch64InstrInfo *TII =
+      static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+  BitVector GPRsToZero(TRI.getNumRegs());
+  BitVector FPRsToZero(TRI.getNumRegs());
+  for (MCRegister Reg : RegsToZero.set_bits()) {
+    if (TRI.isGeneralPurposeRegister(MF, Reg)) {
+      // For GPRs, we only care to clear out the 64-bit register.
+      if (MCRegister XReg = getRegisterOrZero(Reg))
+        GPRsToZero.set(XReg);
+    } else if (AArch64::FPR128RegClass.contains(Reg)) {
+      // For FPRs, clear out the full 128-bit register.
+      if (MCRegister XReg = getRegisterOrZero(Reg))
+        FPRsToZero.set(XReg);
+    }
+  }
+
+  // Zero out GPRs.
+  for (MCRegister Reg : GPRsToZero.set_bits())
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), Reg).addImm(0);
+
+  // Zero out FP/vector registers.
+  for (MCRegister Reg : FPRsToZero.set_bits())
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVID), Reg).addImm(0);
+}
+
 // Find a scratch register that we can use at the start of the prologue to
 // re-align the stack pointer. We avoid using callee-save registers since they
 // may appear to be free when this is called from canUseAsPrologue (during
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -5984,7 +5984,7 @@
     // FIXME: There's no reason for this to be restricted to X86. The backend
     // code needs to be changed to include the appropriate function calls
     // automatically.
-    if (!Triple.isX86())
+    if (!Triple.isX86() && !Triple.isAArch64())
       D.Diag(diag::err_drv_unsupported_opt_for_target)
           << A->getAsString(Args) << TripleStr;
   }
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2977,7 +2977,7 @@

 def fzero_call_used_regs_EQ
     : Joined<["-"], "fzero-call-used-regs=">, Group<f_Group>, Flags<[CC1Option]>,
-      HelpText<"Clear call-used registers upon function return.">,
+      HelpText<"Clear call-used registers upon function return (AArch64/x86 only)">,
       Values<"skip,used-gpr-arg,used-gpr,used-arg,used,all-gpr-arg,all-gpr,all-arg,all">,
       NormalizedValues<["Skip", "UsedGPRArg", "UsedGPR", "UsedArg", "Used",
                         "AllGPRArg", "AllGPR", "AllArg", "All"]>,