https://github.com/dongjianqiang2 updated https://github.com/llvm/llvm-project/pull/142982
>From e828099b2daa6c4f092b7428318fc9b12a85514d Mon Sep 17 00:00:00 2001 From: dong jianqiang <dongjianqia...@huawei.com> Date: Thu, 5 Jun 2025 22:46:26 +0800 Subject: [PATCH] [AArch64] Add support for -mlong-calls code generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch implements backend support for -mlong-calls on AArch64 targets. When enabled, calls to external functions are lowered to an indirect call via an address computed using `adrp` and `add` rather than a direct `bl` instruction, which is limited to a ±128MB PC-relative offset. This is particularly useful when the distance between a call site and its callee may exceed the range reachable with the 26-bit immediate of `bl`, such as in large binaries or link-time-optimized builds. Key changes: - In SelectionDAG lowering (`LowerCall`), detect the `long-calls` subtarget feature (enabled by `-mlong-calls`) and emit: - `adrp + add` address calculation - `blr` indirect call instruction This patch ensures that long calls are emitted correctly for both GlobalAddress and ExternalSymbol call targets. 
Tested: - New codegen tests under `llvm/test/CodeGen/AArch64/aarch64-long-calls.ll` - Verified `adrp + add + blr` output in `.s` for global and external functions --- clang/lib/Driver/ToolChains/Arch/AArch64.cpp | 6 +++ llvm/lib/Target/AArch64/AArch64Features.td | 4 ++ .../Target/AArch64/AArch64ISelLowering.cpp | 13 +++++-- .../AArch64/GISel/AArch64CallLowering.cpp | 11 ++++++ .../GISel/AArch64InstructionSelector.cpp | 9 +++++ .../AArch64/GISel/AArch64LegalizerInfo.cpp | 37 +++++++++++++++++-- .../AArch64/GISel/AArch64LegalizerInfo.h | 4 +- .../CodeGen/AArch64/aarch64-long-calls.ll | 27 ++++++++++++++ 8 files changed, 103 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/aarch64-long-calls.ll diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index eaae9f876e3ad..2463bcdae2f4f 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -466,6 +466,12 @@ void aarch64::getAArch64TargetFeatures(const Driver &D, if (Args.getLastArg(options::OPT_mno_bti_at_return_twice)) Features.push_back("+no-bti-at-return-twice"); + + if (Arg *A = Args.getLastArg(options::OPT_mlong_calls, + options::OPT_mno_long_calls)) { + if (A->getOption().matches(options::OPT_mlong_calls)) + Features.push_back("+long-calls"); + } } void aarch64::setPAuthABIInTriple(const Driver &D, const ArgList &Args, diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td index 469c76752c78c..5af6ed5f1ffa2 100644 --- a/llvm/lib/Target/AArch64/AArch64Features.td +++ b/llvm/lib/Target/AArch64/AArch64Features.td @@ -825,6 +825,10 @@ def FeatureDisableFastIncVL : SubtargetFeature<"disable-fast-inc-vl", "HasDisableFastIncVL", "true", "Do not prefer INC/DEC, ALL, { 1, 2, 4 } over ADDVL">; +def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", + "Generate calls via indirect call " + "instructions">; + 
//===----------------------------------------------------------------------===// // Architectures. // diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 9f51caef6d228..d6015ccf94afc 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9286,8 +9286,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, Callee = DAG.getTargetGlobalAddress(CalledGlobal, DL, PtrVT, 0, OpFlags); Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee); } else { - const GlobalValue *GV = G->getGlobal(); - Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); + if (Subtarget->genLongCalls()) + Callee = getAddr(G, DAG, OpFlags); + else { + const GlobalValue *GV = G->getGlobal(); + Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); + } } } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { bool UseGot = (getTargetMachine().getCodeModel() == CodeModel::Large && @@ -9298,7 +9302,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT); Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee); } else { - Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0); + if (Subtarget->genLongCalls()) + Callee = getAddr(S, DAG, 0); + else + Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0); } } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index fd77571fe1c52..b3a4ae61017a9 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -1351,6 +1351,17 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, DstOp(getLLTForType(*F.getType(), DL)).addDefToMIB(MRI, MIB); MIB.addExternalSymbol(Info.Callee.getSymbolName(), AArch64II::MO_GOT); Info.Callee = MachineOperand::CreateReg(MIB.getReg(0), false); + 
} else if (Subtarget.genLongCalls()) { + // If -mlong-calls are enabled, materialize the symbol/global with MO_PAGE + // to allow ADRP+LDR relocation sequence for calls beyond 128MB range. + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_GLOBAL_VALUE); + DstOp(getLLTForType(*F.getType(), DL)).addDefToMIB(MRI, MIB); + if (Info.Callee.isGlobal()) { + const GlobalValue *GV = Info.Callee.getGlobal(); + MIB.addGlobalAddress(GV, 0, AArch64II::MO_PAGE); + } else if (Info.Callee.isSymbol()) + MIB.addExternalSymbol(Info.Callee.getSymbolName(), AArch64II::MO_PAGE); + Info.Callee = MachineOperand::CreateReg(MIB.getReg(0), false); } Opc = getCallOpcode(MF, Info.Callee.isReg(), false, Info.PAI, MRI); } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index e0c693bff3c0a..735eb39929ce1 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -2706,6 +2706,15 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { "Expected small code model"); auto Op1 = BaseMI->getOperand(1); auto Op2 = I.getOperand(2); + if (Subtarget->genLongCalls() && Op1.isSymbol()) { + auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {}) + .addExternalSymbol(Op1.getSymbolName(), + Op1.getTargetFlags()) + .addExternalSymbol(Op2.getSymbolName(), + Op2.getTargetFlags()); + I.eraseFromParent(); + return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI); + } auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {}) .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(), Op1.getTargetFlags()) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 12843e16d0da1..411af1f4ac137 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1571,15 +1571,21 @@ 
bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue( // By splitting this here, we can optimize accesses in the small code model by // folding in the G_ADD_LOW into the load/store offset. auto &GlobalOp = MI.getOperand(1); - // Don't modify an intrinsic call. - if (GlobalOp.isSymbol()) - return true; + if (GlobalOp.isSymbol()) { + // For symbol operands, use long call expansion if required. + return ST->genLongCalls() + ? legalizeSmallCMSymbol(MI, MRI, MIRBuilder, Observer) + : true; + } const auto* GV = GlobalOp.getGlobal(); if (GV->isThreadLocal()) return true; // Don't want to modify TLS vars. auto &TM = ST->getTargetLowering()->getTargetMachine(); - unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM); + unsigned OpFlags = 0; + + if (!ST->genLongCalls()) + OpFlags = ST->ClassifyGlobalReference(GV, TM); if (OpFlags & AArch64II::MO_GOT) return true; @@ -1621,6 +1627,29 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue( return true; } +bool AArch64LegalizerInfo::legalizeSmallCMSymbol( + MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, + GISelChangeObserver &Observer) const { + assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE); + // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP + + // G_ADD_LOW instructions. + // By splitting this here, we can optimize accesses in the small code model by + // folding in the G_ADD_LOW into the load/store offset. + auto &SymbolOp = MI.getOperand(1); + + Register DstReg = MI.getOperand(0).getReg(); + auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {}) + .addExternalSymbol(SymbolOp.getSymbolName(), AArch64II::MO_PAGE); + // Set the regclass on the dest reg too. 
+ MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass); + + MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP}) + .addExternalSymbol(SymbolOp.getSymbolName(), + AArch64II::MO_PAGEOFF | AArch64II::MO_NC); + MI.eraseFromParent(); + return true; +} + bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { MachineIRBuilder &MIB = Helper.MIRBuilder; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h index bcb294326fa92..33b3bb827d365 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -42,7 +42,9 @@ class AArch64LegalizerInfo : public LegalizerInfo { bool legalizeShlAshrLshr(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const; - + bool legalizeSmallCMSymbol(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder, + GISelChangeObserver &Observer) const; bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const; diff --git a/llvm/test/CodeGen/AArch64/aarch64-long-calls.ll b/llvm/test/CodeGen/AArch64/aarch64-long-calls.ll new file mode 100644 index 0000000000000..52cffe49bf9ca --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-long-calls.ll @@ -0,0 +1,27 @@ +; RUN: llc -O2 -mtriple=aarch64-linux-gnu -mcpu=generic -mattr=+long-calls < %s | FileCheck %s +; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mcpu=generic -mattr=+long-calls < %s | FileCheck %s + +declare void @far_func() +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) + +define void @test() { +entry: + call void @far_func() + ret void +} + +define void @test2(ptr %dst, i8 %val, i64 %len) { +entry: + call void @llvm.memset.p0.i64(ptr %dst, i8 %val, i64 %len, i1 false) + ret void +} + +; CHECK-LABEL: test: +; CHECK: adrp {{x[0-9]+}}, 
far_func +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, :lo12:far_func +; CHECK: blr {{x[0-9]+}} + +; CHECK-LABEL: test2: +; CHECK: adrp {{x[0-9]+}}, memset +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, :lo12:memset +; CHECK: blr {{x[0-9]+}} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits