================
@@ -0,0 +1,701 @@
+//===-- ExpandVariadicsPass.cpp ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an optimization pass for variadic functions. If called from codegen,
+// it can serve as the implementation of variadic functions for a given target.
+//
+// The target-dependent parts are in namespace VariadicABIInfo. Enabling a new
+// target means adding a case to VariadicABIInfo::create() along with tests.
+//
+// The module pass using that information is class ExpandVariadics.
+//
+// The strategy is:
+// 1. Test whether a variadic function is sufficiently simple
+// 2. If it is, calls to it can be replaced with calls to a different function
+// 3. If it isn't, try to split it into a simple function and a remainder
+// 4. Optionally rewrite the variadic function calling convention as well
+//
+// This pass considers "sufficiently simple" to mean a variadic function that
+// calls into a different function taking a va_list to do the real work. For
+// example, libc might implement fprintf as a single basic block calling into
+// vfprintf. This pass can then rewrite calls to the variadic function into
+// code that constructs a target-specific value to use for the va_list and a
+// call into the non-variadic implementation function. There's a test for that.
+//
+// Most other variadic functions whose definition is known can be converted
+// into that form. Create a new internal function taking a va_list where the
+// original took a ... parameter. Move the blocks across. Create a new block
+// containing a va_start that calls into the new function. This is nearly
+// target independent.
+//
+// Where this transform is consistent with the ABI, e.g. AMDGPU or NVPTX, or
+// where the ABI can be chosen to align with this transform, the function
+// interface can be rewritten along with calls to unknown variadic functions.
+//
+// The aggregate effect is to unblock other transforms, most critically the
+// general purpose inliner. Known calls to variadic functions become zero cost.
+//
+// This pass does define some target-specific information which is partially
+// redundant with other parts of the compiler. In particular, the call frame
+// it builds must be the exact complement of the va_arg lowering performed
+// by clang. The va_list construction is similar to work done by the backend
+// for targets that lower variadics there, though distinct in that this pass
+// constructs the pieces using alloca instead of relative to stack pointers.
+//
+// Consistency with clang is primarily tested by emitting va_arg using clang
+// then expanding the variadic functions using this pass, followed by trying
+// to constant fold the functions to no-ops.
+//
+// Target-specific behaviour is tested in IR - mainly checking that values are
+// put into positions in call frames that make sense for that particular
+// target.
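+//
+// As an illustrative sketch of the "sufficiently simple" shape described
+// above (hand-written for this comment, not code from this patch), such a
+// function in C looks like:
+//
+//   int fprintf(FILE *f, const char *fmt, ...) {
+//     va_list va;
+//     va_start(va, fmt);
+//     int r = vfprintf(f, fmt, va);
+//     va_end(va);
+//     return r;
+//   }
+//
+// Calls to fprintf can then be rewritten to construct the va_list at the
+// call site and call vfprintf directly.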
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/TargetParser/Triple.h"
+
+#define DEBUG_TYPE "expand-variadics"
+
+using namespace llvm;
+
+namespace {
+namespace VariadicABIInfo {
+
+// Calling convention for passing the va_list object itself, matching what it
+// would be in C. AArch64 uses byval.
+enum class ValistCc { value, pointer, /*byval*/ };
+
+struct Interface {
+protected:
+  Interface(uint32_t MinAlign, uint32_t MaxAlign)
+      : MinAlign(MinAlign), MaxAlign(MaxAlign) {}
+
+public:
+  virtual ~Interface() {}
+  const uint32_t MinAlign;
+  const uint32_t MaxAlign;
+
+  // Most ABIs use a void* or char* for va_list; others can specialise.
+  virtual Type *vaListType(LLVMContext &Ctx) {
+    return PointerType::getUnqual(Ctx);
+  }
+
+  // Lots of targets use a void* pointed at a buffer for va_list.
+  // Some use more complicated iterator constructs.
+  // This interface seeks to express both.
+  // Ideally it would be a compile time error for a derived class
+  // to override only one of valistOnStack, initializeVAList.
+
+  // How the vaListType is passed.
+  virtual ValistCc valistCc() { return ValistCc::value; }
+
+  // The va_list might need to be stack allocated.
+  virtual bool valistOnStack() { return false; }
+
+  virtual void initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder,
+                                AllocaInst * /*va_list*/, Value * /*buffer*/) {
+    // Needs to be implemented iff the va_list is on the stack.
+    assert(!valistOnStack());
+    llvm_unreachable("Only called if valistOnStack() returns true");
+  }
+
+  // All targets currently implemented use a ptr for the va_list parameter.
+  Type *vaListParameterType(LLVMContext &Ctx) {
+    return PointerType::getUnqual(Ctx);
+  }
+
+  bool vaEndIsNop() { return true; }
+
+  bool vaCopyIsMemcpy() { return true; }
+};
+
+struct X64SystemV final : public Interface {
+  // X64 documented behaviour:
+  // Slots are at least eight byte aligned and at most 16 byte aligned.
+  // If the type needs more than sixteen byte alignment, it still only gets
+  // that much alignment on the stack.
+  // X64 behaviour in clang:
+  // Slots are at least eight byte aligned and at most naturally aligned.
+  // This pass matches clang, not the ABI docs.
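+  //
+  // For orientation (a sketch assuming the usual System V x86-64 psABI
+  // definition; not code introduced by this patch), the va_list being
+  // modelled corresponds to the C declaration:
+  //
+  //   typedef struct {
+  //     unsigned gp_offset;       // field 0: offset into reg_save_area (GPRs)
+  //     unsigned fp_offset;       // field 1: offset into reg_save_area (XMMs)
+  //     void *overflow_arg_area;  // field 2: stack argument area
+  //     void *reg_save_area;      // field 3: spilled register arguments
+  //   } va_list[1];
+  //
+  // initializeVAList below sets both offsets past the end of the register
+  // save area so that every subsequent va_arg reads from overflow_arg_area.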
+  X64SystemV() : Interface(8, 0) {}
+
+  Type *vaListType(LLVMContext &Ctx) override {
+    auto I32 = Type::getInt32Ty(Ctx);
+    auto Ptr = PointerType::getUnqual(Ctx);
+    return ArrayType::get(StructType::get(Ctx, {I32, I32, Ptr, Ptr}), 1);
+  }
+  ValistCc valistCc() override { return ValistCc::pointer; }
+
+  bool valistOnStack() override { return true; }
+
+  void initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder,
+                        AllocaInst *VaList, Value *VoidBuffer) override {
+    assert(valistOnStack());
+    assert(VaList != nullptr);
+    assert(VaList->getAllocatedType() == vaListType(Ctx));
+
+    Type *VaListTy = vaListType(Ctx);
+
+    Type *I32 = Type::getInt32Ty(Ctx);
+    Type *I64 = Type::getInt64Ty(Ctx);
+
+    Value *Idxs[3] = {
+        ConstantInt::get(I64, 0),
+        ConstantInt::get(I32, 0),
+        nullptr,
+    };
+
+    // Set gp_offset to the end of the register save area (6 GPRs * 8 bytes)
+    // so va_arg never reads a general purpose register argument from it.
+    Idxs[2] = ConstantInt::get(I32, 0);
+    Builder.CreateStore(
+        ConstantInt::get(I32, 48),
+        Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "gp_offset"));
+
+    // Likewise fp_offset (6 GPRs * 8 bytes + 8 XMM registers * 16 bytes).
+    Idxs[2] = ConstantInt::get(I32, 1);
+    Builder.CreateStore(
+        ConstantInt::get(I32, 6 * 8 + 8 * 16),
+        Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "fp_offset"));
+
+    // Every argument is therefore read from the overflow area, the buffer.
+    Idxs[2] = ConstantInt::get(I32, 2);
+    Builder.CreateStore(
+        VoidBuffer,
+        Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs,
+                                  "overflow_arg_area"));
+
+    // No register save area is allocated.
+    Idxs[2] = ConstantInt::get(I32, 3);
+    Builder.CreateStore(
+        ConstantPointerNull::get(PointerType::getUnqual(Ctx)),
+        Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "reg_save_area"));
+  }
+};
+
+std::unique_ptr<Interface> create(Module &M) {
+  llvm::Triple Triple(M.getTargetTriple());
+  const bool IsLinuxABI = Triple.isOSLinux() || Triple.isOSCygMing();
+
+  switch (Triple.getArch()) {
+  case Triple::x86: {
+    // These seem to all fall out the same, despite getTypeStackAlign
+    // implying otherwise.
+    if (Triple.isOSDarwin()) {
+      struct X86Darwin final : public Interface {
+        // X86_32ABIInfo::getTypeStackAlignInBytes is misleading for this.
+        // The slotSize(4) implies a minimum alignment.
+        // AllowHigherAlign = true means there is no maximum alignment.
+        X86Darwin() : Interface(4, 0) {}
+      };
+
+      return std::make_unique<X86Darwin>();
+    }
+    if (Triple.getOS() == llvm::Triple::Win32) {
+      struct X86Windows final : public Interface {
+        X86Windows() : Interface(4, 0) {}
+      };
+      return std::make_unique<X86Windows>();
+    }
+
+    if (IsLinuxABI) {
+      struct X86Linux final : public Interface {
+        X86Linux() : Interface(4, 0) {}
+      };
+      return std::make_unique<X86Linux>();
+    }
+    break;
+  }
+
+  case Triple::x86_64: {
+    if (Triple.isWindowsMSVCEnvironment() || Triple.isOSWindows()) {
+      struct X64Windows final : public Interface {
+        X64Windows() : Interface(8, 8) {}
+      };
+      // x64 MSVC's va_arg lowering passes values larger than 8 bytes by
+      // pointer; however the variadic call instruction created does not,
+      // e.g. a <4 x f32> will be passed as itself, not as a pointer or
+      // byval. Postponing resolution of that for now.
+      return nullptr;
+    }
+
+    if (Triple.isOSDarwin()) {
+      return std::make_unique<VariadicABIInfo::X64SystemV>();
+    }
+
+    if (IsLinuxABI) {
+      return std::make_unique<VariadicABIInfo::X64SystemV>();
+    }
+
+    break;
+  }
+
+  default:
+    return nullptr;
+  }
+
+  return nullptr;
+}
+
+} // namespace VariadicABIInfo
+
+class ExpandVariadics : public ModulePass {
+public:
+  static char ID;
+  std::unique_ptr<VariadicABIInfo::Interface> ABI;
+
+  ExpandVariadics() : ModulePass(ID) {}
+  StringRef getPassName() const override {
+    return "Expand variadic functions";
+  }
+
+  // A predicate, in the sense that returning nullptr means false.
+  // On success, returns the function target to use when inlining.
+  Function *isFunctionInlinable(Module &M, Function *F);
+
+  // Rewrite a call site.
+  void expandCall(Module &M, CallInst *CB, Function *VarargF, Function *NF);
+
+  // This could be partially target specific.
+  bool expansionApplicableToFunction(Module &M, Function *F) {
+    if (F->isIntrinsic() || !F->isVarArg() ||
+        F->hasFnAttribute(Attribute::Naked))
+      return false;
+
+    if (F->getCallingConv() != CallingConv::C)
+      return false;
+
+    if (GlobalValue::isInterposableLinkage(F->getLinkage()))
+      return false;
+
+    for (const Use &U : F->uses()) {
+      const auto *CB = dyn_cast<CallBase>(U.getUser());
+
+      if (!CB)
+        return false;
+
+      if (CB->isMustTailCall())
+        return false;
+
+      if (!CB->isCallee(&U) || CB->getFunctionType() != F->getFunctionType())
+        return false;
+    }
+
+    // Branch funnels look like variadic functions but aren't:
+    //
+    // define hidden void @__typeid_typeid1_0_branch_funnel(ptr nest %0, ...) {
+    //   musttail call void (...) @llvm.icall.branch.funnel(ptr %0, ptr @vt1_1,
+    //                                                      ptr @vf1_1, ...)
+    //   ret void
+    // }
+    //
+    // %1 = call i32 @__typeid_typeid1_0_branch_funnel(ptr nest %vtable,
+    //                                                 ptr %obj, i32 1)
+
+    // TODO: there should be a reasonable way to check for an intrinsic
+    // without inserting a prototype that then needs to be removed.
+    Function *Funnel =
+        Intrinsic::getDeclaration(&M, Intrinsic::icall_branch_funnel);
+    for (const User *U : Funnel->users()) {
+      if (auto *I = dyn_cast<CallBase>(U)) {
+        if (F == I->getFunction()) {
+          return false;
+        }
+      }
+    }
+    if (Funnel->use_empty())
+      Funnel->eraseFromParent();
+
+    return true;
+  }
+
+  template <Intrinsic::ID ID>
+  static BasicBlock::iterator
+  skipIfInstructionIsSpecificIntrinsic(BasicBlock::iterator Iter) {
+    if (auto *Intrinsic = dyn_cast<IntrinsicInst>(&*Iter))
+      if (Intrinsic->getIntrinsicID() == ID)
+        Iter++;
+    return Iter;
+  }
+
+  bool callinstRewritable(CallBase *CB, Function *NF) {
+    if (CallInst *CI = dyn_cast<CallInst>(CB))
+      if (CI->isMustTailCall())
+        return false;
+
+    return true;
+  }
+
+  bool runOnFunction(Module &M, Function *F) {
+    bool Changed = false;
+
+    if (!expansionApplicableToFunction(M, F))
+      return false;
+
+    Function *Equivalent = isFunctionInlinable(M, F);
+
+    if (!Equivalent)
+      return Changed;
+
+    for (User *U : llvm::make_early_inc_range(F->users()))
+      if (CallInst *CB = dyn_cast<CallInst>(U)) {
+        Value *CalledOperand = CB->getCalledOperand();
+        if (F == CalledOperand) {
+          expandCall(M, CB, F, Equivalent);
+          Changed = true;
+        }
+      }
+
+    return Changed;
+  }
+
+  bool runOnModule(Module &M) override {
+    ABI = VariadicABIInfo::create(M);
+    if (!ABI)
+      return false;
+
+    bool Changed = false;
+    for (Function &F : llvm::make_early_inc_range(M)) {
+      Changed |= runOnFunction(M, &F);
+    }
+
+    return Changed;
+  }
+};
+
+Function *ExpandVariadics::isFunctionInlinable(Module &M, Function *F) {
+  assert(F->isVarArg());
+
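+  // For orientation, a hand-written sketch of the kind of single-block IR
+  // this predicate accepts when the va_list is passed by pointer (an assumed
+  // shape for illustration; names like %struct.__va_list_tag are not taken
+  // from this patch's tests):
+  //
+  //   define i32 @fprintf(ptr %f, ptr %fmt, ...) {
+  //     %va = alloca [1 x %struct.__va_list_tag], align 16
+  //     call void @llvm.va_start(ptr %va)
+  //     %r = call i32 @vfprintf(ptr %f, ptr %fmt, ptr %va)
+  //     call void @llvm.va_end(ptr %va)
+  //     ret i32 %r
+  //   }
+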
+  assert(expansionApplicableToFunction(M, F));
+
+  if (F->isDeclaration())
+    return nullptr;
+
+  // A variadic function is inlinable if it is sufficiently simple.
+  // Specifically, if it is a single basic block which is functionally
+  // equivalent to packing the variadic arguments into a va_list which is
+  // passed to another function. The inlining strategy is to build a va_list
+  // in the caller and then call said inner function.
+
+  // Single basic block.
+  BasicBlock &BB = F->getEntryBlock();
+  if (!isa<ReturnInst>(BB.getTerminator()))
+    return nullptr;
+
+  // Walk the block in order checking for specific instructions, some of them
+  // optional.
+  BasicBlock::iterator Iter = BB.begin();
+
+  AllocaInst *Alloca = dyn_cast<AllocaInst>(&*Iter++);
+  if (!Alloca)
+    return nullptr;
+
+  Value *ValistArgument = Alloca;
+
+  Iter = skipIfInstructionIsSpecificIntrinsic<Intrinsic::lifetime_start>(Iter);
+
+  VAStartInst *Start = dyn_cast<VAStartInst>(&*Iter++);
+  if (!Start || Start->getArgList() != ValistArgument) {
+    return nullptr;
+  }
+
+  // The va_list instance is stack allocated.
+  // The ... replacement is a va_list passed "by value".
+  // That involves a load for some ABIs and passing the pointer for others.
+  Value *ValistTrailingArgument = nullptr;
+  switch (ABI->valistCc()) {
+  case VariadicABIInfo::ValistCc::value: {
+    // If it's being passed by value, need a load.
+    // TODO: Check it's loading the right thing.
+    auto *Load = dyn_cast<LoadInst>(&*Iter);
+    if (!Load)
+      return nullptr;
+    ValistTrailingArgument = Load;
+    Iter++;
+    break;
+  }
+  case VariadicABIInfo::ValistCc::pointer: {
+    // If it's being passed by pointer, going to use the alloca directly.
+    ValistTrailingArgument = ValistArgument;
+    break;
+  }
+  }
+
+  CallInst *Call = dyn_cast<CallInst>(&*Iter++);
+  if (!Call)
+    return nullptr;
+
+  if (auto *End = dyn_cast<VAEndInst>(&*Iter)) {

----------------

Pierre-vh wrote:
CamelCase

https://github.com/llvm/llvm-project/pull/81058