================ @@ -0,0 +1,701 @@ +//===-- ExpandVariadicsPass.cpp --------------------------------*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is an optimisation pass for variadic functions. If called from codegen, +// it can serve as the implementation of variadic functions for a given target. +// +// The target-dependent parts are in namespace VariadicABIInfo. Enabling a new +// target means adding a case to VariadicABIInfo::create() along with tests. +// +// The module pass using that information is class ExpandVariadics. +// +// The strategy is: +// 1. Test whether a variadic function is sufficiently simple +// 2. If it was, calls to it can be replaced with calls to a different function +// 3. If it wasn't, try to split it into a simple function and a remainder +// 4. Optionally rewrite the variadic function calling convention as well +// +// This pass considers "sufficiently simple" to mean a variadic function that +// calls into a different function taking a va_list to do the real work. For +// example, libc might implement fprintf as a single basic block calling into +// vfprintf. This pass can then rewrite calls to the variadic into some code +// to construct a target-specific value to use for the va_list and a call +// into the non-variadic implementation function. There's a test for that. +// +// Most other variadic functions whose definition is known can be converted into +// that form. Create a new internal function taking a va_list where the original +// took a ... parameter. Move the blocks across. Create a new block containing a +// va_start that calls into the new function. This is nearly target independent. +// +// Where this transform is consistent with the ABI, e.g. 
AMDGPU or NVPTX, or +// where the ABI can be chosen to align with this transform, the function +// interface can be rewritten along with calls to unknown variadic functions. +// +// The aggregate effect is to unblock other transforms, most critically the +// general purpose inliner. Known calls to variadic functions become zero cost. +// +// This pass does define some target specific information which is partially +// redundant with other parts of the compiler. In particular, the call frame +// it builds must be the exact complement of the va_arg lowering performed +// by clang. The va_list construction is similar to work done by the backend +// for targets that lower variadics there, though distinct in that this pass +// constructs the pieces using alloca instead of relative to stack pointers. +// +// Consistency with clang is primarily tested by emitting va_arg using clang +// then expanding the variadic functions using this pass, followed by trying +// to constant fold the functions to no-ops. +// +// Target specific behaviour is tested in IR - mainly checking that values are +// put into positions in call frames that make sense for that particular target. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/ExpandVariadics.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/TargetParser/Triple.h" + +#define DEBUG_TYPE "expand-variadics" + +using namespace llvm; + +namespace { +namespace VariadicABIInfo { + +// calling convention for passing as valist object, same as it would be in C +// aarch64 uses byval +enum class ValistCc { value, pointer, /*byval*/ }; + +struct Interface { +protected: + Interface(uint32_t MinAlign, uint32_t MaxAlign) + : MinAlign(MinAlign), MaxAlign(MaxAlign) {} + +public: + virtual ~Interface() {} + const uint32_t MinAlign; + const uint32_t MaxAlign; + + // Most ABIs use a void* or char* for va_list, others can specialise + virtual Type *vaListType(LLVMContext &Ctx) { + return PointerType::getUnqual(Ctx); + } + + // Lots of targets use a void* pointed at a buffer for va_list. + // Some use more complicated iterator constructs. + // This interface seeks to express both. + // Ideally it would be a compile time error for a derived class + // to override only one of valistOnStack, initializeVAList. + + // How the vaListType is passed + virtual ValistCc valistCc() { return ValistCc::value; } + + // The valist might need to be stack allocated. + virtual bool valistOnStack() { return false; } + + virtual void initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder, + AllocaInst * /*va_list*/, Value * /*buffer*/) { + // Function needs to be implemented iff valist is on the stack. 
+ assert(!valistOnStack()); + llvm_unreachable("Only called if valistOnStack() returns true"); + } + + // All targets currently implemented use a ptr for the valist parameter + Type *vaListParameterType(LLVMContext &Ctx) { + return PointerType::getUnqual(Ctx); + } + + bool vaEndIsNop() { return true; } + + bool vaCopyIsMemcpy() { return true; } +}; + +struct X64SystemV final : public Interface { + // X64 documented behaviour: + // Slots are at least eight byte aligned and at most 16 byte aligned. + // If the type needs more than sixteen byte alignment, it still only gets + // that much alignment on the stack. + // X64 behaviour in clang: + // Slots are at least eight byte aligned and at most naturally aligned + // This matches clang, not the ABI docs. + X64SystemV() : Interface(8, 0) {} + + Type *vaListType(LLVMContext &Ctx) override { + auto I32 = Type::getInt32Ty(Ctx); + auto Ptr = PointerType::getUnqual(Ctx); + return ArrayType::get(StructType::get(Ctx, {I32, I32, Ptr, Ptr}), 1); + } + ValistCc valistCc() override { return ValistCc::pointer; } + + bool valistOnStack() override { return true; } + + void initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder, + AllocaInst *VaList, Value *VoidBuffer) override { + assert(valistOnStack()); + assert(VaList != nullptr); + assert(VaList->getAllocatedType() == vaListType(Ctx)); + + Type *VaListTy = vaListType(Ctx); + + Type *I32 = Type::getInt32Ty(Ctx); + Type *I64 = Type::getInt64Ty(Ctx); + + Value *Idxs[3] = { + ConstantInt::get(I64, 0), + ConstantInt::get(I32, 0), + nullptr, + }; + + Idxs[2] = ConstantInt::get(I32, 0); + Builder.CreateStore( + ConstantInt::get(I32, 48), + Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "gp_offset")); + + Idxs[2] = ConstantInt::get(I32, 1); + Builder.CreateStore( + ConstantInt::get(I32, 6 * 8 + 8 * 16), + Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "fp_offset")); + + Idxs[2] = ConstantInt::get(I32, 2); + Builder.CreateStore( + VoidBuffer, + Builder.CreateInBoundsGEP(VaListTy, 
VaList, Idxs, "overfow_arg_area")); + + Idxs[2] = ConstantInt::get(I32, 3); + Builder.CreateStore( + ConstantPointerNull::get(PointerType::getUnqual(Ctx)), + Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "reg_save_area")); + } +}; + +std::unique_ptr<Interface> create(Module &M) { + llvm::Triple Triple(M.getTargetTriple()); + const bool IsLinuxABI = Triple.isOSLinux() || Triple.isOSCygMing(); + + switch (Triple.getArch()) { + case Triple::x86: { + // These seem to all fall out the same, despite getTypeStackAlign + // implying otherwise. + if (Triple.isOSDarwin()) { + struct X86Darwin final : public Interface { + // X86_32ABIInfo::getTypeStackAlignInBytes is misleading for this. + // The slotSize(4) implies a minimum alignment + // The AllowHigherAlign = true means there is no maximum alignment. + X86Darwin() : Interface(4, 0) {} + }; + + return std::make_unique<X86Darwin>(); + } + if (Triple.getOS() == llvm::Triple::Win32) { + struct X86Windows final : public Interface { + X86Windows() : Interface(4, 0) {} + }; + return std::make_unique<X86Windows>(); + } + + if (IsLinuxABI) { + struct X86Linux final : public Interface { + X86Linux() : Interface(4, 0) {} + }; + return std::make_unique<X86Linux>(); + } + break; + } + + case Triple::x86_64: { + if (Triple.isWindowsMSVCEnvironment() || Triple.isOSWindows()) { + struct X64Windows final : public Interface { + X64Windows() : Interface(8, 8) {} + }; + // x64 msvc emit vaarg passes > 8 byte values by pointer + // however the variadic call instruction created does not, e.g. + // a <4 x f32> will be passed as itself, not as a pointer or byval. + // Postponing resolution of that for now. + return nullptr; + } + + if (Triple.isOSDarwin()) { + return std::make_unique<VariadicABIInfo::X64SystemV>(); + } + + if (IsLinuxABI) { + return std::make_unique<VariadicABIInfo::X64SystemV>(); + } ---------------- Pierre-vh wrote:
both can drop `{}` https://github.com/llvm/llvm-project/pull/81058 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits