================ @@ -0,0 +1,471 @@ +//===- AllocToken.cpp - Allocation token instrumentation ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements AllocToken, an instrumentation pass that +// replaces allocation calls with token-enabled versions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/AllocToken.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Analysis.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/RandomNumberGenerator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/xxhash.h" +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <limits> +#include <memory> +#include <optional> +#include <string> +#include <utility> +#include <variant> + +using namespace llvm; + +#define DEBUG_TYPE "alloc-token" + 
+namespace { + +//===--- Constants --------------------------------------------------------===// + +enum class TokenMode : unsigned { + /// Incrementally increasing token ID. + Increment = 0, + + /// Simple mode that returns a statically-assigned random token ID. + Random = 1, + + /// Token ID based on allocated type hash. + TypeHash = 2, +}; + +//===--- Command-line options ---------------------------------------------===// + +cl::opt<TokenMode> + ClMode("alloc-token-mode", cl::Hidden, cl::desc("Token assignment mode"), + cl::init(TokenMode::TypeHash), + cl::values(clEnumValN(TokenMode::Increment, "increment", + "Incrementally increasing token ID"), + clEnumValN(TokenMode::Random, "random", + "Statically-assigned random token ID"), + clEnumValN(TokenMode::TypeHash, "typehash", + "Token ID based on allocated type hash"))); + +cl::opt<std::string> ClFuncPrefix("alloc-token-prefix", + cl::desc("The allocation function prefix"), + cl::Hidden, cl::init("__alloc_token_")); + +cl::opt<uint64_t> ClMaxTokens("alloc-token-max", + cl::desc("Maximum number of tokens (0 = no max)"), + cl::Hidden, cl::init(0)); + +cl::opt<bool> + ClFastABI("alloc-token-fast-abi", + cl::desc("The token ID is encoded in the function name"), + cl::Hidden, cl::init(false)); + +// Instrument libcalls only by default - compatible allocators only need to take +// care of providing standard allocation functions. With extended coverage, also +// instrument non-libcall allocation function calls with !alloc_token +// metadata. +cl::opt<bool> + ClExtended("alloc-token-extended", + cl::desc("Extend coverage to custom allocation functions"), + cl::Hidden, cl::init(false)); + +// C++ defines ::operator new (and variants) as replaceable (vs. standard +// library versions), which are nobuiltin, and are therefore not covered by +// isAllocationFn(). Cover by default, as users of AllocToken are already +// required to provide token-aware allocation functions (no defaults). 
+cl::opt<bool> ClCoverReplaceableNew("alloc-token-cover-replaceable-new", + cl::desc("Cover replaceable operator new"), + cl::Hidden, cl::init(true)); + +cl::opt<uint64_t> ClFallbackToken( + "alloc-token-fallback", + cl::desc("The default fallback token where none could be determined"), + cl::Hidden, cl::init(0)); + +//===--- Statistics -------------------------------------------------------===// + +STATISTIC(NumFunctionsInstrumented, "Functions instrumented"); +STATISTIC(NumAllocationsInstrumented, "Allocations instrumented"); + +//===----------------------------------------------------------------------===// + +/// Returns the !alloc_token metadata if available. +/// +/// Expected format is: !{<type-name>} +MDNode *getAllocTokenMetadata(const CallBase &CB) { + MDNode *Ret = CB.getMetadata(LLVMContext::MD_alloc_token); + if (!Ret) + return nullptr; + assert(Ret->getNumOperands() == 1 && "bad !alloc_token"); + assert(isa<MDString>(Ret->getOperand(0))); + return Ret; +} + +class ModeBase { +public: + explicit ModeBase(uint64_t MaxTokens) : MaxTokens(MaxTokens) {} + +protected: + uint64_t boundedToken(uint64_t Val) const { + return MaxTokens ? Val % MaxTokens : Val; + } + + const uint64_t MaxTokens; +}; + +/// Implementation for TokenMode::Increment. +class IncrementMode : public ModeBase { +public: + using ModeBase::ModeBase; + + uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &) { + return boundedToken(Counter++); + } + +private: + uint64_t Counter = 0; +}; + +/// Implementation for TokenMode::Random. +class RandomMode : public ModeBase { +public: + RandomMode(uint64_t MaxTokens, std::unique_ptr<RandomNumberGenerator> RNG) + : ModeBase(MaxTokens), RNG(std::move(RNG)) {} + uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &) { + return boundedToken((*RNG)()); + } + +private: + std::unique_ptr<RandomNumberGenerator> RNG; +}; + +/// Implementation for TokenMode::TypeHash. 
The implementation ensures +/// hashes are stable across different compiler invocations. Uses xxHash as the +/// hash function. +class TypeHashMode : public ModeBase { +public: + using ModeBase::ModeBase; + + uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) { + if (MDNode *N = getAllocTokenMetadata(CB)) { + MDString *S = cast<MDString>(N->getOperand(0)); + return boundedToken(xxHash64(S->getString())); + } + remarkNoMetadata(CB, ORE); + return ClFallbackToken; + } + + /// Remark that there was no precise type information. + static void remarkNoMetadata(const CallBase &CB, + OptimizationRemarkEmitter &ORE) { + ORE.emit([&] { + ore::NV FuncNV("Function", CB.getParent()->getParent()); + const Function *Callee = CB.getCalledFunction(); + ore::NV CalleeNV("Callee", Callee ? Callee->getName() : "<unknown>"); + return OptimizationRemark(DEBUG_TYPE, "NoAllocToken", &CB) + << "Call to '" << CalleeNV << "' in '" << FuncNV + << "' without source-level type token"; + }); + } +}; + +// Apply opt overrides. +AllocTokenOptions transformOptionsFromCl(AllocTokenOptions Opts) { + if (!Opts.MaxTokens.has_value()) + Opts.MaxTokens = ClMaxTokens; + Opts.FastABI |= ClFastABI; + Opts.Extended |= ClExtended; + return Opts; +} + +class AllocToken { +public: + explicit AllocToken(AllocTokenOptions Opts, Module &M, + ModuleAnalysisManager &MAM) + : Options(transformOptionsFromCl(std::move(Opts))), Mod(M), + FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()), + Mode(IncrementMode(*Options.MaxTokens)) { + switch (ClMode.getValue()) { + case TokenMode::Increment: + break; + case TokenMode::Random: + Mode.emplace<RandomMode>(*Options.MaxTokens, M.createRNG(DEBUG_TYPE)); + break; + case TokenMode::TypeHash: + Mode.emplace<TypeHashMode>(*Options.MaxTokens); + break; + } + } + + bool instrumentFunction(Function &F); + +private: + /// Returns true for !isAllocationFn() functions that are also eligible for + /// instrumentation. 
+ static bool isInstrumentableLibFunc(LibFunc Func, const Value *V, + const TargetLibraryInfo *TLI); + + /// Returns true for isAllocationFn() functions that we should ignore. + static bool ignoreInstrumentableLibFunc(LibFunc Func); + + /// Replace a call/invoke with a call/invoke to the allocation function + /// with token ID. + bool replaceAllocationCall(CallBase *CB, LibFunc Func, + OptimizationRemarkEmitter &ORE, + const TargetLibraryInfo &TLI); + + /// Return replacement function for a LibFunc that takes a token ID. + FunctionCallee getTokenAllocFunction(const CallBase &CB, uint64_t TokenID, + LibFunc OriginalFunc); + + /// Return the token ID from metadata in the call. + uint64_t getToken(const CallBase &CB, OptimizationRemarkEmitter &ORE) { + return std::visit([&](auto &&Mode) { return Mode(CB, ORE); }, Mode); + } + + const AllocTokenOptions Options; + Module &Mod; + IntegerType *IntPtrTy = Mod.getDataLayout().getIntPtrType(Mod.getContext()); + FunctionAnalysisManager &FAM; + // Cache for replacement functions. + DenseMap<std::pair<LibFunc, uint64_t>, FunctionCallee> TokenAllocFunctions; + // Selected mode. + std::variant<IncrementMode, RandomMode, TypeHashMode> Mode; +}; + +bool AllocToken::instrumentFunction(Function &F) { + // Do not apply any instrumentation for naked functions. + if (F.hasFnAttribute(Attribute::Naked)) + return false; + if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation)) + return false; + // Don't touch available_externally functions, their actual body is elsewhere. + if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) + return false; + // Only instrument functions that have the sanitize_alloc_token attribute. 
+ if (!F.hasFnAttribute(Attribute::SanitizeAllocToken)) + return false; + + auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); + auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); + SmallVector<std::pair<CallBase *, LibFunc>, 4> AllocCalls; + + // Collect all allocation calls to avoid iterator invalidation. + for (Instruction &I : instructions(F)) { + auto *CB = dyn_cast<CallBase>(&I); + if (!CB) + continue; + const Function *Callee = CB->getCalledFunction(); + if (!Callee) + continue; + // Ignore nobuiltin of the CallBase, so that we can cover nobuiltin libcalls + // if requested via isInstrumentableLibFunc(). Note that isAllocationFn() is + // returning false for nobuiltin calls. + LibFunc Func; + if (TLI.getLibFunc(*Callee, Func)) { + if (ignoreInstrumentableLibFunc(Func)) + continue; + if (isInstrumentableLibFunc(Func, CB, &TLI)) + AllocCalls.emplace_back(CB, Func); + } else if (Options.Extended && getAllocTokenMetadata(*CB)) { + AllocCalls.emplace_back(CB, NotLibFunc); + } ---------------- zmodem wrote:
(Feel free to ignore this or do it in a follow-up.) I think it might be cleaner to break this logic out into a `shouldInstrumentCall` function that combines the `isInstrumentableLibFunc` and `ignoreInstrumentableLibFunc` logic (those two are always called together anyway) with the `!alloc_token` metadata check.

https://github.com/llvm/llvm-project/pull/156838
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits