================
@@ -0,0 +1,471 @@
+//===- AllocToken.cpp - Allocation token instrumentation ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements AllocToken, an instrumentation pass that
+// replaces allocation calls with token-enabled versions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/AllocToken.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Analysis.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/xxhash.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+#include <variant>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "alloc-token"
+
+namespace {
+
+//===--- Constants --------------------------------------------------------===//
+
+/// Strategy used to pick the token ID for an allocation call.
+enum class TokenMode : unsigned {
+  /// Token IDs that increase incrementally.
+  Increment,
+
+  /// Simple mode: a random token ID that is assigned statically.
+  Random,
+
+  /// Token ID computed from a hash of the allocated type.
+  TypeHash,
+};
+
+//===--- Command-line options ---------------------------------------------===//
+
+// Selects which token mode the pass uses to compute token IDs. Defaults to
+// TokenMode::TypeHash.
+cl::opt<TokenMode>
+    ClMode("alloc-token-mode", cl::Hidden, cl::desc("Token assignment mode"),
+           cl::init(TokenMode::TypeHash),
+           cl::values(clEnumValN(TokenMode::Increment, "increment",
+                                 "Incrementally increasing token ID"),
+                      clEnumValN(TokenMode::Random, "random",
+                                 "Statically-assigned random token ID"),
+                      clEnumValN(TokenMode::TypeHash, "typehash",
+                                 "Token ID based on allocated type hash")));
+
+// Prefix of the allocation function names; defaults to "__alloc_token_".
+cl::opt<std::string> ClFuncPrefix("alloc-token-prefix",
+                                  cl::desc("The allocation function prefix"),
+                                  cl::Hidden, cl::init("__alloc_token_"));
+
+// Token limit: mode classes reduce token IDs modulo this value, and 0 disables
+// the reduction. Only consulted by transformOptionsFromCl() when the pass
+// options do not already carry a MaxTokens value.
+cl::opt<uint64_t> ClMaxTokens("alloc-token-max",
+                              cl::desc("Maximum number of tokens (0 = no max)"),
+                              cl::Hidden, cl::init(0));
+
+// OR-ed into the FastABI pass option by transformOptionsFromCl().
+cl::opt<bool>
+    ClFastABI("alloc-token-fast-abi",
+              cl::desc("The token ID is encoded in the function name"),
+              cl::Hidden, cl::init(false));
+
+// Instrument libcalls only by default - compatible allocators only need to take
+// care of providing standard allocation functions. With extended coverage, also
+// instrument non-libcall allocation function calls with !alloc_token
+// metadata.
+cl::opt<bool>
+    ClExtended("alloc-token-extended",
+               cl::desc("Extend coverage to custom allocation functions"),
+               cl::Hidden, cl::init(false));
+
+// C++ defines ::operator new (and variants) as replaceable (vs. standard
+// library versions), which are nobuiltin, and are therefore not covered by
+// isAllocationFn(). Cover by default, as users of AllocToken are already
+// required to provide token-aware allocation functions (no defaults).
+cl::opt<bool> ClCoverReplaceableNew("alloc-token-cover-replaceable-new",
+                                    cl::desc("Cover replaceable operator new"),
+                                    cl::Hidden, cl::init(true));
+
+// Token returned by TypeHashMode for calls without !alloc_token metadata. It is
+// returned as-is, i.e. without being reduced by the token limit.
+cl::opt<uint64_t> ClFallbackToken(
+    "alloc-token-fallback",
+    cl::desc("The default fallback token where none could be determined"),
+    cl::Hidden, cl::init(0));
+
+//===--- Statistics -------------------------------------------------------===//
+
+STATISTIC(NumFunctionsInstrumented, "Functions instrumented");
+STATISTIC(NumAllocationsInstrumented, "Allocations instrumented");
+
+//===----------------------------------------------------------------------===//
+
+/// Looks up the !alloc_token metadata node attached to \p CB.
+///
+/// Expected format is: !{<type-name>}
+///
+/// \returns the node, or nullptr when the call carries no such metadata.
+MDNode *getAllocTokenMetadata(const CallBase &CB) {
+  if (MDNode *Node = CB.getMetadata(LLVMContext::MD_alloc_token)) {
+    // A well-formed node has exactly one operand, and it is an MDString.
+    assert(Node->getNumOperands() == 1 && "bad !alloc_token");
+    assert(isa<MDString>(Node->getOperand(0)));
+    return Node;
+  }
+  return nullptr;
+}
+
+/// Common base of the token mode classes: stores the token limit and provides
+/// the helper that applies it.
+class ModeBase {
+public:
+  explicit ModeBase(uint64_t MaxTokens) : MaxTokens(MaxTokens) {}
+
+protected:
+  /// Reduces \p Val modulo MaxTokens. A MaxTokens of zero means "no limit", in
+  /// which case \p Val is returned unchanged.
+  uint64_t boundedToken(uint64_t Val) const {
+    if (MaxTokens == 0)
+      return Val;
+    return Val % MaxTokens;
+  }
+
+  const uint64_t MaxTokens;
+};
+
+/// Implementation for TokenMode::Increment: every invocation hands out the
+/// current counter value (bounded by the token limit) and advances the counter.
+class IncrementMode : public ModeBase {
+public:
+  using ModeBase::ModeBase;
+
+  uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &) {
+    const uint64_t Current = NextToken;
+    ++NextToken;
+    return boundedToken(Current);
+  }
+
+private:
+  /// Unbounded value handed out by the next invocation.
+  uint64_t NextToken = 0;
+};
+
+/// Implementation for TokenMode::Random: every invocation draws a value from
+/// the owned random number generator and bounds it by the token limit.
+class RandomMode : public ModeBase {
+public:
+  RandomMode(uint64_t MaxTokens, std::unique_ptr<RandomNumberGenerator> RNG)
+      : ModeBase(MaxTokens), Generator(std::move(RNG)) {}
+
+  uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &) {
+    const uint64_t Drawn = (*Generator)();
+    return boundedToken(Drawn);
+  }
+
+private:
+  /// Source of the random token values.
+  std::unique_ptr<RandomNumberGenerator> Generator;
+};
+
+/// Implementation for TokenMode::TypeHash. The implementation ensures
+/// hashes are stable across different compiler invocations. Uses xxHash as the
+/// hash function.
+class TypeHashMode : public ModeBase {
+public:
+  using ModeBase::ModeBase;
+
+  /// Hashes the type name stored in the !alloc_token metadata of \p CB and
+  /// bounds the result by the token limit. Calls without that metadata emit a
+  /// remark and yield the fallback token as-is.
+  uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
+    MDNode *TokenMD = getAllocTokenMetadata(CB);
+    if (!TokenMD) {
+      remarkNoMetadata(CB, ORE);
+      return ClFallbackToken;
+    }
+    const auto *TypeName = cast<MDString>(TokenMD->getOperand(0));
+    return boundedToken(xxHash64(TypeName->getString()));
+  }
+
+  /// Emits a remark stating that \p CB has no precise type information.
+  static void remarkNoMetadata(const CallBase &CB,
+                               OptimizationRemarkEmitter &ORE) {
+    ORE.emit([&] {
+      // Indirect calls have no statically known callee to name.
+      StringRef CalleeName = "<unknown>";
+      if (const Function *Callee = CB.getCalledFunction())
+        CalleeName = Callee->getName();
+      ore::NV CalleeNV("Callee", CalleeName);
+      ore::NV FuncNV("Function", CB.getFunction());
+      return OptimizationRemark(DEBUG_TYPE, "NoAllocToken", &CB)
+             << "Call to '" << CalleeNV << "' in '" << FuncNV
+             << "' without source-level type token";
+    });
+  }
+};
+
+// Merge the command-line flags into the pass options: the boolean flags can
+// only switch a feature on, and the token limit from the command line is used
+// only if the options do not specify one.
+AllocTokenOptions transformOptionsFromCl(AllocTokenOptions Opts) {
+  Opts.Extended |= ClExtended;
+  Opts.FastABI |= ClFastABI;
+  if (!Opts.MaxTokens)
+    Opts.MaxTokens = ClMaxTokens;
+  return Opts;
+}
+
+/// Per-module state of the instrumentation: the effective options, the
+/// selected token mode and a cache of replacement functions.
+class AllocToken {
+public:
+  explicit AllocToken(AllocTokenOptions Opts, Module &M,
+                      ModuleAnalysisManager &MAM)
+      : Options(transformOptionsFromCl(std::move(Opts))), Mod(M),
+        FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
+        Mode(IncrementMode(*Options.MaxTokens)) {
+    // Mode starts out as IncrementMode; replace it if another mode is selected.
+    const uint64_t TokenLimit = *Options.MaxTokens;
+    switch (ClMode.getValue()) {
+    case TokenMode::TypeHash:
+      Mode.emplace<TypeHashMode>(TokenLimit);
+      break;
+    case TokenMode::Random:
+      Mode.emplace<RandomMode>(TokenLimit, M.createRNG(DEBUG_TYPE));
+      break;
+    case TokenMode::Increment:
+      break;
+    }
+  }
+
+  bool instrumentFunction(Function &F);
+
+private:
+  /// Returns true for !isAllocationFn() functions that are also eligible for
+  /// instrumentation.
+  static bool isInstrumentableLibFunc(LibFunc Func, const Value *V,
+                                      const TargetLibraryInfo *TLI);
+
+  /// Returns true for isAllocationFn() functions that we should ignore.
+  static bool ignoreInstrumentableLibFunc(LibFunc Func);
+
+  /// Replace a call/invoke with a call/invoke to the allocation function
+  /// with token ID.
+  bool replaceAllocationCall(CallBase *CB, LibFunc Func,
+                             OptimizationRemarkEmitter &ORE,
+                             const TargetLibraryInfo &TLI);
+
+  /// Return replacement function for a LibFunc that takes a token ID.
+  FunctionCallee getTokenAllocFunction(const CallBase &CB, uint64_t TokenID,
+                                       LibFunc OriginalFunc);
+
+  /// Return the token ID for \p CB as computed by the selected mode.
+  uint64_t getToken(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
+    auto Dispatch = [&CB, &ORE](auto &&Selected) { return Selected(CB, ORE); };
+    return std::visit(Dispatch, Mode);
+  }
+
+  const AllocTokenOptions Options;
+  Module &Mod;
+  // Initialized from Mod, so it must stay declared after it.
+  IntegerType *IntPtrTy = Mod.getDataLayout().getIntPtrType(Mod.getContext());
+  FunctionAnalysisManager &FAM;
+  // Cache for replacement functions.
+  DenseMap<std::pair<LibFunc, uint64_t>, FunctionCallee> TokenAllocFunctions;
+  // Selected mode.
+  std::variant<IncrementMode, RandomMode, TypeHashMode> Mode;
+};
+
+bool AllocToken::instrumentFunction(Function &F) {
+  // Do not apply any instrumentation for naked functions.
+  if (F.hasFnAttribute(Attribute::Naked))
+    return false;
+  if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
+    return false;
+  // Don't touch available_externally functions, their actual body is elsewhere.
+  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
+    return false;
+  // Only instrument functions that have the sanitize_alloc_token attribute.
+  if (!F.hasFnAttribute(Attribute::SanitizeAllocToken))
+    return false;
+
+  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+  auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+  SmallVector<std::pair<CallBase *, LibFunc>, 4> AllocCalls;
+
+  // Collect all allocation calls to avoid iterator invalidation.
+  for (Instruction &I : instructions(F)) {
+    auto *CB = dyn_cast<CallBase>(&I);
+    if (!CB)
+      continue;
+    const Function *Callee = CB->getCalledFunction();
+    if (!Callee)
+      continue;
+    // Ignore nobuiltin of the CallBase, so that we can cover nobuiltin libcalls
+    // if requested via isInstrumentableLibFunc(). Note that isAllocationFn() is
+    // returning false for nobuiltin calls.
+    LibFunc Func;
+    if (TLI.getLibFunc(*Callee, Func)) {
+      if (ignoreInstrumentableLibFunc(Func))
+        continue;
+      if (isInstrumentableLibFunc(Func, CB, &TLI))
+        AllocCalls.emplace_back(CB, Func);
+    } else if (Options.Extended && getAllocTokenMetadata(*CB)) {
+      AllocCalls.emplace_back(CB, NotLibFunc);
+    }
----------------
zmodem wrote:

(Feel free to ignore or do in a followup)

I think it might be cleaner to break out this logic into a 
`shouldInstrumentCall` function, combining the `isInstrumentable..` and 
`ignoreInstrumentable..` logic (those two are always called together anyway), 
and the metadata check.

https://github.com/llvm/llvm-project/pull/156838
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to