================ @@ -0,0 +1,1572 @@ +//=== SemaFunctionEffects.cpp - Sema handling of function effects ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements Sema handling of function effects. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/Type.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Sema/SemaInternal.h" + +#define DEBUG_TYPE "effectanalysis" + +using namespace clang; + +namespace { + +enum class ViolationID : uint8_t { + None = 0, // Sentinel for an empty Violation. + // These first few map to a %select{} in a diagnostic. + BaseDiagnosticIndex, + AllocatesMemory = BaseDiagnosticIndex, + ThrowsOrCatchesExceptions, + HasStaticLocalVariable, + AccessesThreadLocalVariable, + AccessesObjCMethodOrProperty, + + // These only apply to callees, where the analysis stops at the Decl. + DeclDisallowsInference, + + // These both apply to indirect calls. The difference is that sometimes + // we have an actual Decl (generally a variable) which is the function + // pointer being called, and sometimes, typically due to a cast, we only + // have an expression. + CallsDeclWithoutEffect, + CallsExprWithoutEffect, +}; + +// Information about the AST context in which a violation was found, so +// that diagnostics can point to the correct source. +class ViolationSite { +public: + enum class Kind : uint8_t { + Default = 0, // Function body. 
+ MemberInitializer = 1, + DefaultArgExpr = 2 + }; + +private: + llvm::PointerIntPair<CXXDefaultArgExpr *, 2, Kind> Impl; + +public: + ViolationSite() = default; + + explicit ViolationSite(CXXDefaultArgExpr *E) + : Impl(E, Kind::DefaultArgExpr) {} + + Kind kind() const { return static_cast<Kind>(Impl.getInt()); } + CXXDefaultArgExpr *defaultArgExpr() const { return Impl.getPointer(); } + + void setKind(Kind K) { Impl.setPointerAndInt(nullptr, K); } +}; + +// Represents a violation of the rules, potentially for the entire duration of +// the analysis phase, in order to refer to it when explaining why a caller has +// been made unsafe by a callee. Can be transformed into either a Diagnostic +// (warning or a note), depending on whether the violation pertains to a +// function failing to be verifed as holding an effect vs. a function failing to +// be inferred as holding that effect. +struct Violation { + FunctionEffect Effect; + FunctionEffect + CalleeEffectPreventingInference; // Only for certain IDs; can be None. + ViolationID ID = ViolationID::None; + ViolationSite Site; + SourceLocation Loc; + const Decl *Callee = nullptr; // Only valid for Calls*. + + Violation() = default; + + Violation(FunctionEffect Effect, ViolationID ID, ViolationSite VS, + SourceLocation Loc, const Decl *Callee = nullptr, + std::optional<FunctionEffect> CalleeEffect = std::nullopt) + : Effect(Effect), CalleeEffectPreventingInference( + CalleeEffect.value_or(FunctionEffect())), + ID(ID), Site(VS), Loc(Loc), Callee(Callee) {} + + unsigned diagnosticSelectIndex() const { + return unsigned(ID) - unsigned(ViolationID::BaseDiagnosticIndex); + } +}; + +enum class SpecialFuncType : uint8_t { None, OperatorNew, OperatorDelete }; +enum class CallableType : uint8_t { + // Unknown: probably function pointer. + Unknown, + Function, + Virtual, + Block +}; + +// Return whether a function's effects CAN be verified. +// The question of whether it SHOULD be verified is independent. 
+static bool functionIsVerifiable(const FunctionDecl *FD) { + if (FD->isTrivial()) { + // Otherwise `struct x { int a; };` would have an unverifiable default + // constructor. + return true; + } + return FD->hasBody(); +} + +static bool isNoexcept(const FunctionDecl *FD) { + const auto *FPT = FD->getType()->castAs<FunctionProtoType>(); + if (FPT->isNothrow() || FD->hasAttr<NoThrowAttr>()) + return true; + return false; +} + +// This list is probably incomplete. +// FIXME: Investigate: +// __builtin_eh_return? +// __builtin_allow_runtime_check? +// __builtin_unwind_init and other similar things that sound exception-related. +// va_copy? +// coroutines? +static FunctionEffectKindSet getBuiltinFunctionEffects(unsigned BuiltinID) { + FunctionEffectKindSet Result; + + switch (BuiltinID) { + case 0: // Not builtin. + default: // By default, builtins have no known effects. + break; + + // These allocate/deallocate heap memory. + case Builtin::ID::BI__builtin_calloc: + case Builtin::ID::BI__builtin_malloc: + case Builtin::ID::BI__builtin_realloc: + case Builtin::ID::BI__builtin_free: + case Builtin::ID::BI__builtin_operator_delete: + case Builtin::ID::BI__builtin_operator_new: + case Builtin::ID::BIaligned_alloc: + case Builtin::ID::BIcalloc: + case Builtin::ID::BImalloc: + case Builtin::ID::BImemalign: + case Builtin::ID::BIrealloc: + case Builtin::ID::BIfree: + + case Builtin::ID::BIfopen: + case Builtin::ID::BIpthread_create: + case Builtin::ID::BI_Block_object_dispose: + Result.insert(FunctionEffect(FunctionEffect::Kind::Allocating)); + break; + + // These block in some other way than allocating memory. + case Builtin::ID::BIlongjmp: + case Builtin::ID::BI_longjmp: + case Builtin::ID::BIsiglongjmp: + case Builtin::ID::BI__builtin_longjmp: + case Builtin::ID::BIobjc_exception_throw: + + // Objective-C runtime. 
+ case Builtin::ID::BIobjc_msgSend: + case Builtin::ID::BIobjc_msgSend_fpret: + case Builtin::ID::BIobjc_msgSend_fp2ret: + case Builtin::ID::BIobjc_msgSend_stret: + case Builtin::ID::BIobjc_msgSendSuper: + case Builtin::ID::BIobjc_getClass: + case Builtin::ID::BIobjc_getMetaClass: + case Builtin::ID::BIobjc_enumerationMutation: + case Builtin::ID::BIobjc_assign_ivar: + case Builtin::ID::BIobjc_assign_global: + case Builtin::ID::BIobjc_sync_enter: + case Builtin::ID::BIobjc_sync_exit: + case Builtin::ID::BINSLog: + case Builtin::ID::BINSLogv: + + // stdio.h + case Builtin::ID::BIfread: + case Builtin::ID::BIfwrite: + + // stdio.h: printf family. + case Builtin::ID::BIprintf: + case Builtin::ID::BI__builtin_printf: + case Builtin::ID::BIfprintf: + case Builtin::ID::BIsnprintf: + case Builtin::ID::BIsprintf: + case Builtin::ID::BIvprintf: + case Builtin::ID::BIvfprintf: + case Builtin::ID::BIvsnprintf: + case Builtin::ID::BIvsprintf: + + // stdio.h: scanf family. + case Builtin::ID::BIscanf: + case Builtin::ID::BIfscanf: + case Builtin::ID::BIsscanf: + case Builtin::ID::BIvscanf: + case Builtin::ID::BIvfscanf: + case Builtin::ID::BIvsscanf: + + Result.insert(FunctionEffect(FunctionEffect::Kind::Blocking)); + break; + } + + return Result; +} + +// Transitory, more extended information about a callable, which can be a +// function, block, or function pointer. +struct CallableInfo { + // CDecl holds the function's definition, if any. + // FunctionDecl if CallableType::Function or Virtual + // BlockDecl if CallableType::Block + const Decl *CDecl; + + // Remember whether the callable is a function, block, virtual method, + // or (presumed) function pointer. + CallableType CType = CallableType::Unknown; + + // Remember whether the callable is an operator new or delete function, + // so that calls to them are reported more meaningfully, as memory + // allocations. 
+ SpecialFuncType FuncType = SpecialFuncType::None; + + // We inevitably want to know the callable's declared effects, so cache them. + FunctionEffectKindSet Effects; + + CallableInfo(const Decl &CD, SpecialFuncType FT = SpecialFuncType::None) + : CDecl(&CD), FuncType(FT) { + FunctionEffectsRef DeclEffects; + if (auto *FD = dyn_cast<FunctionDecl>(CDecl)) { + // Use the function's definition, if any. + if (const FunctionDecl *Def = FD->getDefinition()) + CDecl = FD = Def; + CType = CallableType::Function; + if (auto *Method = dyn_cast<CXXMethodDecl>(FD); + Method && Method->isVirtual()) + CType = CallableType::Virtual; + DeclEffects = FD->getFunctionEffects(); + } else if (auto *BD = dyn_cast<BlockDecl>(CDecl)) { + CType = CallableType::Block; + DeclEffects = BD->getFunctionEffects(); + } else if (auto *VD = dyn_cast<ValueDecl>(CDecl)) { + // ValueDecl is function, enum, or variable, so just look at its type. + DeclEffects = FunctionEffectsRef::get(VD->getType()); + } + Effects = FunctionEffectKindSet(DeclEffects); + } + + CallableType type() const { return CType; } + + bool isCalledDirectly() const { + return CType == CallableType::Function || CType == CallableType::Block; + } + + bool isVerifiable() const { + switch (CType) { + case CallableType::Unknown: + case CallableType::Virtual: + return false; + case CallableType::Block: + return true; + case CallableType::Function: + return functionIsVerifiable(dyn_cast<FunctionDecl>(CDecl)); + } + llvm_unreachable("undefined CallableType"); + } + + /// Generate a name for logging and diagnostics. 
+ std::string name(Sema &S) const { + std::string Name; + llvm::raw_string_ostream OS(Name); + + if (auto *FD = dyn_cast<FunctionDecl>(CDecl)) + FD->getNameForDiagnostic(OS, S.getPrintingPolicy(), + /*Qualified=*/true); + else if (auto *BD = dyn_cast<BlockDecl>(CDecl)) + OS << "(block " << BD->getBlockManglingNumber() << ")"; + else if (auto *VD = dyn_cast<NamedDecl>(CDecl)) + VD->printQualifiedName(OS); + return Name; + } +}; + +// ---------- +// Map effects to single Violations, to hold the first (of potentially many) +// violations pertaining to an effect, per function. +class EffectToViolationMap { + // Since we currently only have a tiny number of effects (typically no more + // than 1), use a SmallVector with an inline capacity of 1. Since it + // is often empty, use a unique_ptr to the SmallVector. + // Note that Violation itself contains a FunctionEffect which is the key. + using ImplVec = llvm::SmallVector<Violation, 1>; + std::unique_ptr<ImplVec> Impl; + +public: + // Insert a new Violation if we do not already have one for its effect. + void maybeInsert(const Violation &Viol) { + if (Impl == nullptr) + Impl = std::make_unique<ImplVec>(); + else if (lookup(Viol.Effect) != nullptr) + return; + + Impl->push_back(Viol); + } + + const Violation *lookup(FunctionEffect Key) { + if (Impl == nullptr) + return nullptr; + + auto *Iter = + std::find_if(Impl->begin(), Impl->end(), + [&](const auto &Item) { return Item.Effect == Key; }); + return Iter != Impl->end() ? &*Iter : nullptr; + } + + size_t size() const { return Impl ? Impl->size() : 0; } +}; + +// ---------- +// State pertaining to a function whose AST is walked and whose effect analysis +// is dependent on a subsequent analysis of other functions. +class PendingFunctionAnalysis { + friend class CompleteFunctionAnalysis; + +public: + struct DirectCall { + const Decl *Callee; + SourceLocation CallLoc; + // Not all recursive calls are detected, just enough + // to break cycles. 
+ bool Recursed = false; + ViolationSite VSite; + + DirectCall(const Decl *D, SourceLocation CallLoc, ViolationSite VSite) + : Callee(D), CallLoc(CallLoc), VSite(VSite) {} + }; + + // We always have two disjoint sets of effects to verify: + // 1. Effects declared explicitly by this function. + // 2. All other inferrable effects needing verification. + FunctionEffectKindSet DeclaredVerifiableEffects; + FunctionEffectKindSet EffectsToInfer; + +private: + // Violations pertaining to the function's explicit effects. + SmallVector<Violation, 0> ViolationsForExplicitEffects; + + // Violations pertaining to other, non-explicit, inferrable effects. + EffectToViolationMap InferrableEffectToFirstViolation; + + // These unverified direct calls are what keeps the analysis "pending", + // until the callees can be verified. + SmallVector<DirectCall, 0> UnverifiedDirectCalls; + +public: + PendingFunctionAnalysis(Sema &S, const CallableInfo &CInfo, + FunctionEffectKindSet AllInferrableEffectsToVerify) + : DeclaredVerifiableEffects(CInfo.Effects) { + // Check for effects we are not allowed to infer. + FunctionEffectKindSet InferrableEffects; + + for (FunctionEffect effect : AllInferrableEffectsToVerify) { + std::optional<FunctionEffect> ProblemCalleeEffect = + effect.effectProhibitingInference(*CInfo.CDecl, CInfo.Effects); + if (!ProblemCalleeEffect) + InferrableEffects.insert(effect); + else { + // Add a Violation for this effect if a caller were to + // try to infer it. + InferrableEffectToFirstViolation.maybeInsert(Violation( + effect, ViolationID::DeclDisallowsInference, ViolationSite{}, + CInfo.CDecl->getLocation(), nullptr, ProblemCalleeEffect)); + } + } + // InferrableEffects is now the set of inferrable effects which are not + // prohibited. + EffectsToInfer = FunctionEffectKindSet::difference( + InferrableEffects, DeclaredVerifiableEffects); + } + + // Hide the way that Violations for explicitly required effects vs. inferred + // ones are handled differently. 
+ void checkAddViolation(bool Inferring, const Violation &NewViol) { + if (!Inferring) + ViolationsForExplicitEffects.push_back(NewViol); + else + InferrableEffectToFirstViolation.maybeInsert(NewViol); + } + + void addUnverifiedDirectCall(const Decl *D, SourceLocation CallLoc, + ViolationSite VSite) { + UnverifiedDirectCalls.emplace_back(D, CallLoc, VSite); + } + + // Analysis is complete when there are no unverified direct calls. + bool isComplete() const { return UnverifiedDirectCalls.empty(); } + + const Violation *violationForInferrableEffect(FunctionEffect effect) { + return InferrableEffectToFirstViolation.lookup(effect); + } + + // Mutable because caller may need to set a DirectCall's Recursing flag. + MutableArrayRef<DirectCall> unverifiedCalls() { + assert(!isComplete()); + return UnverifiedDirectCalls; + } + + ArrayRef<Violation> getSortedViolationsForExplicitEffects(SourceManager &SM) { + if (!ViolationsForExplicitEffects.empty()) + std::sort(ViolationsForExplicitEffects.begin(), + ViolationsForExplicitEffects.end(), + [&SM](const Violation &LHS, const Violation &RHS) { + return SM.isBeforeInTranslationUnit(LHS.Loc, RHS.Loc); + }); + return ViolationsForExplicitEffects; + } + + void dump(Sema &SemaRef, llvm::raw_ostream &OS) const { + OS << "Pending: Declared "; + DeclaredVerifiableEffects.dump(OS); + OS << ", " << ViolationsForExplicitEffects.size() << " violations; "; + OS << " Infer "; + EffectsToInfer.dump(OS); + OS << ", " << InferrableEffectToFirstViolation.size() << " violations"; + if (!UnverifiedDirectCalls.empty()) { + OS << "; Calls: "; + for (const DirectCall &Call : UnverifiedDirectCalls) { + CallableInfo CI(*Call.Callee); + OS << " " << CI.name(SemaRef); + } + } + OS << "\n"; + } +}; + +// ---------- +class CompleteFunctionAnalysis { + // Current size: 2 pointers +public: + // Has effects which are both the declared ones -- not to be inferred -- plus + // ones which have been successfully inferred. 
These are all considered + // "verified" for the purposes of callers; any issue with verifying declared + // effects has already been reported and is not the problem of any caller. + FunctionEffectKindSet VerifiedEffects; + +private: + // This is used to generate notes about failed inference. + EffectToViolationMap InferrableEffectToFirstViolation; + +public: + // The incoming Pending analysis is consumed (member(s) are moved-from). + CompleteFunctionAnalysis(ASTContext &Ctx, PendingFunctionAnalysis &&Pending, + FunctionEffectKindSet DeclaredEffects, + FunctionEffectKindSet AllInferrableEffectsToVerify) + : VerifiedEffects(DeclaredEffects) { + for (FunctionEffect effect : AllInferrableEffectsToVerify) + if (Pending.violationForInferrableEffect(effect) == nullptr) + VerifiedEffects.insert(effect); + + InferrableEffectToFirstViolation = + std::move(Pending.InferrableEffectToFirstViolation); + } + + const Violation *firstViolationForEffect(FunctionEffect Effect) { + return InferrableEffectToFirstViolation.lookup(Effect); + } + + void dump(llvm::raw_ostream &OS) const { + OS << "Complete: Verified "; + VerifiedEffects.dump(OS); + OS << "; Infer "; + OS << InferrableEffectToFirstViolation.size() << " violations\n"; + } +}; + +// ========== +class Analyzer { + Sema &S; + + // Subset of Sema.AllEffectsToVerify + FunctionEffectKindSet AllInferrableEffectsToVerify; + + using FuncAnalysisPtr = + llvm::PointerUnion<PendingFunctionAnalysis *, CompleteFunctionAnalysis *>; + + // Map all Decls analyzed to FuncAnalysisPtr. Pending state is larger + // than complete state, so use different objects to represent them. + // The state pointers are owned by the container. + class AnalysisMap : llvm::DenseMap<const Decl *, FuncAnalysisPtr> { + using Base = llvm::DenseMap<const Decl *, FuncAnalysisPtr>; + + public: + ~AnalysisMap(); + + // Use non-public inheritance in order to maintain the invariant + // that lookups and insertions are via the canonical Decls. 
+ + FuncAnalysisPtr lookup(const Decl *Key) const { + return Base::lookup(Key->getCanonicalDecl()); + } + + FuncAnalysisPtr &operator[](const Decl *Key) { + return Base::operator[](Key->getCanonicalDecl()); + } + + /// Shortcut for the case where we only care about completed analysis. + CompleteFunctionAnalysis *completedAnalysisForDecl(const Decl *D) const { + if (FuncAnalysisPtr AP = lookup(D); + isa_and_nonnull<CompleteFunctionAnalysis *>(AP)) + return AP.get<CompleteFunctionAnalysis *>(); + return nullptr; + } + + void dump(Sema &SemaRef, llvm::raw_ostream &OS) { + OS << "\nAnalysisMap:\n"; + for (const auto &item : *this) { + CallableInfo CI(*item.first); + const auto AP = item.second; + OS << item.first << " " << CI.name(SemaRef) << " : "; + if (AP.isNull()) + OS << "null\n"; + else if (isa<CompleteFunctionAnalysis *>(AP)) { ---------------- Sirraide wrote:
This one is mostly about not mixing the two, haha. We generally prefer no braces if the body is a single statement, but mixing the two in a single `else`-`if` chain is a bit weird imo: either every `if` statement in the chain should have braces or none should. https://github.com/llvm/llvm-project/pull/99656 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits