================ @@ -2380,6 +2382,1239 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler { }; } // namespace +// ============================================================================= + +// Temporary debugging option +#define FX_ANALYZER_VERIFY_DECL_LIST 1 + +namespace FXAnalysis { + +enum class DiagnosticID : uint8_t { + None = 0, // sentinel for an empty Diagnostic + Throws, + Catches, + CallsObjC, + AllocatesMemory, + HasStaticLocal, + AccessesThreadLocal, + + // These only apply to callees, where the analysis stops at the Decl + DeclWithoutConstraintOrInference, + + CallsUnsafeDecl, + CallsDisallowedExpr, +}; + +struct Diagnostic { + const FunctionEffect *Effect = nullptr; + const Decl *Callee = nullptr; // only valid for Calls* + SourceLocation Loc; + DiagnosticID ID = DiagnosticID::None; + + Diagnostic() = default; + + Diagnostic(const FunctionEffect *Effect, DiagnosticID ID, SourceLocation Loc, + const Decl *Callee = nullptr) + : Effect(Effect), Callee(Callee), Loc(Loc), ID(ID) {} +}; + +enum class SpecialFuncType : uint8_t { None, OperatorNew, OperatorDelete }; +enum class CallType { + Unknown, + Function, + Virtual, + Block + // unknown: probably function pointer +}; + +// Return whether the function CAN be verified. +// The question of whether it SHOULD be verified is independent. +static bool functionIsVerifiable(const FunctionDecl *FD) { + if (!(FD->hasBody() || FD->isInlined())) { + // externally defined; we couldn't verify if we wanted to. + return false; + } + if (FD->isTrivial()) { + // Otherwise `struct x { int a; };` would have an unverifiable default + // constructor. + return true; + } + return true; +} + +// Transitory, more extended information about a callable, which can be a +// function, block, function pointer... +struct CallableInfo { + const Decl *CDecl; + mutable std::optional<std::string> + MaybeName; // mutable because built on demand in const method + SpecialFuncType FuncType = SpecialFuncType::None; + FunctionEffectSet Effects; + CallType CType = CallType::Unknown; + + CallableInfo(const Decl &CD, SpecialFuncType FT = SpecialFuncType::None) + : CDecl(&CD), FuncType(FT) { + // llvm::errs() << "CallableInfo " << name() << "\n"; + + if (auto *FD = dyn_cast<FunctionDecl>(CDecl)) { + assert(FD->getCanonicalDecl() == FD); + // Use the function's definition, if any. + if (auto *Def = FD->getDefinition()) { + CDecl = FD = Def; + } + CType = CallType::Function; + if (auto *Method = dyn_cast<CXXMethodDecl>(FD)) { + if (Method->isVirtual()) { + CType = CallType::Virtual; + } + } + Effects = FD->getFunctionEffects(); + } else if (auto *BD = dyn_cast<BlockDecl>(CDecl)) { + CType = CallType::Block; + Effects = BD->getFunctionEffects(); + } else if (auto *VD = dyn_cast<ValueDecl>(CDecl)) { + // ValueDecl is function, enum, or variable, so just look at the type. + Effects = FunctionEffectSet::get(*VD->getType()); + } + } + + bool isDirectCall() const { + return CType == CallType::Function || CType == CallType::Block; + } + + bool isVerifiable() const { + switch (CType) { + case CallType::Unknown: + case CallType::Virtual: + break; + case CallType::Block: + return true; + case CallType::Function: + return functionIsVerifiable(dyn_cast<FunctionDecl>(CDecl)); + } + return false; + } + + /// Generate a name for logging and diagnostics. + std::string name(Sema &Sem) const { + if (!MaybeName) { + std::string Name; + llvm::raw_string_ostream OS(Name); + + if (auto *FD = dyn_cast<FunctionDecl>(CDecl)) { + FD->getNameForDiagnostic(OS, Sem.getPrintingPolicy(), + /*Qualified=*/true); + } else if (auto *BD = dyn_cast<BlockDecl>(CDecl)) { + OS << "(block " << BD->getBlockManglingNumber() << ")"; + } else if (auto *VD = dyn_cast<NamedDecl>(CDecl)) { + VD->printQualifiedName(OS); + } + MaybeName = Name; + } + return *MaybeName; + } +}; + +// ---------- +// Map effects to single diagnostics. +class EffectToDiagnosticMap { + // Since we currently only have a tiny number of effects (typically no more + // than 1), use a sorted SmallVector. + using Element = std::pair<const FunctionEffect *, Diagnostic>; + using ImplVec = llvm::SmallVector<Element>; + std::unique_ptr<ImplVec> Impl; + +public: + Diagnostic &getOrInsertDefault(const FunctionEffect *Key) { + if (Impl == nullptr) { + Impl = std::make_unique<llvm::SmallVector<Element>>(); + auto &Item = Impl->emplace_back(); + Item.first = Key; + return Item.second; + } + Element Elem(Key, {}); + auto Iter = _find(Elem); + if (Iter != Impl->end() && Iter->first == Key) { + return Iter->second; + } + Iter = Impl->insert(Iter, Elem); + return Iter->second; + } + + const Diagnostic *lookup(const FunctionEffect *key) { + if (Impl == nullptr) { + return nullptr; + } + Element elem(key, {}); + auto iter = _find(elem); + if (iter != Impl->end() && iter->first == key) { + return &iter->second; + } + return nullptr; + } + + size_t size() const { return Impl ? Impl->size() : 0; } + +private: + ImplVec::iterator _find(const Element &elem) { + return std::lower_bound(Impl->begin(), Impl->end(), elem, + [](const Element &lhs, const Element &rhs) { + return lhs.first < rhs.first; + }); + } +}; + +// ---------- +// State pertaining to a function whose AST is walked. Since there are +// potentially a large number of these objects, it needs care about size. +class PendingFunctionAnalysis { + // Current size: 5 pointers + friend class CompleteFunctionAnalysis; + + struct DirectCall { + const Decl *Callee; + SourceLocation CallLoc; + }; + +public: + // We always have two disjoint sets of effects to verify: + // 1. Effects declared explicitly by this function. + // 2. All other inferrable effects needing verification. + FunctionEffectSet DeclaredVerifiableEffects; + FunctionEffectSet FXToInfer; + +private: + // Diagnostics pertaining to the function's explicit effects. Use a unique_ptr + // to optimize size for the case of 0 diagnostics. + std::unique_ptr<SmallVector<Diagnostic>> DiagnosticsForExplicitFX; + + // Potential diagnostics pertaining to other, non-explicit, inferrable + // effects. + EffectToDiagnosticMap InferrableEffectToFirstDiagnostic; + + std::unique_ptr<SmallVector<DirectCall>> UnverifiedDirectCalls; + +public: + PendingFunctionAnalysis(const CallableInfo &cinfo, + FunctionEffectSet AllInferrableEffectsToVerify) { + MutableFunctionEffectSet fx; + for (const auto *effect : cinfo.Effects) { + if (effect->flags() & FunctionEffect::kRequiresVerification) { + fx.insert(effect); + } + } + DeclaredVerifiableEffects = FunctionEffectSet::create(fx); + + // Check for effects we are not allowed to infer + fx.clear(); + for (const auto *effect : AllInferrableEffectsToVerify) { + if (effect->canInferOnDecl(cinfo.CDecl, cinfo.Effects)) { + fx.insert(effect); + } else { + // Add a diagnostic for this effect if a caller were to + // try to infer it. + auto &diag = + InferrableEffectToFirstDiagnostic.getOrInsertDefault(effect); + diag = + Diagnostic(effect, DiagnosticID::DeclWithoutConstraintOrInference, + cinfo.CDecl->getLocation()); + } + } + // fx is now the set of inferrable effects which are not prohibited + FXToInfer = FunctionEffectSet::create(FunctionEffectSet::create(fx) - + DeclaredVerifiableEffects); + } + + // Hide the way that diagnostics for explicitly required effects vs. inferred + // ones are handled differently. + void checkAddDiagnostic(bool Inferring, const Diagnostic &NewDiag) { + if (!Inferring) { + if (DiagnosticsForExplicitFX == nullptr) { + DiagnosticsForExplicitFX = std::make_unique<SmallVector<Diagnostic>>(); + } + DiagnosticsForExplicitFX->push_back(NewDiag); + } else { + auto &Diag = + InferrableEffectToFirstDiagnostic.getOrInsertDefault(NewDiag.Effect); + if (Diag.ID == DiagnosticID::None) { + Diag = NewDiag; + } + } + } + + void addUnverifiedDirectCall(const Decl *D, SourceLocation CallLoc) { + if (UnverifiedDirectCalls == nullptr) { + UnverifiedDirectCalls = std::make_unique<SmallVector<DirectCall>>(); + } + UnverifiedDirectCalls->emplace_back(DirectCall{D, CallLoc}); + } + + // Analysis is complete when there are no unverified direct calls. + bool isComplete() const { + return UnverifiedDirectCalls == nullptr || UnverifiedDirectCalls->empty(); + } + + const Diagnostic * + diagnosticForInferrableEffect(const FunctionEffect *effect) { + return InferrableEffectToFirstDiagnostic.lookup(effect); + } + + const SmallVector<DirectCall> &unverifiedCalls() const { + assert(!isComplete()); + return *UnverifiedDirectCalls; + } + + SmallVector<Diagnostic> *getDiagnosticsForExplicitFX() const { + return DiagnosticsForExplicitFX.get(); + } + + void dump(llvm::raw_ostream &OS) const { + OS << "Pending: Declared "; + DeclaredVerifiableEffects.dump(OS); + OS << ", " + << (DiagnosticsForExplicitFX ? DiagnosticsForExplicitFX->size() : 0) + << " diags; "; + OS << " Infer "; + FXToInfer.dump(OS); + OS << ", " << InferrableEffectToFirstDiagnostic.size() << " diags\n"; + } +}; + +// ---------- +class CompleteFunctionAnalysis { + // Current size: 2 pointers +public: + // Has effects which are both the declared ones -- not to be inferred -- plus + // ones which have been successfully inferred. These are all considered + // "verified" for the purposes of callers; any issue with verifying declared + // effects has already been reported and is not the problem of any caller. + FunctionEffectSet VerifiedEffects; + +private: + // This is used to generate notes about failed inference. + EffectToDiagnosticMap InferrableEffectToFirstDiagnostic; + +public: + CompleteFunctionAnalysis(PendingFunctionAnalysis &pending, + FunctionEffectSet funcFX, + FunctionEffectSet AllInferrableEffectsToVerify) { + MutableFunctionEffectSet verified; + verified |= funcFX; + for (const auto *effect : AllInferrableEffectsToVerify) { + if (pending.diagnosticForInferrableEffect(effect) == nullptr) { + verified.insert(effect); + } + } + VerifiedEffects = FunctionEffectSet::create(verified); + + InferrableEffectToFirstDiagnostic = + std::move(pending.InferrableEffectToFirstDiagnostic); + } + + const Diagnostic *firstDiagnosticForEffect(const FunctionEffect *effect) { + // TODO: is this correct? + return InferrableEffectToFirstDiagnostic.lookup(effect); + } + + void dump(llvm::raw_ostream &OS) const { + OS << "Complete: Verified "; + VerifiedEffects.dump(OS); + OS << "; Infer "; + OS << InferrableEffectToFirstDiagnostic.size() << " diags\n"; + } +}; + +/* + TODO: nolock and noalloc imply noexcept + if (auto* Method = dyn_cast<CXXMethodDecl>(CInfo.CDecl)) { + if (Method->getType()->castAs<FunctionProtoType>()->canThrow() + != clang::CT_Cannot) { + S.Diag(Callable->getBeginLoc(), + diag::warn_perf_annotation_implies_noexcept) + << getPerfAnnotationSpelling(CInfo.PerfAnnot); + } + } +*/ + +// ========== +class Analyzer { + constexpr static int kDebugLogLevel = 3; + + // -- + Sema &Sem; + + // used from Sema: + // SmallVector<const Decl *> DeclsWithUnverifiedEffects + + // Subset of Sema.AllEffectsToVerify + FunctionEffectSet AllInferrableEffectsToVerify; + + using FuncAnalysisPtr = + llvm::PointerUnion<PendingFunctionAnalysis *, CompleteFunctionAnalysis *>; + + // Map all Decls analyzed to FuncAnalysisPtr. Pending state is larger + // than complete state, so use different objects to represent them. + // The state pointers are owned by the container. + struct AnalysisMap : public llvm::DenseMap<const Decl *, FuncAnalysisPtr> { + + ~AnalysisMap(); + + // use lookup() + + /// Shortcut for the case where we only care about completed analysis. + CompleteFunctionAnalysis *completedAnalysisForDecl(const Decl *D) const { + if (auto AP = lookup(D)) { + if (isa<CompleteFunctionAnalysis *>(AP)) { + return AP.get<CompleteFunctionAnalysis *>(); + } + } + return nullptr; + } + + void dump(Sema &S, llvm::raw_ostream &OS) { + OS << "AnalysisMap:\n"; + for (const auto &item : *this) { + CallableInfo CI(*item.first); + const auto AP = item.second; + OS << item.first << " " << CI.name(S) << " : "; + if (AP.isNull()) { + OS << "null\n"; + } else if (isa<CompleteFunctionAnalysis *>(AP)) { + auto *CFA = AP.get<CompleteFunctionAnalysis *>(); + OS << CFA << " "; + CFA->dump(OS); + } else if (isa<PendingFunctionAnalysis *>(AP)) { + auto *PFA = AP.get<PendingFunctionAnalysis *>(); + OS << PFA << " "; + PFA->dump(OS); + } else + llvm_unreachable("never"); + } + } + }; + AnalysisMap DeclAnalysis; + +public: + Analyzer(Sema &S) : Sem(S) {} + + void run(const TranslationUnitDecl &TU) { +#if FX_ANALYZER_VERIFY_DECL_LIST + verifyRootDecls(TU); +#endif + // Gather all of the effects to be verified to see what operations need to + // be checked, and to see which ones are inferrable. + { + MutableFunctionEffectSet inferrableEffects; + for (const FunctionEffect *effect : Sem.AllEffectsToVerify) { + const auto Flags = effect->flags(); + if (Flags & FunctionEffect::kInferrableOnCallees) { + inferrableEffects.insert(effect); + } + } + AllInferrableEffectsToVerify = + FunctionEffectSet::create(inferrableEffects); + llvm::outs() << "AllInferrableEffectsToVerify: "; + AllInferrableEffectsToVerify.dump(llvm::outs()); + llvm::outs() << "\n"; + } + + SmallVector<const Decl *> &verifyQueue = Sem.DeclsWithUnverifiedEffects; + + // It's useful to use DeclsWithUnverifiedEffects as a stack for a + // depth-first traversal rather than have a secondary container. But first, + // reverse it, so Decls are verified in the order they are declared. + std::reverse(verifyQueue.begin(), verifyQueue.end()); + + while (!verifyQueue.empty()) { + const Decl *D = verifyQueue.back(); + if (auto AP = DeclAnalysis.lookup(D)) { + if (isa<CompleteFunctionAnalysis *>(AP)) { + // already done + verifyQueue.pop_back(); + continue; + } + if (isa<PendingFunctionAnalysis *>(AP)) { + // All children have been traversed; finish analysis. + auto *pending = AP.get<PendingFunctionAnalysis *>(); + finishPendingAnalysis(D, pending); + verifyQueue.pop_back(); + continue; + } + llvm_unreachable("shouldn't happen"); + } + + auto *Pending = verifyDecl(D); + if (Pending == nullptr) { + // completed now + verifyQueue.pop_back(); + continue; + } + + for (const auto &Call : Pending->unverifiedCalls()) { + // This lookup could be optimized out if the results could have been + // saved from followCall when we traversed the caller's AST. It would + // however make the check for recursion more complex. + auto AP = DeclAnalysis.lookup(Call.Callee); + if (AP.isNull()) { + verifyQueue.push_back(Call.Callee); + continue; + } + if (isa<PendingFunctionAnalysis *>(AP)) { + // $$$$$$$$$$$$$$$$$$$$$$$ recursion $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ + __builtin_trap(); ---------------- dougsonos wrote:
Yes, I wanted to think through what should happen in this case. Probably the call can just be ignored and thus treated as implicitly safe. It occurred to me that some effect might want to prohibit recursion but that could be dealt with later. https://github.com/llvm/llvm-project/pull/84983 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits