sammccall created this revision. Herald added a subscriber: wenlei. Herald added a reviewer: NoQ. Herald added a project: All. sammccall requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits.
This is just a draft and a starting point, and needs more design & tests. The goal is to be able to understand how the analysis executes, and what its incremental and final findings are, by enabling logging and reading the logs. This should include what the framework does and what the specific analysis does. Ad-hoc printf-debugging doesn't seem sufficient for my understanding, at least. Being able to check in logging, turn it on in a production binary, and quickly find particular analysis steps within complex functions seems important. Important missing pieces here: - a logger implementation that produces an interactive report (HTML file) which can be navigated via timeline/code/CFG. (I think the Logger interface is sufficient for this, but I need to prototype). - display of the application-specific lattice - more useful display for the built-in environment (e.g. meaningful & consistent names for values, hiding redundant variables in the flow condition, hiding unreachable expressions) This patch also contains a trivial standalone analyzer binary using libTooling. This is not strictly related, but I think it is useful for exploratory reasons. At the least, it belongs in a separate patch. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D144730 Files: clang/examples/CMakeLists.txt clang/examples/FlowSensitiveAnalysis/CMakeLists.txt clang/examples/FlowSensitiveAnalysis/SampleAnalysis.cpp clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h clang/include/clang/Analysis/FlowSensitive/Logger.h clang/lib/Analysis/FlowSensitive/CMakeLists.txt clang/lib/Analysis/FlowSensitive/Logger.cpp clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
Index: clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp =================================================================== --- clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -189,7 +189,10 @@ llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockStates) : CFCtx(CFCtx), Analysis(Analysis), InitEnv(InitEnv), - BlockStates(BlockStates) {} + Log(InitEnv.logger()), BlockStates(BlockStates) { + Log.beginAnalysis(CFCtx, Analysis); + } + ~AnalysisContext() { Log.endAnalysis(); } /// Contains the CFG being analyzed. const ControlFlowContext &CFCtx; @@ -197,6 +200,7 @@ TypeErasedDataflowAnalysis &Analysis; /// Initial state to start the analysis. const Environment &InitEnv; + Logger &Log; /// Stores the state of a CFG block if it has been evaluated by the analysis. /// The indices correspond to the block IDs. llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockStates; @@ -366,8 +370,11 @@ std::function<void(const CFGElement &, const TypeErasedDataflowAnalysisState &)> PostVisitCFG = nullptr) { + AC.Log.enterBlock(Block); auto State = computeBlockInputState(Block, AC); + AC.Log.recordState(State); for (const auto &Element : Block) { + AC.Log.enterElement(Element); // Built-in analysis if (AC.Analysis.builtinOptions()) { builtinTransfer(Element, State, AC); @@ -380,6 +387,7 @@ if (PostVisitCFG) { PostVisitCFG(Element, State); } + AC.Log.recordState(State); } return State; } @@ -460,15 +468,18 @@ LatticeJoinEffect Effect2 = NewBlockState.Env.widen(OldBlockState->Env, Analysis); if (Effect1 == LatticeJoinEffect::Unchanged && - Effect2 == LatticeJoinEffect::Unchanged) + Effect2 == LatticeJoinEffect::Unchanged) { // The state of `Block` didn't change from widening so there's no need // to revisit its successors. 
+ AC.Log.blockConverged(); continue; + } } else if (Analysis.isEqualTypeErased(OldBlockState->Lattice, NewBlockState.Lattice) && OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) { // The state of `Block` didn't change after transfer so there's no need // to revisit its successors. + AC.Log.blockConverged(); continue; } } Index: clang/lib/Analysis/FlowSensitive/Logger.cpp =================================================================== --- /dev/null +++ clang/lib/Analysis/FlowSensitive/Logger.cpp @@ -0,0 +1,104 @@ +//===-- Logger.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Logger.h" +#include "clang/Analysis/FlowSensitive/ControlFlowContext.h" +#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" +#include "llvm/Support/WithColor.h" + +namespace clang::dataflow { + +Logger &Logger::null() { + struct NullLogger : Logger {}; + static auto *Instance = new NullLogger(); + return *Instance; +} + +namespace { +struct TextualLogger : Logger { + llvm::raw_ostream &OS; + const CFG *CurrentCFG; + const CFGBlock *CurrentBlock; + const CFGElement *CurrentElement; + unsigned CurrentElementIndex; + llvm::DenseMap<const CFGBlock *, unsigned> VisitCount; + TypeErasedDataflowAnalysis *CurrentAnalysis; + + TextualLogger(llvm::raw_ostream &OS) : OS(OS) {} + + virtual void beginAnalysis(const ControlFlowContext &CFG, + TypeErasedDataflowAnalysis &Analysis) override { + { + llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true); + OS << "=== Beginning data flow analysis ===\n"; + } + if (auto *D = CFG.getDecl()) + D->dump(OS); + CurrentCFG = &CFG.getCFG(); + CurrentCFG->print(OS, 
Analysis.getASTContext().getLangOpts(), + /*ShowColors=*/true); + CurrentAnalysis = &Analysis; + } + virtual void endAnalysis() override { + llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true); + unsigned Blocks = 0, Steps = 0; + for (const auto &E : VisitCount) { + ++Blocks; + Steps += E.second; + } + llvm::errs() << "=== Finished analysis: " << Blocks << " blocks in " + << Steps << " total steps ===\n"; + } + virtual void enterBlock(const CFGBlock &Block) override { + unsigned Count = ++VisitCount[&Block]; + { + llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true); + OS << "=== Entering block B" << Block.getBlockID() << " (iteration " + << Count << ") ===\n"; + } + Block.print(OS, CurrentCFG, CurrentAnalysis->getASTContext().getLangOpts(), + /*ShowColors=*/true); + CurrentBlock = &Block; + CurrentElement = nullptr; + CurrentElementIndex = 0; + } + virtual void enterElement(const CFGElement &Element) override { + ++CurrentElementIndex; + CurrentElement = &Element; + { + llvm::WithColor Subheader(OS, llvm::raw_ostream::Colors::CYAN, + /*Bold=*/true); + OS << "Processing element B" << CurrentBlock->getBlockID() << "." + << CurrentElementIndex << ": "; + Element.dumpToStream(OS); + } + } + void recordState(TypeErasedDataflowAnalysisState &State) override { + { + llvm::WithColor Subheader(OS, llvm::raw_ostream::Colors::CYAN, + /*Bold=*/true); + OS << "Computed state for B" << CurrentBlock->getBlockID() << "." + << CurrentElementIndex << ":\n"; + } + // FIXME: currently the environment dump is verbose and unenlightening. + // FIXME: dump the user-defined lattice, too.
+ State.Env.dump(OS); + OS << "\n"; + } + void blockConverged() override { + OS << "B" << CurrentBlock->getBlockID() << " has converged!\n"; + } + virtual void logText(llvm::StringRef S) override { OS << S << "\n"; } +}; +} // namespace + +std::unique_ptr<Logger> Logger::textual(llvm::raw_ostream &OS) { + return std::make_unique<TextualLogger>(OS); +} + +} // namespace clang::dataflow Index: clang/lib/Analysis/FlowSensitive/CMakeLists.txt =================================================================== --- clang/lib/Analysis/FlowSensitive/CMakeLists.txt +++ clang/lib/Analysis/FlowSensitive/CMakeLists.txt @@ -2,6 +2,7 @@ ControlFlowContext.cpp DataflowAnalysisContext.cpp DataflowEnvironment.cpp + Logger.cpp Transfer.cpp TypeErasedDataflowAnalysis.cpp Value.cpp Index: clang/include/clang/Analysis/FlowSensitive/Logger.h =================================================================== --- /dev/null +++ clang/include/clang/Analysis/FlowSensitive/Logger.h @@ -0,0 +1,84 @@ +//===-- Logger.h ------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_LOGGER_H +#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_LOGGER_H + +#include "clang/Analysis/CFG.h" +#include "llvm/Support/raw_ostream.h" +#include <memory> + +namespace clang::dataflow { + +class ControlFlowContext; +class TypeErasedDataflowAnalysis; +struct TypeErasedDataflowAnalysisState; + +// A logger is notified as the analysis progresses. +// It can produce a report of the analysis's findings and how it came to them. +// +// The framework reports key structural events (e.g. traversal of blocks). +// The specific analysis can add extra details to be presented in context. 
+class Logger { +public: + // Returns a dummy logger that does nothing. + static Logger &null(); + // A logger that simply writes messages to the specified ostream in real time. + static std::unique_ptr<Logger> textual(llvm::raw_ostream &); + + virtual ~Logger() = default; + + // Called by the framework as we start analyzing a new function or statement. + // Forms a pair with endAnalysis(). + virtual void beginAnalysis(const ControlFlowContext &, + TypeErasedDataflowAnalysis &) {} + virtual void endAnalysis() {} + + // At any time during the analysis, we're computing the state for some target + // program point. + + // Called when we start (re-)processing a block in the CFG. + // The target program point is the entry to the specified block. + // Calls to log() describe transferBranch(), join() etc. + virtual void enterBlock(const CFGBlock &) {} + // Called when we start processing an element in the current CFG block. + // The target program point is after the specified element. + // Calls to log() describe the transfer() function. + virtual void enterElement(const CFGElement &) {} + + // Records the analysis state computed for the current program point. + virtual void recordState(TypeErasedDataflowAnalysisState &) {} + // Records that the analysis state for the current block is now final. + virtual void blockConverged() {} + + // Called by the framework or user code to report some event. + // The event is associated with the current context (program point). + // The Emit function produces the log message. It may or may not be called, + // depending on if the logger is interested; it should have no side effects. + void log(llvm::function_ref<void(llvm::raw_ostream &)> Emit) { + if (!ShouldLogText) + return; + std::string S; + llvm::raw_string_ostream OS(S); + Emit(OS); + logText(S); + } + +protected: + // ShouldLogText should be false for trivial loggers that ignore logText(). + // This allows log() to skip evaluating its Emit function. 
+ Logger(bool ShouldLogText = true) : ShouldLogText(ShouldLogText) {} + +private: + bool ShouldLogText; + virtual void logText(llvm::StringRef) {} +}; + +} // namespace clang::dataflow + +#endif Index: clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h =================================================================== --- clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -22,6 +22,7 @@ #include "clang/Analysis/FlowSensitive/ControlFlowContext.h" #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "clang/Analysis/FlowSensitive/Logger.h" #include "clang/Analysis/FlowSensitive/StorageLocation.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/DenseMap.h" @@ -177,10 +178,14 @@ /// with a symbolic representation of the `this` pointee. Environment(DataflowAnalysisContext &DACtx, const DeclContext &DeclCtx); - const DataflowAnalysisContext::Options &getAnalysisOptions() { + const DataflowAnalysisContext::Options &getAnalysisOptions() const { return DACtx->getOptions(); } + Logger &logger() const { + return DACtx->getOptions().Log ? *DACtx->getOptions().Log : Logger::null(); + } + /// Creates and returns an environment to use for an inline analysis of the /// callee. Uses the storage location from each argument in the `Call` as the /// storage location for the corresponding parameter in the callee. Index: clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h =================================================================== --- clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h +++ clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h @@ -34,6 +34,7 @@ namespace clang { namespace dataflow { +class Logger; /// Skip past nodes that the CFG does not emit. 
These nodes are invisible to /// flow-sensitive analysis, and should be ignored as they will effectively not @@ -67,6 +68,9 @@ /// fundamentally limited: some constructs, such as recursion, are /// explicitly unsupported. std::optional<ContextSensitiveOptions> ContextSensitiveOpts; + + /// If provided, analysis details will be recorded here. + Logger *Log; }; /// Constructs a dataflow analysis context. @@ -76,7 +80,8 @@ /// `S` must not be null. DataflowAnalysisContext(std::unique_ptr<Solver> S, Options Opts = Options{ - /*ContextSensitiveOpts=*/std::nullopt}) + /*ContextSensitiveOpts=*/std::nullopt, + /*Logger=*/nullptr}) : S(std::move(S)), TrueVal(createAtomicBoolValue()), FalseVal(createAtomicBoolValue()), Opts(Opts) { assert(this->S != nullptr); Index: clang/examples/FlowSensitiveAnalysis/SampleAnalysis.cpp =================================================================== --- /dev/null +++ clang/examples/FlowSensitiveAnalysis/SampleAnalysis.cpp @@ -0,0 +1,88 @@ +//===-- SampleAnalysis.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This tool runs a trivial analysis using the clang::dataflow framework. +// Currently this computes only the framework's built-in analysis. 
+// +// Usage is `clang-dataflow-sample test.cc -- <extra clang flags>` +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h" +#include "clang/Tooling/Execution.h" +#include "llvm/Support/CommandLine.h" + +namespace { +void require(llvm::Error E) { + if (E) { + llvm::errs() << toString(std::move(E)) << "\n"; + exit(1); + } +} +template <typename T> T require(llvm::Expected<T> E) { + require(E.takeError()); + return std::move(*E); +} +} // namespace + +namespace clang::dataflow::sample { + +struct Lattice { + LatticeJoinEffect join(const Lattice &other) { + return LatticeJoinEffect::Unchanged; + } + bool operator==(const Lattice &Other) const { return true; } +}; + +struct Analysis : dataflow::DataflowAnalysis<Analysis, Lattice> { + using DataflowAnalysis::DataflowAnalysis; + + Lattice initialElement() { return {}; } + void transfer(const CFGElement &, Lattice &, Environment &Env) { + Env.logger().log([](llvm::raw_ostream &OS) { OS << "transfer!"; }); + } +}; + +class Consumer : public ASTConsumer { + bool HandleTopLevelDecl(DeclGroupRef DG) override { + for (const auto *D : DG) + if (const auto *FD = dyn_cast<FunctionDecl>(D)) + if (FD->getDeclName().isIdentifier() && FD->getName() == "target" && + FD->hasBody()) { + DataflowAnalysisContext::Options Opts; + auto Log = Logger::textual(llvm::errs()); + Opts.Log = Log.get(); + DataflowAnalysisContext DACtx( + std::make_unique<WatchedLiteralsSolver>(), Opts); + auto &Ctx = FD->getDeclContext()->getParentASTContext(); + + auto CFCtx = + require(ControlFlowContext::build(FD, *FD->getBody(), Ctx)); + Analysis A(Ctx); + Environment Env(DACtx); + auto Out = + require(clang::dataflow::runDataflowAnalysis(CFCtx, A, Env)); + } + return true; + } +}; + +} // namespace clang::dataflow::sample + +int main(int argc, const char **argv) { + llvm::cl::OptionCategory 
OptCategory(""); + struct Factory { + std::unique_ptr<clang::ASTConsumer> newASTConsumer() { + return std::make_unique<clang::dataflow::sample::Consumer>(); + } + } F; + require(require(clang::tooling::createExecutorFromCommandLineArgs( + argc, argv, OptCategory)) + ->execute(clang::tooling::newFrontendActionFactory(&F))); +} Index: clang/examples/FlowSensitiveAnalysis/CMakeLists.txt =================================================================== --- /dev/null +++ clang/examples/FlowSensitiveAnalysis/CMakeLists.txt @@ -0,0 +1,9 @@ +add_clang_tool(clang-dataflow-sample + SampleAnalysis.cpp) + +target_link_libraries(clang-dataflow-sample + PRIVATE + clangAnalysisFlowSensitive + clangBasic + clangTooling +) Index: clang/examples/CMakeLists.txt =================================================================== --- clang/examples/CMakeLists.txt +++ clang/examples/CMakeLists.txt @@ -10,3 +10,5 @@ add_subdirectory(CallSuperAttribute) add_subdirectory(PluginsOrder) endif() + +add_subdirectory(FlowSensitiveAnalysis)
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits