https://github.com/usx95 updated https://github.com/llvm/llvm-project/pull/148222
>From 19d033cb6aa4a84c78a9b37e25020f4e33e8976b Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena <u...@google.com> Date: Mon, 7 Jul 2025 15:13:00 +0000 Subject: [PATCH 1/3] [LifetimeSafety] Add script performance benchmarking --- .../Analysis/LifetimeSafety/CMakeLists.txt | 49 +++ .../test/Analysis/LifetimeSafety/benchmark.py | 307 ++++++++++++++++++ .../Analysis/LifetimeSafety/requirements.txt | 2 + clang/test/CMakeLists.txt | 2 + 4 files changed, 360 insertions(+) create mode 100644 clang/test/Analysis/LifetimeSafety/CMakeLists.txt create mode 100644 clang/test/Analysis/LifetimeSafety/benchmark.py create mode 100644 clang/test/Analysis/LifetimeSafety/requirements.txt diff --git a/clang/test/Analysis/LifetimeSafety/CMakeLists.txt b/clang/test/Analysis/LifetimeSafety/CMakeLists.txt new file mode 100644 index 0000000000000..ce37a29655668 --- /dev/null +++ b/clang/test/Analysis/LifetimeSafety/CMakeLists.txt @@ -0,0 +1,49 @@ +# ================================================================================= +# Lifetime Analysis Benchmarking Target +# ================================================================================= +# This target allows running performance benchmarks for the clang lifetime analysis +# using a Python script (with managed dependencies). + +find_package(Python3 COMPONENTS Interpreter REQUIRED) + +# Define paths for the virtual environment and requirements file. +set(LIFETIME_BENCHMARK_SCRIPT + "${CMAKE_CURRENT_SOURCE_DIR}/benchmark.py") +set(LIFETIME_BENCHMARK_VENV_DIR "${CMAKE_CURRENT_BINARY_DIR}/benchmark-venv") +set(LIFETIME_BENCHMARK_REQUIREMENTS + "${CMAKE_CURRENT_SOURCE_DIR}/requirements.txt") +set(LIFETIME_BENCHMARK_OUTPUT_DIR + "${CMAKE_CURRENT_BINARY_DIR}/benchmark_results") + + +if(EXISTS ${LIFETIME_BENCHMARK_SCRIPT} AND EXISTS ${LIFETIME_BENCHMARK_REQUIREMENTS}) + + # Set up the virtual environment and install packages + add_custom_command( + OUTPUT ${LIFETIME_BENCHMARK_VENV_DIR}/pyvenv.cfg + COMMAND ${Python3_EXECUTABLE} -m venv ${LIFETIME_BENCHMARK_VENV_DIR} + COMMAND ${LIFETIME_BENCHMARK_VENV_DIR}/bin/python -m pip install -r ${LIFETIME_BENCHMARK_REQUIREMENTS} + DEPENDS ${LIFETIME_BENCHMARK_REQUIREMENTS} + COMMENT "Creating Python virtual environment and installing dependencies for benchmark..." + ) + add_custom_target(benchmark_venv_setup + DEPENDS ${LIFETIME_BENCHMARK_VENV_DIR}/pyvenv.cfg + ) + + # Main benchmark target + add_custom_target(benchmark_lifetime_safety_analysis + COMMAND ${LIFETIME_BENCHMARK_VENV_DIR}/bin/python ${LIFETIME_BENCHMARK_SCRIPT} + --clang-binary ${LLVM_BINARY_DIR}/bin/clang + --output-dir ${LIFETIME_BENCHMARK_OUTPUT_DIR} + + DEPENDS clang benchmark_venv_setup + + # Display the output directly in the console. + USES_TERMINAL + + COMMENT "Running Lifetime Analysis performance benchmarks..." + ) + + set_target_properties(benchmark_lifetime_safety_analysis + PROPERTIES FOLDER "Clang/Benchmarks") +endif() diff --git a/clang/test/Analysis/LifetimeSafety/benchmark.py b/clang/test/Analysis/LifetimeSafety/benchmark.py new file mode 100644 index 0000000000000..9d5f36c51b9ee --- /dev/null +++ b/clang/test/Analysis/LifetimeSafety/benchmark.py @@ -0,0 +1,307 @@ +import sys +import argparse +import subprocess +import tempfile +import json +import os +from datetime import datetime +import numpy as np +from scipy.optimize import curve_fit +from scipy.stats import t + + +def generate_cpp_cycle_test(n: int) -> str: + """ + Generates a C++ code snippet with a specified number of pointers in a cycle. + Creates a while loop that rotates N pointers. + This pattern tests the convergence speed of the dataflow analysis when + reaching its fixed point. + + Example: + struct MyObj { int id; ~MyObj() {} }; + + void long_cycle_4(bool condition) { + MyObj v1{1}; + MyObj v2{1}; + MyObj v3{1}; + MyObj v4{1}; + + MyObj* p1 = &v1; + MyObj* p2 = &v2; + MyObj* p3 = &v3; + MyObj* p4 = &v4; + + while (condition) { + MyObj* temp = p1; + p1 = p2; + p2 = p3; + p3 = p4; + p4 = temp; + } + } + """ + if n <= 0: + return "// Number of variables must be positive." + + cpp_code = "struct MyObj { int id; ~MyObj() {} };\n\n" + cpp_code += f"void long_cycle_{n}(bool condition) {{\n" + for i in range(1, n + 1): + cpp_code += f" MyObj v{i}{{1}};\n" + cpp_code += "\n" + for i in range(1, n + 1): + cpp_code += f" MyObj* p{i} = &v{i};\n" + + cpp_code += "\n while (condition) {\n" + if n > 0: + cpp_code += f" MyObj* temp = p1;\n" + for i in range(1, n): + cpp_code += f" p{i} = p{i+1};\n" + cpp_code += f" p{n} = temp;\n" + cpp_code += " }\n}\n" + cpp_code += f"\nint main() {{ long_cycle_{n}(false); return 0; }}\n" + return cpp_code + + +def generate_cpp_merge_test(n: int) -> str: + """ + Creates N independent if statements that merge at a single point. + This pattern specifically stresses the performance of the + 'LifetimeLattice::join' operation. + + Example: + struct MyObj { int id; ~MyObj() {} }; + + void conditional_merges_4(bool condition) { + MyObj v1, v2, v3, v4; + MyObj *p1 = nullptr, *p2 = nullptr, *p3 = nullptr, *p4 = nullptr; + + if(condition) { p1 = &v1; } + if(condition) { p2 = &v2; } + if(condition) { p3 = &v3; } + if(condition) { p4 = &v4; } + } + """ + if n <= 0: + return "// Number of variables must be positive." + + cpp_code = "struct MyObj { int id; ~MyObj() {} };\n\n" + cpp_code += f"void conditional_merges_{n}(bool condition) {{\n" + decls = [f"v{i}" for i in range(1, n + 1)] + cpp_code += f" MyObj {', '.join(decls)};\n" + ptr_decls = [f"*p{i} = nullptr" for i in range(1, n + 1)] + cpp_code += f" MyObj {', '.join(ptr_decls)};\n\n" + + for i in range(1, n + 1): + cpp_code += f" if(condition) {{ p{i} = &v{i}; }}\n" + + cpp_code += "}\n" + cpp_code += f"\nint main() {{ conditional_merges_{n}(false); return 0; }}\n" + return cpp_code + + +def analyze_trace_file(trace_path: str) -> tuple[float, float]: + """ + Parses the -ftime-trace JSON output to find durations. + + Returns: + A tuple of (lifetime_analysis_duration_us, total_clang_duration_us). + """ + lifetime_duration = 0.0 + total_duration = 0.0 + try: + with open(trace_path, "r") as f: + trace_data = json.load(f) + for event in trace_data.get("traceEvents", []): + if event.get("name") == "LifetimeSafetyAnalysis": + lifetime_duration += float(event.get("dur", 0)) + if event.get("name") == "ExecuteCompiler": + total_duration += float(event.get("dur", 0)) + + except (IOError, json.JSONDecodeError) as e: + print(f"Error reading or parsing trace file {trace_path}: {e}", file=sys.stderr) + return 0.0, 0.0 + return lifetime_duration, total_duration + + +def power_law(n, c, k): + """Represents the power law function: y = c * n^k""" + return c * np.power(n, k) + + +def human_readable_time(ms: float) -> str: + """Converts milliseconds to a human-readable string (ms or s).""" + if ms >= 1000: + return f"{ms / 1000:.2f} s" + return f"{ms:.2f} ms" + + +def generate_markdown_report(results: dict) -> str: + """Generates a Markdown-formatted report from the benchmark results.""" + report = [] + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S %Z") + report.append(f"# Lifetime Analysis Performance Report") + report.append(f"> Generated on: {timestamp}") + report.append("\n---\n") + + for test_name, data in results.items(): + title = data["title"] + report.append(f"## Test Case: {title}") + report.append("") + + # Table header + report.append("| N | Analysis Time | Total Clang Time |") + report.append("|:----|--------------:|-----------------:|") + + # Table rows + n_data = np.array(data["n"]) + analysis_data = np.array(data["lifetime_ms"]) + total_data = np.array(data["total_ms"]) + for i in range(len(n_data)): + analysis_str = human_readable_time(analysis_data[i]) + total_str = human_readable_time(total_data[i]) + report.append(f"| {n_data[i]:<3} | {analysis_str:>13} | {total_str:>16} |") + + report.append("") + + # Complexity analysis + report.append(f"**Complexity Analysis:**") + try: + # Curve fitting requires at least 3 points + if len(n_data) < 3: + raise ValueError("Not enough data points to perform curve fitting.") + + popt, pcov = curve_fit( + power_law, n_data, analysis_data, p0=[0, 2], maxfev=5000 + ) + _, k = popt + + # Confidence Interval for k + alpha = 0.05 # 95% confidence + dof = max(0, len(n_data) - len(popt)) # degrees of freedom + t_val = t.ppf(1.0 - alpha / 2.0, dof) + # Standard error of the parameters + perr = np.sqrt(np.diag(pcov)) + k_stderr = perr[1] + k_ci_lower = k - t_val * k_stderr + k_ci_upper = k + t_val * k_stderr + + report.append( + f"- The performance for this case scales approx. as **O(n<sup>{k:.2f}</sup>)**." + ) + report.append( + f"- **95% Confidence interval for exponent:** `[{k_ci_lower:.2f}, {k_ci_upper:.2f}]`." + ) + + except (RuntimeError, ValueError) as e: + report.append(f"- Could not determine a best-fit curve for the data: {e}") + + report.append("\n---\n") + + return "\n".join(report) + + +def run_single_test( + clang_binary: str, output_dir: str, test_name: str, generator_func, n: int +) -> tuple[float, float]: + """Generates, compiles, and benchmarks a single test case.""" + print(f"--- Running Test: {test_name.capitalize()} with N={n} ---") + + generated_code = generator_func(n) + + base_name = f"test_{test_name}_{n}" + source_file = os.path.join(output_dir, f"{base_name}.cpp") + trace_file = os.path.join(output_dir, f"{base_name}.json") + + with open(source_file, "w") as f: + f.write(generated_code) + + clang_command = [ + clang_binary, + "-c", + "-o", + "/dev/null", + "-ftime-trace=" + trace_file, + "-Wexperimental-lifetime-safety", + "-std=c++17", + source_file, + ] + + result = subprocess.run(clang_command, capture_output=True, text=True) + + if result.returncode != 0: + print(f"Compilation failed for N={n}!", file=sys.stderr) + print(result.stderr, file=sys.stderr) + return 0.0, 0.0 + + lifetime_us, total_us = analyze_trace_file(trace_file) + + return lifetime_us / 1000.0, total_us / 1000.0 + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Generate, compile, and benchmark C++ test cases for Clang's lifetime analysis." + ) + parser.add_argument( + "--clang-binary", type=str, required=True, help="Path to the Clang executable." + ) + parser.add_argument( + "--output-dir", + type=str, + default="benchmark_results", + help="Directory to save persistent benchmark files. (Default: ./benchmark_results)", + ) + + args = parser.parse_args() + + os.makedirs(args.output_dir, exist_ok=True) + print(f"Benchmark files will be saved in: {os.path.abspath(args.output_dir)}\n") + + test_configurations = [ + { + "name": "cycle", + "title": "Pointer Cycle in Loop", + "generator_func": generate_cpp_cycle_test, + "n_values": [10, 25, 50, 75, 100, 150], + }, + { + "name": "merge", + "title": "CFG Merges", + "generator_func": generate_cpp_merge_test, + "n_values": [10, 50, 100, 200, 400, 800], + }, + ] + + results = {} + + print("Running performance benchmarks...") + for config in test_configurations: + test_name = config["name"] + results[test_name] = { + "title": config["title"], + "n": [], + "lifetime_ms": [], + "total_ms": [], + } + for n in config["n_values"]: + lifetime_ms, total_ms = run_single_test( + args.clang_binary, + args.output_dir, + test_name, + config["generator_func"], + n, + ) + if total_ms > 0: + results[test_name]["n"].append(n) + results[test_name]["lifetime_ms"].append(lifetime_ms) + results[test_name]["total_ms"].append(total_ms) + print( + f" Total: {human_readable_time(total_ms)} | Analysis: {human_readable_time(lifetime_ms)}" + ) + + print("\n\n" + "=" * 80) + print("Generating Markdown Report...") + print("=" * 80 + "\n") + + markdown_report = generate_markdown_report(results) + print(markdown_report) diff --git a/clang/test/Analysis/LifetimeSafety/requirements.txt b/clang/test/Analysis/LifetimeSafety/requirements.txt new file mode 100644 index 0000000000000..6bad10388ecb1 --- /dev/null +++ b/clang/test/Analysis/LifetimeSafety/requirements.txt @@ -0,0 +1,2 @@ +numpy +scipy diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt index 416af9ab4d0aa..286c9d40d2dab 100644 --- a/clang/test/CMakeLists.txt +++ b/clang/test/CMakeLists.txt @@ -234,3 +234,5 @@ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/debuginfo-tests) add_subdirectory(debuginfo-tests) endif() endif() + +add_subdirectory(Analysis/LifetimeSafety) >From eb33d75e1ad1faf621f90d0b8f6ac3deab267084 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena <u...@google.com> Date: Fri, 11 Jul 2025 11:11:47 +0000 Subject: [PATCH 2/3] [LifetimeSafety] Add expired loans analysis --- clang/lib/Analysis/LifetimeSafety.cpp | 140 ++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index 20f3285249ac2..a6de79cffb371 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -729,6 +729,142 @@ class LifetimeDataflow { } }; +// ========================================================================= // +// Expired Loans Analysis +// ========================================================================= // + +/// The lattice for tracking expired loans. It is a set of loan IDs. +struct ExpiredLattice { + LoanSet Expired; + + ExpiredLattice() = default; + explicit ExpiredLattice(LoanSet S) : Expired(S) {} + + bool operator==(const ExpiredLattice &Other) const { + return Expired == Other.Expired; + } + bool operator!=(const ExpiredLattice &Other) const { + return !(*this == Other); + } + + /// Computes the union of two lattices. + ExpiredLattice join(const ExpiredLattice &Other, + LoanSet::Factory &Factory) const { + LoanSet JoinedSet = Expired; + for (LoanID LID : Other.Expired) + JoinedSet = Factory.add(JoinedSet, LID); + return ExpiredLattice(JoinedSet); + } + + void dump(llvm::raw_ostream &OS) const { + OS << "ExpiredLattice State:\n"; + if (Expired.isEmpty()) + OS << " <empty>\n"; + for (const LoanID &LID : Expired) + OS << " Loan " << LID << " is expired\n"; + } +}; + +/// Transfer function for the expired loans analysis. +class ExpiredLoansTransferer { + FactManager &AllFacts; + LoanSet::Factory &SetFactory; + +public: + explicit ExpiredLoansTransferer(FactManager &F, LoanSet::Factory &SF) + : AllFacts(F), SetFactory(SF) {} + + /// Computes the exit state of a block by applying all its facts sequentially + /// to a given entry state. + ExpiredLattice transferBlock(const CFGBlock *Block, + ExpiredLattice EntryState) { + ExpiredLattice BlockState = EntryState; + llvm::ArrayRef<const Fact *> Facts = AllFacts.getFacts(Block); + + for (const Fact *F : Facts) { + BlockState = transferFact(BlockState, F); + } + return BlockState; + } + +private: + ExpiredLattice transferFact(ExpiredLattice In, const Fact *F) { + if (const auto *EF = F->getAs<ExpireFact>()) + return ExpiredLattice(SetFactory.add(In.Expired, EF->getLoanID())); + + if (const auto *IF = F->getAs<IssueFact>()) + return ExpiredLattice(SetFactory.remove(In.Expired, IF->getLoanID())); + + return In; + } +}; + +/// Dataflow analysis driver for tracking expired loans. +class ExpiredLoansAnalysis { + const CFG &Cfg; + AnalysisDeclContext &AC; + LoanSet::Factory SetFactory; + ExpiredLoansTransferer Xfer; + + llvm::DenseMap<const CFGBlock *, ExpiredLattice> BlockEntryStates; + llvm::DenseMap<const CFGBlock *, ExpiredLattice> BlockExitStates; + +public: + ExpiredLoansAnalysis(const CFG &C, FactManager &FS, AnalysisDeclContext &AC) + : Cfg(C), AC(AC), Xfer(FS, SetFactory) {} + + void run() { + llvm::TimeTraceScope TimeProfile("Expired Loans Analysis"); + ForwardDataflowWorklist Worklist(Cfg, AC); + const CFGBlock *Entry = &Cfg.getEntry(); + BlockEntryStates[Entry] = ExpiredLattice(SetFactory.getEmptySet()); + Worklist.enqueueBlock(Entry); + while (const CFGBlock *B = Worklist.dequeue()) { + ExpiredLattice EntryState = getEntryState(B); + ExpiredLattice ExitState = Xfer.transferBlock(B, EntryState); + BlockExitStates[B] = ExitState; + + for (const CFGBlock *Successor : B->succs()) { + auto SuccIt = BlockEntryStates.find(Successor); + ExpiredLattice OldSuccEntryState = (SuccIt != BlockEntryStates.end()) + ? SuccIt->second + : ExpiredLattice{}; + ExpiredLattice NewSuccEntryState = + OldSuccEntryState.join(ExitState, SetFactory); + if (SuccIt == BlockEntryStates.end() || + NewSuccEntryState != OldSuccEntryState) { + BlockEntryStates[Successor] = NewSuccEntryState; + Worklist.enqueueBlock(Successor); + } + } + } + } + + void dump() const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << " Expired Loans Results:\n"; + llvm::dbgs() << "==========================================\n"; + const CFGBlock &B = Cfg.getExit(); + getExitState(&B).dump(llvm::dbgs()); + } + + ExpiredLattice getEntryState(const CFGBlock *B) const { + auto It = BlockEntryStates.find(B); + if (It != BlockEntryStates.end()) { + return It->second; + } + return ExpiredLattice(SetFactory.getEmptySet()); + } + + ExpiredLattice getExitState(const CFGBlock *B) const { + auto It = BlockExitStates.find(B); + if (It != BlockExitStates.end()) { + return It->second; + } + return ExpiredLattice(SetFactory.getEmptySet()); + } +}; + // ========================================================================= // // TODO: Analysing dataflow results and error reporting. // ========================================================================= // @@ -756,5 +892,9 @@ void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg, LifetimeDataflow Dataflow(Cfg, FactMgr, AC); Dataflow.run(); DEBUG_WITH_TYPE("LifetimeDataflow", Dataflow.dump()); + + ExpiredLoansAnalysis ExpiredAnalysis(Cfg, FactMgr, AC); + ExpiredAnalysis.run(); + DEBUG_WITH_TYPE("ExpiredLoans", ExpiredAnalysis.dump()); } } // namespace clang >From 47425603375f6fd568da7cec2dbc7073406dbbed Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena <u...@google.com> Date: Mon, 14 Jul 2025 15:50:10 +0000 Subject: [PATCH 3/3] make the dataflow algorithm generic to avoid duplication --- clang/lib/Analysis/LifetimeSafety.cpp | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index a6de79cffb371..cdb3e5e47c90f 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -737,7 +737,7 @@ class LifetimeDataflow { struct ExpiredLattice { LoanSet Expired; - ExpiredLattice() = default; + ExpiredLattice() : Expired(nullptr) {}; explicit ExpiredLattice(LoanSet S) : Expired(S) {} bool operator==(const ExpiredLattice &Other) const { @@ -814,10 +814,10 @@ class ExpiredLoansAnalysis { : Cfg(C), AC(AC), Xfer(FS, SetFactory) {} void run() { - llvm::TimeTraceScope TimeProfile("Expired Loans Analysis"); + llvm::TimeTraceScope TimeProfile("ExpiredLoansAnalysis"); ForwardDataflowWorklist Worklist(Cfg, AC); const CFGBlock *Entry = &Cfg.getEntry(); - BlockEntryStates[Entry] = ExpiredLattice(SetFactory.getEmptySet()); + BlockEntryStates[Entry] = ExpiredLattice{}; Worklist.enqueueBlock(Entry); while (const CFGBlock *B = Worklist.dequeue()) { ExpiredLattice EntryState = getEntryState(B); @@ -849,24 +849,16 @@ class ExpiredLoansAnalysis { } ExpiredLattice getEntryState(const CFGBlock *B) const { - auto It = BlockEntryStates.find(B); - if (It != BlockEntryStates.end()) { - return It->second; - } - return ExpiredLattice(SetFactory.getEmptySet()); + return BlockEntryStates.lookup(B); } ExpiredLattice getExitState(const CFGBlock *B) const { - auto It = BlockExitStates.find(B); - if (It != BlockExitStates.end()) { - return It->second; - } - return ExpiredLattice(SetFactory.getEmptySet()); + return BlockExitStates.lookup(B); } }; // ========================================================================= // -// TODO: Analysing dataflow results and error reporting. +// TODO: Liveness analysis, analysing dataflow results and error reporting. // ========================================================================= // } // anonymous namespace @@ -895,6 +887,6 @@ void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg, ExpiredLoansAnalysis ExpiredAnalysis(Cfg, FactMgr, AC); ExpiredAnalysis.run(); - DEBUG_WITH_TYPE("ExpiredLoans", ExpiredAnalysis.dump()); + DEBUG_WITH_TYPE("LifetimeExpiredLoans", ExpiredAnalysis.dump()); } } // namespace clang _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits