https://github.com/usx95 updated https://github.com/llvm/llvm-project/pull/147315
>From 0fbfd74d23b6cd26ef0480f7b9061b2f4a745338 Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <u...@google.com>
Date: Mon, 7 Jul 2025 15:13:00 +0000
Subject: [PATCH 1/2] [LifetimeSafety] Add performance benchmarking script

---
 clang/lib/Analysis/LifetimeSafety.cpp         |   7 +-
 .../Analysis/lifetime_safety/benchmark.py     | 215 ++++++++++++++++++
 2 files changed, 221 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Analysis/lifetime_safety/benchmark.py

diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp
index e881e592ef59f..1c83b5051bad1 100644
--- a/clang/lib/Analysis/LifetimeSafety.cpp
+++ b/clang/lib/Analysis/LifetimeSafety.cpp
@@ -151,7 +151,12 @@ class OriginManager {
 
   OriginID get(const ValueDecl &D) {
     auto It = DeclToOriginID.find(&D);
-    assert(It != DeclToOriginID.end());
+    // TODO: This should be an assert(It != DeclToOriginID.end()). The current
+    // implementation falls back to getOrCreate to avoid crashing on
+    // yet-unhandled pointer expressions, creating an empty origin for them.
+    if (It == DeclToOriginID.end())
+      return getOrCreate(D);
+
     return It->second;
   }
 
diff --git a/clang/test/Analysis/lifetime_safety/benchmark.py b/clang/test/Analysis/lifetime_safety/benchmark.py
new file mode 100644
index 0000000000000..ddf32e192de17
--- /dev/null
+++ b/clang/test/Analysis/lifetime_safety/benchmark.py
@@ -0,0 +1,215 @@
+import sys
+import argparse
+import subprocess
+import tempfile
+import json
+import os
+from datetime import datetime
+import numpy as np
+from scipy.optimize import curve_fit
+from scipy.stats import t
+
+def generate_cpp_cycle_test(n: int) -> str:
+    """
+    Generates a C++ code snippet with a specified number of pointers in a cycle.
+    """
+    if n <= 0:
+        return "// Number of variables must be positive."
+
+    cpp_code = "struct MyObj { int id; ~MyObj() {} };\n\n"
+    cpp_code += f"void long_cycle_{n}(bool condition) {{\n"
+    for i in range(1, n + 1):
+        cpp_code += f"  MyObj v{i}{{1}};\n"
+    cpp_code += "\n"
+    for i in range(1, n + 1):
+        cpp_code += f"  MyObj* p{i} = &v{i};\n"
+
+    cpp_code += "\n  while (condition) {\n"
+    if n > 0:
+        cpp_code += f"    MyObj* temp = p1;\n"
+        for i in range(1, n):
+            cpp_code += f"    p{i} = p{i+1};\n"
+        cpp_code += f"    p{n} = temp;\n"
+    cpp_code += "  }\n}\n"
+    cpp_code += f"\nint main() {{ long_cycle_{n}(false); return 0; }}\n"
+    return cpp_code
+
+def generate_cpp_merge_test(n: int) -> str:
+    """
+    Generates a C++ code snippet with N independent conditional assignments.
+    """
+    if n <= 0:
+        return "// Number of variables must be positive."
+
+    cpp_code = "struct MyObj { int id; ~MyObj() {} };\n\n"
+    cpp_code += f"void conditional_merges_{n}(bool condition) {{\n"
+    decls = [f"v{i}" for i in range(1, n + 1)]
+    cpp_code += f"  MyObj {', '.join(decls)};\n"
+    ptr_decls = [f"*p{i} = nullptr" for i in range(1, n + 1)]
+    cpp_code += f"  MyObj {', '.join(ptr_decls)};\n\n"
+
+    for i in range(1, n + 1):
+        cpp_code += f"  if(condition) {{ p{i} = &v{i}; }}\n"
+
+    cpp_code += "}\n"
+    cpp_code += f"\nint main() {{ conditional_merges_{n}(false); return 0; }}\n"
+    return cpp_code
+
+def analyze_trace_file(trace_path: str) -> tuple[float, float]:
+    """
+    Parses the -ftime-trace JSON output to find durations.
+
+    Returns:
+        A tuple of (lifetime_analysis_duration_us, total_clang_duration_us).
+    """
+    lifetime_duration = 0.0
+    total_duration = 0.0
+    try:
+        with open(trace_path, 'r') as f:
+            trace_data = json.load(f)
+            for event in trace_data.get('traceEvents', []):
+                if event.get('name') == 'LifetimeAnalysis':
+                    lifetime_duration += float(event.get('dur', 0))
+                if event.get('name') == 'ExecuteCompiler':
+                    total_duration += float(event.get('dur', 0))
+
+    except (IOError, json.JSONDecodeError) as e:
+        print(f"Error reading or parsing trace file {trace_path}: {e}", file=sys.stderr)
+        return 0.0, 0.0
+    return lifetime_duration, total_duration
+
+def power_law(n, c, k):
+    """Represents the power law function: y = c * n^k"""
+    return c * np.power(n, k)
+
+def human_readable_time(ms: float) -> str:
+    """Converts milliseconds to a human-readable string (ms or s)."""
+    if ms >= 1000:
+        return f"{ms / 1000:.2f} s"
+    return f"{ms:.2f} ms"
+
+def generate_markdown_report(results: dict) -> str:
+    """Generates a Markdown-formatted report from the benchmark results."""
+    report = []
+    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S %Z")
+    report.append(f"# Lifetime Analysis Performance Report")
+    report.append(f"> Generated on: {timestamp}")
+    report.append("\n---\n")
+
+    for test_type, data in results.items():
+        title = 'Pointer Cycle in Loop' if test_type == 'cycle' else 'CFG Merges'
+        report.append(f"## Test Case: {title}")
+        report.append("")
+
+        # Table header
+        report.append("| N | Analysis Time | Total Clang Time |")
+        report.append("|:----|--------------:|-----------------:|")
+
+        # Table rows
+        n_data = np.array(data['n'])
+        analysis_data = np.array(data['lifetime_ms'])
+        total_data = np.array(data['total_ms'])
+        for i in range(len(n_data)):
+            analysis_str = human_readable_time(analysis_data[i])
+            total_str = human_readable_time(total_data[i])
+            report.append(f"| {n_data[i]:<3} | {analysis_str:>13} | {total_str:>16} |")
+
+        report.append("")
+
+        # Complexity analysis
+        report.append(f"**Complexity Analysis:**")
+        try:
+            popt, pcov = curve_fit(power_law, n_data, analysis_data, p0=[0, 2], maxfev=5000)
+            _, k = popt
+
+            # R-squared calculation
+            residuals = analysis_data - power_law(n_data, *popt)
+            ss_res = np.sum(residuals**2)
+            ss_tot = np.sum((analysis_data - np.mean(analysis_data))**2)
+            r_squared = 1 - (ss_res / ss_tot)
+
+            # Confidence Interval for k
+            alpha = 0.05  # 95% confidence
+            dof = max(0, len(n_data) - len(popt)) # degrees of freedom
+            t_val = t.ppf(1.0 - alpha / 2., dof)
+            # Standard error of the parameters
+            perr = np.sqrt(np.diag(pcov))
+            k_stderr = perr[1]
+            k_ci_lower = k - t_val * k_stderr
+            k_ci_upper = k + t_val * k_stderr
+
+            report.append(f"- The performance of the analysis for this case scales approximately as **O(n<sup>{k:.2f}</sup>)**.")
+            report.append(f"- **Goodness of Fit (R²):** `{r_squared:.4f}` (closer to 1.0 is better).")
+            report.append(f"- **95% Confidence Interval for exponent 'k':** `[{k_ci_lower:.2f}, {k_ci_upper:.2f}]`.")
+
+        except RuntimeError:
+            report.append("- Could not determine a best-fit curve for the data.")
+
+        report.append("\n---\n")
+
+    return "\n".join(report)
+
+def run_single_test(clang_binary: str, test_type: str, n: int) -> tuple[float, float]:
+    """Generates, compiles, and benchmarks a single test case."""
+    print(f"--- Running Test: {test_type.capitalize()} with N={n} ---")
+
+    generated_code = ""
+    if test_type == 'cycle':
+        generated_code = generate_cpp_cycle_test(n)
+    else: # merge
+        generated_code = generate_cpp_merge_test(n)
+
+    with tempfile.NamedTemporaryFile(mode='w+', suffix='.cpp', delete=False) as tmp_cpp:
+        tmp_cpp.write(generated_code)
+        source_file = tmp_cpp.name
+
+    trace_file = os.path.splitext(source_file)[0] + '.json'
+
+    clang_command = [
+        clang_binary, '-c', '-o', '/dev/null', '-ftime-trace=' + trace_file,
+        '-Wexperimental-lifetime-safety', '-std=c++17', source_file
+    ]
+
+    result = subprocess.run(clang_command, capture_output=True, text=True)
+
+    if result.returncode != 0:
+        print(f"Compilation failed for N={n}!", file=sys.stderr)
+        print(result.stderr, file=sys.stderr)
+        os.remove(source_file)
+        return 0.0, 0.0
+
+    lifetime_us, total_us = analyze_trace_file(trace_file)
+    os.remove(source_file)
+    os.remove(trace_file)
+
+    return lifetime_us / 1000.0, total_us / 1000.0
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Generate, compile, and benchmark C++ test cases for Clang's lifetime analysis.")
+    parser.add_argument("--clang-binary", type=str, required=True, help="Path to the Clang executable.")
+
+    args = parser.parse_args()
+
+    n_values = [10, 25, 50, 75, 100, 150, 200]
+    results = {
+        'cycle': {'n': [], 'lifetime_ms': [], 'total_ms': []},
+        'merge': {'n': [], 'lifetime_ms': [], 'total_ms': []}
+    }
+
+    print("Running performance benchmarks...")
+    for test_type in ['cycle', 'merge']:
+        for n in n_values:
+            lifetime_ms, total_ms = run_single_test(args.clang_binary, test_type, n)
+            if total_ms > 0:
+                results[test_type]['n'].append(n)
+                results[test_type]['lifetime_ms'].append(lifetime_ms)
+                results[test_type]['total_ms'].append(total_ms)
+                print(f"  Total: {human_readable_time(total_ms)} | Analysis: {human_readable_time(lifetime_ms)}")
+
+    print("\n\n" + "="*80)
+    print("Generating Markdown Report...")
+    print("="*80 + "\n")
+
+    markdown_report = generate_markdown_report(results)
+    print(markdown_report)

>From b09513a7e91143450bf079badc54d19b3a873f95 Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <u...@google.com>
Date: Mon, 7 Jul 2025 15:39:55 +0000
Subject: [PATCH 2/2] format

---
 .../Analysis/lifetime_safety/benchmark.py     | 117 +++++++++++------
 1 file changed, 72 insertions(+), 45 deletions(-)

diff --git a/clang/test/Analysis/lifetime_safety/benchmark.py b/clang/test/Analysis/lifetime_safety/benchmark.py
index ddf32e192de17..259be915cd043 100644
--- a/clang/test/Analysis/lifetime_safety/benchmark.py
+++ b/clang/test/Analysis/lifetime_safety/benchmark.py
@@ -9,6 +9,7 @@
 from scipy.optimize import curve_fit
 from scipy.stats import t
 
+
 def generate_cpp_cycle_test(n: int) -> str:
     """
     Generates a C++ code snippet with a specified number of pointers in a cycle.
@@ -34,6 +35,7 @@ def generate_cpp_cycle_test(n: int) -> str:
     cpp_code += f"\nint main() {{ long_cycle_{n}(false); return 0; }}\n"
     return cpp_code
 
+
 def generate_cpp_merge_test(n: int) -> str:
     """
     Generates a C++ code snippet with N independent conditional assignments.
@@ -55,6 +57,7 @@ def generate_cpp_merge_test(n: int) -> str:
     cpp_code += f"\nint main() {{ conditional_merges_{n}(false); return 0; }}\n"
     return cpp_code
 
+
 def analyze_trace_file(trace_path: str) -> tuple[float, float]:
     """
     Parses the -ftime-trace JSON output to find durations.
@@ -65,29 +68,32 @@ def analyze_trace_file(trace_path: str) -> tuple[float, float]:
     lifetime_duration = 0.0
     total_duration = 0.0
     try:
-        with open(trace_path, 'r') as f:
+        with open(trace_path, "r") as f:
             trace_data = json.load(f)
-            for event in trace_data.get('traceEvents', []):
-                if event.get('name') == 'LifetimeAnalysis':
-                    lifetime_duration += float(event.get('dur', 0))
-                if event.get('name') == 'ExecuteCompiler':
-                    total_duration += float(event.get('dur', 0))
+            for event in trace_data.get("traceEvents", []):
+                if event.get("name") == "LifetimeAnalysis":
+                    lifetime_duration += float(event.get("dur", 0))
+                if event.get("name") == "ExecuteCompiler":
+                    total_duration += float(event.get("dur", 0))
 
     except (IOError, json.JSONDecodeError) as e:
         print(f"Error reading or parsing trace file {trace_path}: {e}", file=sys.stderr)
         return 0.0, 0.0
     return lifetime_duration, total_duration
 
+
 def power_law(n, c, k):
     """Represents the power law function: y = c * n^k"""
     return c * np.power(n, k)
 
+
 def human_readable_time(ms: float) -> str:
     """Converts milliseconds to a human-readable string (ms or s)."""
     if ms >= 1000:
         return f"{ms / 1000:.2f} s"
     return f"{ms:.2f} ms"
 
+
 def generate_markdown_report(results: dict) -> str:
     """Generates a Markdown-formatted report from the benchmark results."""
     report = []
@@ -97,7 +103,7 @@ def generate_markdown_report(results: dict) -> str:
     report.append("\n---\n")
 
     for test_type, data in results.items():
-        title = 'Pointer Cycle in Loop' if test_type == 'cycle' else 'CFG Merges'
+        title = "Pointer Cycle in Loop" if test_type == "cycle" else "CFG Merges"
         report.append(f"## Test Case: {title}")
         report.append("")
 
@@ -106,9 +112,9 @@ def generate_markdown_report(results: dict) -> str:
         report.append("|:----|--------------:|-----------------:|")
 
         # Table rows
-        n_data = np.array(data['n'])
-        analysis_data = np.array(data['lifetime_ms'])
-        total_data = np.array(data['total_ms'])
+        n_data = np.array(data["n"])
+        analysis_data = np.array(data["lifetime_ms"])
+        total_data = np.array(data["total_ms"])
         for i in range(len(n_data)):
             analysis_str = human_readable_time(analysis_data[i])
             total_str = human_readable_time(total_data[i])
@@ -119,28 +125,36 @@ def generate_markdown_report(results: dict) -> str:
         # Complexity analysis
         report.append(f"**Complexity Analysis:**")
         try:
-            popt, pcov = curve_fit(power_law, n_data, analysis_data, p0=[0, 2], maxfev=5000)
+            popt, pcov = curve_fit(
+                power_law, n_data, analysis_data, p0=[0, 2], maxfev=5000
+            )
             _, k = popt
-
+
             # R-squared calculation
             residuals = analysis_data - power_law(n_data, *popt)
             ss_res = np.sum(residuals**2)
-            ss_tot = np.sum((analysis_data - np.mean(analysis_data))**2)
+            ss_tot = np.sum((analysis_data - np.mean(analysis_data)) ** 2)
             r_squared = 1 - (ss_res / ss_tot)
-
+
             # Confidence Interval for k
             alpha = 0.05  # 95% confidence
-            dof = max(0, len(n_data) - len(popt)) # degrees of freedom
-            t_val = t.ppf(1.0 - alpha / 2., dof)
+            dof = max(0, len(n_data) - len(popt))  # degrees of freedom
+            t_val = t.ppf(1.0 - alpha / 2.0, dof)
             # Standard error of the parameters
             perr = np.sqrt(np.diag(pcov))
             k_stderr = perr[1]
             k_ci_lower = k - t_val * k_stderr
             k_ci_upper = k + t_val * k_stderr
 
-            report.append(f"- The performance of the analysis for this case scales approximately as **O(n<sup>{k:.2f}</sup>)**.")
-            report.append(f"- **Goodness of Fit (R²):** `{r_squared:.4f}` (closer to 1.0 is better).")
-            report.append(f"- **95% Confidence Interval for exponent 'k':** `[{k_ci_lower:.2f}, {k_ci_upper:.2f}]`.")
+            report.append(
+                f"- The performance of the analysis for this case scales approximately as **O(n<sup>{k:.2f}</sup>)**."
+            )
+            report.append(
+                f"- **Goodness of Fit (R²):** `{r_squared:.4f}` (closer to 1.0 is better)."
+            )
+            report.append(
+                f"- **95% Confidence Interval for exponent 'k':** `[{k_ci_lower:.2f}, {k_ci_upper:.2f}]`."
+            )
 
         except RuntimeError:
             report.append("- Could not determine a best-fit curve for the data.")
@@ -149,67 +163,80 @@ def generate_markdown_report(results: dict) -> str:
 
     return "\n".join(report)
 
+
 def run_single_test(clang_binary: str, test_type: str, n: int) -> tuple[float, float]:
     """Generates, compiles, and benchmarks a single test case."""
     print(f"--- Running Test: {test_type.capitalize()} with N={n} ---")
-
+
     generated_code = ""
-    if test_type == 'cycle':
+    if test_type == "cycle":
         generated_code = generate_cpp_cycle_test(n)
-    else: # merge
+    else:  # merge
        generated_code = generate_cpp_merge_test(n)
 
-    with tempfile.NamedTemporaryFile(mode='w+', suffix='.cpp', delete=False) as tmp_cpp:
+    with tempfile.NamedTemporaryFile(mode="w+", suffix=".cpp", delete=False) as tmp_cpp:
         tmp_cpp.write(generated_code)
         source_file = tmp_cpp.name
-
-    trace_file = os.path.splitext(source_file)[0] + '.json'
+
+    trace_file = os.path.splitext(source_file)[0] + ".json"
 
     clang_command = [
-        clang_binary, '-c', '-o', '/dev/null', '-ftime-trace=' + trace_file,
-        '-Wexperimental-lifetime-safety', '-std=c++17', source_file
+        clang_binary,
+        "-c",
+        "-o",
+        "/dev/null",
+        "-ftime-trace=" + trace_file,
+        "-Wexperimental-lifetime-safety",
+        "-std=c++17",
+        source_file,
     ]
 
     result = subprocess.run(clang_command, capture_output=True, text=True)
-
+
     if result.returncode != 0:
         print(f"Compilation failed for N={n}!", file=sys.stderr)
         print(result.stderr, file=sys.stderr)
         os.remove(source_file)
         return 0.0, 0.0
-
+
     lifetime_us, total_us = analyze_trace_file(trace_file)
     os.remove(source_file)
     os.remove(trace_file)
-
+
     return lifetime_us / 1000.0, total_us / 1000.0
 
+
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Generate, compile, and benchmark C++ test cases for Clang's lifetime analysis.")
-    parser.add_argument("--clang-binary", type=str, required=True, help="Path to the Clang executable.")
-
+    parser = argparse.ArgumentParser(
+        description="Generate, compile, and benchmark C++ test cases for Clang's lifetime analysis."
+    )
+    parser.add_argument(
+        "--clang-binary", type=str, required=True, help="Path to the Clang executable."
+    )
+
     args = parser.parse_args()
 
     n_values = [10, 25, 50, 75, 100, 150, 200]
     results = {
-        'cycle': {'n': [], 'lifetime_ms': [], 'total_ms': []},
-        'merge': {'n': [], 'lifetime_ms': [], 'total_ms': []}
+        "cycle": {"n": [], "lifetime_ms": [], "total_ms": []},
+        "merge": {"n": [], "lifetime_ms": [], "total_ms": []},
     }
 
     print("Running performance benchmarks...")
-    for test_type in ['cycle', 'merge']:
+    for test_type in ["cycle", "merge"]:
         for n in n_values:
             lifetime_ms, total_ms = run_single_test(args.clang_binary, test_type, n)
             if total_ms > 0:
-                results[test_type]['n'].append(n)
-                results[test_type]['lifetime_ms'].append(lifetime_ms)
-                results[test_type]['total_ms'].append(total_ms)
-                print(f"  Total: {human_readable_time(total_ms)} | Analysis: {human_readable_time(lifetime_ms)}")
-
-    print("\n\n" + "="*80)
+                results[test_type]["n"].append(n)
+                results[test_type]["lifetime_ms"].append(lifetime_ms)
+                results[test_type]["total_ms"].append(total_ms)
+                print(
+                    f"  Total: {human_readable_time(total_ms)} | Analysis: {human_readable_time(lifetime_ms)}"
+                )
+
+    print("\n\n" + "=" * 80)
     print("Generating Markdown Report...")
-    print("="*80 + "\n")
-
+    print("=" * 80 + "\n")
+
     markdown_report = generate_markdown_report(results)
     print(markdown_report)
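For reference, the cycle test case that benchmark.py feeds to Clang has roughly the following shape for N=2. This is a sketch reconstructed from generate_cpp_cycle_test above, not part of the patch, and the exact whitespace of the emitted code may differ:

    // Approximate output of generate_cpp_cycle_test(2): every pointer is
    // rotated one step around the cycle on each loop iteration.
    struct MyObj { int id; ~MyObj() {} };

    void long_cycle_2(bool condition) {
      MyObj v1{1};
      MyObj v2{1};

      MyObj* p1 = &v1;
      MyObj* p2 = &v2;

      while (condition) {
        MyObj* temp = p1;
        p1 = p2;
        p2 = temp;
      }
    }

    int main() { long_cycle_2(false); return 0; }

The rotation presumably forces the analysis to keep propagating origin information around the loop back-edge until it reaches a fixed point. The script compiles each generated file with clang -c -ftime-trace -Wexperimental-lifetime-safety, sweeps N from 10 to 200 for both the cycle and merge generators, and is invoked as: benchmark.py --clang-binary <path-to-clang>.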