adriangb commented on PR #16491: URL: https://github.com/apache/datafusion/pull/16491#issuecomment-2993585382
# adriangb: I think with these fixes to `Display<ScalarValue>` the tests will
# pass consistently. I used this script to test:

#!/usr/bin/env python3
"""Run a shell command many times (optionally in parallel) and report its failure rate."""
import argparse
import subprocess
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Event


def run_test(command, run_num, total_runs, stop_event):
    """Run one iteration of *command* and return ``(run_num, status, result)``.

    ``status`` is one of:
      - "SKIPPED": *stop_event* was already set, nothing was run (result is None)
      - "PASS":    the command exited 0
      - "FAIL":    the command exited non-zero
      - "ERROR":   subprocess.run itself raised (result is None)

    ``result`` is the ``subprocess.CompletedProcess`` for PASS/FAIL.
    """
    if stop_event.is_set():
        return run_num, "SKIPPED", None
    try:
        # NOTE(review): shell=True on a caller-joined string is deliberate here
        # (the CLI joins argv back into one shell command line), but it means
        # this script must never be fed untrusted input.
        result = subprocess.run(command, shell=True, capture_output=True, text=True)
        status = "PASS" if result.returncode == 0 else "FAIL"
        print(f"Run {run_num}/{total_runs}: {status}")
        return run_num, status, result
    except Exception as e:
        # Broad catch is intentional: a single broken spawn should be recorded
        # as ERROR, not abort the whole measurement run.
        print(f"Run {run_num}/{total_runs}: ERROR - {e}")
        return run_num, "ERROR", None


def main():
    """Parse CLI flags, fan the command out over a thread pool, print a summary."""
    parser = argparse.ArgumentParser(description="Run a command multiple times and report failure rate")
    parser.add_argument("-P", "--parallel", type=int, default=1, help="Number of parallel jobs (default: 1)")
    parser.add_argument("-n", "--runs", type=int, default=100, help="Number of runs (default: 100)")
    parser.add_argument("-x", "--stop-on-failure", action="store_true", help="Stop at first failure")
    parser.add_argument("command", nargs=argparse.REMAINDER, help="Command to run")
    args = parser.parse_args()

    # REMAINDER argv is re-joined into one shell string; quoting inside the
    # original arguments is lost, so pass simple commands only.
    command = " ".join(args.command)

    print(f"Running command {args.runs} times with {args.parallel} parallel jobs...")
    print(f"Command: {command}")
    print("----------------------------------------")

    stop_event = Event()
    failures = 0
    completed_runs = 0
    failure_outputs = []  # (run_num, stdout, stderr) for each failing run

    with ThreadPoolExecutor(max_workers=args.parallel) as executor:
        # Submit every run up front; stop_event lets queued tasks no-op early.
        futures = [
            executor.submit(run_test, command, i, args.runs, stop_event)
            for i in range(1, args.runs + 1)
        ]

        # Process results as they complete.
        for future in as_completed(futures):
            run_num, status, result = future.result()
            completed_runs += 1
            if status in ("FAIL", "ERROR"):
                failures += 1
                if result and (result.stdout or result.stderr):
                    failure_outputs.append((run_num, result.stdout, result.stderr))
                if args.stop_on_failure:
                    print(f"Stopping at first failure (run {run_num})")
                    stop_event.set()
                    # cancel() only affects not-yet-started futures; tasks that
                    # already started will see stop_event and return SKIPPED.
                    for f in futures:
                        f.cancel()
                    break

    print("----------------------------------------")
    print("Results:")
    print(f"Total runs: {completed_runs}")
    print(f"Failures: {failures}")
    print(f"Passes: {completed_runs - failures}")
    if completed_runs > 0:
        failure_rate = (failures * 100) / completed_runs
        print(f"Failure rate: {failure_rate:.2f}%")
    else:
        print("Failure rate: 0%")

    # Replay captured output of every failing run at the end, where it is
    # readable, instead of interleaved with the progress lines.
    if failure_outputs:
        print("\n" + "=" * 50)
        print("FAILURE OUTPUTS:")
        print("=" * 50)
        for run_num, stdout, stderr in failure_outputs:
            print(f"\n--- Run {run_num} ---")
            if stdout:
                print("STDOUT:")
                print(stdout)
            if stderr:
                print("STDERR:")
                print(stderr)


if __name__ == "__main__":
    main()

# adriangb: And was able to run with no errors:
#
#   ./run-test.py -P 10 -n 600 -x cargo test --package datafusion --test fuzz \
#       -- fuzz_cases::sort_query_fuzz::sort_query_fuzzer_runner --exact --show-output
#
# I'm running a 1200 run to confirm now.
#
# -- This is an automated message from the Apache Git Service. To respond to the
# message, please log on to GitHub and use the URL above to go to the specific
# comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
# For queries about this service, please contact Infrastructure at:
# us...@infra.apache.org
# ---------------------------------------------------------------------
# To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
# For additional commands, e-mail: github-h...@datafusion.apache.org