Hi, I would like to propose adding an option to pgbench so that a benchmark can quit immediately when any client is aborted. Currently, when a client is aborted due to some error (for example, network trouble), the other clients continue their run until the number of transactions specified by -t is reached or the time specified by -T has expired. At the end, the results are printed, but they are not useful, as the message "Run was aborted; the above results are incomplete" shows.
For precise benchmarking purposes, we would not want to wait for such incomplete results; rather, we would like to know right away that some trouble happened, so that we can retry quickly. Therefore, it would be nice to add an option that makes pgbench exit, instead of continuing the run in the other clients, when any client is aborted. I think adding an option is better than changing the default behaviour, because some users who use pgbench just to put stress on backends for testing purposes, rather than for benchmarking, might not want pgbench to stop even if a client is aborted. Attached is the patch to add the option --exit-on-abort. If this option is specified, when any client is aborted, pgbench immediately quits by calling exit(2). What do you think about it? Regards, Yugo Nagata -- Yugo NAGATA <nag...@sraoss.co.jp>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 539c2795e2..6fae5a43e1 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -765,6 +765,8 @@ static int64 total_weight = 0; static bool verbose_errors = false; /* print verbose messages of all errors */ +static bool exit_on_abort = false; /* exit when any client is aborted */ + /* Builtin test scripts */ typedef struct BuiltinScript { @@ -911,6 +913,7 @@ usage(void) " -T, --time=NUM duration of benchmark test in seconds\n" " -v, --vacuum-all vacuum all four standard tables before tests\n" " --aggregate-interval=NUM aggregate data over NUM seconds\n" + " --exit-on-abort exit when any client is aborted\n" " --failures-detailed report the failures grouped by basic types\n" " --log-prefix=PREFIX prefix for transaction time log file\n" " (default: \"pgbench_log\")\n" @@ -6612,6 +6615,7 @@ main(int argc, char **argv) {"failures-detailed", no_argument, NULL, 13}, {"max-tries", required_argument, NULL, 14}, {"verbose-errors", no_argument, NULL, 15}, + {"exit-on-abort", no_argument, NULL, 16}, {NULL, 0, NULL, 0} }; @@ -6945,6 +6949,10 @@ main(int argc, char **argv) benchmarking_option_set = true; verbose_errors = true; break; + case 16: /* exit-on-abort */ + benchmarking_option_set = true; + exit_on_abort = true; + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -7553,11 +7561,13 @@ threadRun(void *arg) advanceConnectionState(thread, st, &aggs); + if (exit_on_abort && st->state == CSTATE_ABORTED) + goto done; /* * If advanceConnectionState changed client to finished state, * that's one fewer client that remains. 
*/ - if (st->state == CSTATE_FINISHED || st->state == CSTATE_ABORTED) + else if (st->state == CSTATE_FINISHED || st->state == CSTATE_ABORTED) remains--; } @@ -7592,6 +7602,15 @@ threadRun(void *arg) done: disconnect_all(state, nstate); + for (int i = 0; i < nstate; i++) + { + if (state[i].state != CSTATE_FINISHED) + { + pg_log_error("Run was aborted due to an error in thread %d", thread->tid); + exit(2); + } + } + if (thread->logfile) { if (agg_interval > 0)