Hi,

I would like to propose adding an option to pgbench so that a benchmark
can quit immediately when any client is aborted. Currently, when a
client is aborted due to some error (for example, network trouble), the
other clients continue their run until the number of transactions
specified by -t is reached or the time specified by -T has expired. At
the end, the results are printed, but they are not useful, as the message
"Run was aborted; the above results are incomplete" shows.

For precise benchmarking purposes, we would not want to wait for such
incomplete results; rather, we would like to know right away that some
trouble happened, so that we can retry quickly. Therefore, it would be
nice to add an option that makes pgbench exit, instead of continuing to
run the other clients, when any client is aborted. I think adding an
option is better than changing the default behaviour, because some users
who use pgbench just to put stress on backends for testing purposes,
rather than for benchmarking, might not want pgbench to stop even if a
client is aborted.

Attached is a patch that adds the option --exit-on-abort.
If this option is specified, pgbench quits immediately by calling
exit(2) when any client is aborted.

What do you think about it?

Regards,
Yugo Nagata
-- 
Yugo NAGATA <nag...@sraoss.co.jp>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 539c2795e2..6fae5a43e1 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -765,6 +765,8 @@ static int64 total_weight = 0;
 
 static bool verbose_errors = false; /* print verbose messages of all errors */
 
+static bool exit_on_abort = false;	/* exit when any client is aborted */
+
 /* Builtin test scripts */
 typedef struct BuiltinScript
 {
@@ -911,6 +913,7 @@ usage(void)
 		   "  -T, --time=NUM           duration of benchmark test in seconds\n"
 		   "  -v, --vacuum-all         vacuum all four standard tables before tests\n"
 		   "  --aggregate-interval=NUM aggregate data over NUM seconds\n"
+		   "  --exit-on-abort          exit when any client is aborted\n"
 		   "  --failures-detailed      report the failures grouped by basic types\n"
 		   "  --log-prefix=PREFIX      prefix for transaction time log file\n"
 		   "                           (default: \"pgbench_log\")\n"
@@ -6612,6 +6615,7 @@ main(int argc, char **argv)
 		{"failures-detailed", no_argument, NULL, 13},
 		{"max-tries", required_argument, NULL, 14},
 		{"verbose-errors", no_argument, NULL, 15},
+		{"exit-on-abort", no_argument, NULL, 16},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -6945,6 +6949,10 @@ main(int argc, char **argv)
 				benchmarking_option_set = true;
 				verbose_errors = true;
 				break;
+			case 16:			/* exit-on-abort */
+				benchmarking_option_set = true;
+				exit_on_abort = true;
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -7553,11 +7561,13 @@ threadRun(void *arg)
 
 			advanceConnectionState(thread, st, &aggs);
 
+			if (exit_on_abort && st->state == CSTATE_ABORTED)
+				goto done;
 			/*
 			 * If advanceConnectionState changed client to finished state,
 			 * that's one fewer client that remains.
 			 */
-			if (st->state == CSTATE_FINISHED || st->state == CSTATE_ABORTED)
+			else if (st->state == CSTATE_FINISHED || st->state == CSTATE_ABORTED)
 				remains--;
 		}
 
@@ -7592,6 +7602,15 @@ threadRun(void *arg)
 done:
 	disconnect_all(state, nstate);
 
+	for (int i = 0; i < nstate; i++)
+	{
+		if (state[i].state != CSTATE_FINISHED)
+		{
+			pg_log_error("Run was aborted due to an error in thread %d", thread->tid);
+			exit(2);
+		}
+	}
+
 	if (thread->logfile)
 	{
 		if (agg_interval > 0)

Reply via email to