Re: [HACKERS] Re: [COMMITTERS] pgsql: Remove pgbench "progress" test pending solution of its timing is (fwd)

Fabien COELHO Wed, 27 Mar 2019 06:33:09 -0700


Hello Heikki,


Indeed, yet again, I forgot the attachement:-(

I stared at the new test case for a while, and I must say it looks verycryptic. It's not exactly this patch's fault - all the pgbench tests arecryptic -
Perl is cryptic. Regexprs are cryptic.
but I think we need to do something about that before adding any moretests. I'm not sure what exactly, but I'd like them to be more likepg_regress tests, where you have an expected output and you compare it withthe actual output. I realize that's not easy, because there are a lot ofvarying numbers in the output, but we've got to do something.
As a good first step, I wish the pgbench() function used named arguments.[...]
You would have something like this:

my $elapsed = pgbench(
 test_name => 'pgbench progress',
 opts => '-T 2 -P 1 -l --aggregate-interval=1'
I do not like them much in perl because it changes the code significantly,but why not. That would be another patch anyway.
A lighter but efficient option would be to add a few comments on the largercalls, see attached.

Please really find the attachement, and do not hesitate to share sparea few grey cells so that I will not forget about them in the futur:-)


--
Fabien.

diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index cdd21d7469..7d853a478f 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -6050,6 +6050,7 @@ threadRun(void *arg)
 	int			nstate = thread->nstate;
 	int			remains = nstate;	/* number of remaining clients */
 	socket_set *sockets = alloc_socket_set(nstate);
+	int			aborted = 0;
 	int			i;
 
 	/* for reporting progress: */
@@ -6279,6 +6280,10 @@ threadRun(void *arg)
 			 */
 			if (st->state == CSTATE_FINISHED || st->state == CSTATE_ABORTED)
 				remains--;
+
+			/* count aborted clients */
+			if (st->state == CSTATE_ABORTED)
+				aborted++;
 		}
 
 		/* progress report is made by thread 0 for all threads */
@@ -6295,7 +6300,10 @@ threadRun(void *arg)
 
 				/*
 				 * Ensure that the next report is in the future, in case
-				 * pgbench/postgres got stuck somewhere.
+				 * pgbench/postgres got stuck somewhere. This may skip
+				 * some progress reports if the thread does not get enough
+				 * cpu time. In such case, probably the whole bench should
+				 * be ignored.
 				 */
 				do
 				{
@@ -6306,6 +6314,52 @@ threadRun(void *arg)
 	}
 
 done:
+	/*
+	 * Under -R, comply with -T and -P even if there is nothing to do,
+	 * (unless all clients aborted) and ensure that one report is printed.
+	 * This special behavior allows tap tests to check that a run lasts
+	 * as expected and that some progress is shown, even on very slow hosts.
+	 */
+	if (duration && throttle_delay && aborted < nstate && thread->tid == 0)
+	{
+		int64		thread_end = thread_start + (int64) duration * 1000000;
+		instr_time	now_time;
+		int64		now;
+
+		INSTR_TIME_SET_CURRENT(now_time);
+		now = INSTR_TIME_GET_MICROSEC(now_time);
+
+		while (now < thread_end)
+		{
+			if (progress && next_report <= thread_end)
+			{
+				pg_usleep(next_report - now);
+				INSTR_TIME_SET_CURRENT(now_time);
+				now = INSTR_TIME_GET_MICROSEC(now_time);
+				printProgressReport(thread, thread_start, now, &last, &last_report);
+
+				/* schedule next report */
+				do
+				{
+					next_report += (int64) progress * 1000000;
+				} while (now >= next_report);
+			}
+			else
+			{
+				pg_usleep(thread_end - now);
+				INSTR_TIME_SET_CURRENT(now_time);
+				now = INSTR_TIME_GET_MICROSEC(now_time);
+			}
+		}
+
+		/*
+		 * Print a closing progress report if none were printed
+		 * and at least one was expected.
+		 */
+		if (progress && progress <= duration && last_report == thread_start)
+			printProgressReport(thread, thread_start, now, &last, &last_report);
+	}
+
 	INSTR_TIME_SET_CURRENT(start);
 	disconnect_all(state, nstate);
 	INSTR_TIME_SET_CURRENT(end);
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index ad3a7a79f8..424fe074d6 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -4,13 +4,14 @@ use warnings;
 use PostgresNode;
 use TestLib;
 use Test::More;
+use Time::HiRes qw{time};
 
 # start a pgbench specific server
 my $node = get_new_node('main');
 $node->init;
 $node->start;
 
-# invoke pgbench, with parameters:
+# invoke pgbench, return elapsed time, with parameters:
 #   $opts: options as a string to be split on spaces
 #   $stat: expected exit status
 #   $out: reference to a regexp list that must match stdout
@@ -49,13 +50,14 @@ sub pgbench
 	}
 
 	push @cmd, @args;
+	my $t0 = time();
 
 	$node->command_checks_all(\@cmd, $stat, $out, $err, $name);
 
 	# cleanup?
 	#unlink @filenames or die "cannot unlink files (@filenames): $!";
 
-	return;
+	return time() - $t0;
 }
 
 # Test concurrent insertion into table with serial column.  This
@@ -894,16 +896,68 @@ sub check_pgbench_logs
 
 my $bdir = $node->basedir;
 
-# with sampling rate
-pgbench(
-	"-n -S -t 50 -c 2 --log --sampling-rate=0.5",
+# The point of this test is coverage of various time-related features
+# (-T, -P, --aggregate-interval, --rate, --latency-limit...), so it is
+# somehow time sensitive.
+# The checks performed are quite loose so as to still pass even under
+# degraded (high load, slow host, valgrind run) testing conditions.
+# The main point is to provide coverage, most of the time.
+my $elapsed = pgbench(
+	# main options
+	"-T 2 -P 1 -l --aggregate-interval=1"
+	  . ' -S -b se@2 --rate=20 --latency-limit=1000 -j ' . $nthreads
+	  . ' -c 3 -r',
+	# expected status
 	0,
+	# checks on stdout
+	[   qr{type: multiple},
+		qr{clients: 3},
+		qr{threads: $nthreads},
+		# the shown duration is really -T argument value
+		qr{duration: 2 s},
+		qr{script 1: .* select only},
+		qr{script 2: .* select only},
+		qr{statement latencies in milliseconds},
+		qr{FROM pgbench_accounts} ],
+	# checks on stderr
+	[	qr{vacuum},
+		qr{progress: \d\b} ],
+	# test name
+	'pgbench progress',
+	# additional files
+	undef,
+	# additional pgbench options that cannot be split
+	"--log-prefix=$bdir/001_pgbench_log_1");
+
+# this depends on pgbench to avoid a shortcut under --rate
+# if there is nothing to do.
+ok($elapsed >= 2.0, "-T 2 leads to at least 2 second run");
+
+# if the test has gone AWOL, coldly skip these detailed checks.
+if (abs($elapsed - 2.0) < 0.5)
+{
+	# $nthreads threads, 2 seconds, but due to timing imprecision we might get
+	# only 1 or as many as 3 progress reports per thread.
+	check_pgbench_logs($bdir, '001_pgbench_log_1', $nthreads, 1, 3,
+		qr{^\d+ \d{1,2} \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+$});
+}
+
+# with sampling rate, not time sensitive
+pgbench(
+	# options & expected status
+	"-n -S -t 50 -c 2 --log --sampling-rate=0.5", 0,
+	# checks on stdout & stderr
 	[ qr{select only}, qr{processed: 100/100} ],
 	[ qr{^$} ],
+	# test name
 	'pgbench logs',
+	# files
 	undef,
+	# additional options
 	"--log-prefix=$bdir/001_pgbench_log_2");
 
+# random 50% of 2*50 transactions, accept between 8 and 92
+# probability of failure is about 2 * 2^-42 (?)
 check_pgbench_logs($bdir, '001_pgbench_log_2', 1, 8, 92,
 	qr{^0 \d{1,2} \d+ \d \d+ \d+$});

Re: [HACKERS] Re: [COMMITTERS] pgsql: Remove pgbench "progress" test pending solution of its timing is (fwd)

Reply via email to