Re: pgbench logging broken by time logic changes

Thomas Munro Thu, 17 Jun 2021 05:50:38 -0700

On Thu, Jun 17, 2021 at 7:24 PM Fabien COELHO <coe...@cri.ensmp.fr> wrote:
> > Seems like it should be straightforward to fix, though, with fixes
> > already proposed (though I haven't studied them yet, will do).
>
> I think that fixing logging is simple enough, thus a revert is not
> necessary.


I prepared a draft revert patch for discussion, just in case it comes
in handy.  This reverts "pgbench: Improve time logic.", but "pgbench:
Synchronize client threads." remains (slightly rearranged).

I'm on the fence TBH, I can see that it's really small things and it
seems we have the patches, but it's late, late enough that
benchmarking gurus are showing up to benchmark with it for real, and
it's not great to be getting in the way of that with what is mostly
refactoring work, so I don't think it would be a bad thing if we
agreed to try again in 15.  (A number of arguments for and against
moving pgbench out of the postgresql source tree and release cycle
occur to me, but I guess that's a topic for another thread.)

> > [...] For that reason, I'm not super attached to that new pg_time_usec_t
> > stuff at all, and wouldn't be sad if we reverted that piece.
>
> Well, I was sooo happy to get rid of INSTR_TIME ugly and inefficient
> macros in pgbench… so anything looks better to me.

I don't love it either, in this code or the executor.  (I know you
also don't like the THREAD_CREATE etc macros.  I have something to
propose to improve that for 15....)

> Note that Michaël is having a look at fixing pgbench logging issues.

Yeah I've been catching up with these threads.

From 07d2102ed7faee6c1ed7cbacf44b3c0d95d3e2d5 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Thu, 17 Jun 2021 22:36:56 +1200
Subject: [PATCH] Revert "pgbench: Improve time logic."

Discussion: https://postgr.es/m/CAHLJuCW_8Vpcr0%3Dt6O_gozrg3wXXWXZXDioYJd3NhvKriqgpfQ%40mail.gmail.com
---
 doc/src/sgml/ref/pgbench.sgml    |  39 ++-
 src/bin/pgbench/pgbench.c        | 443 ++++++++++++++++---------------
 src/tools/pgindent/typedefs.list |   1 -
 3 files changed, 248 insertions(+), 235 deletions(-)

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 0c60077e1f..eba6409148 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -58,10 +58,8 @@ number of clients: 10
 number of threads: 1
 number of transactions per client: 1000
 number of transactions actually processed: 10000/10000
-latency average = 11.013 ms
-latency stddev = 7.351 ms
-initial connection time = 45.758 ms
-tps = 896.967014 (without initial connection time)
+tps = 85.184871 (including connections establishing)
+tps = 85.296346 (excluding connections establishing)
 </screen>
 
   The first six lines report some of the most important parameter
@@ -70,7 +68,8 @@ tps = 896.967014 (without initial connection time)
   and number of transactions per client); these will be equal unless the run
   failed before completion.  (In <option>-T</option> mode, only the actual
   number of transactions is printed.)
-  The last line reports the number of transactions per second.
+  The last two lines report the number of transactions per second,
+  figured with and without counting the time to start database sessions.
  </para>
 
   <para>
@@ -2339,22 +2338,22 @@ number of clients: 10
 number of threads: 1
 number of transactions per client: 1000
 number of transactions actually processed: 10000/10000
-latency average = 10.870 ms
-latency stddev = 7.341 ms
-initial connection time = 30.954 ms
-tps = 907.949122 (without initial connection time)
+latency average = 15.844 ms
+latency stddev = 2.715 ms
+tps = 618.764555 (including connections establishing)
+tps = 622.977698 (excluding connections establishing)
 statement latencies in milliseconds:
-    0.001  \set aid random(1, 100000 * :scale)
-    0.001  \set bid random(1, 1 * :scale)
-    0.001  \set tid random(1, 10 * :scale)
-    0.000  \set delta random(-5000, 5000)
-    0.046  BEGIN;
-    0.151  UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;
-    0.107  SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
-    4.241  UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;
-    5.245  UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;
-    0.102  INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);
-    0.974  END;
+        0.002  \set aid random(1, 100000 * :scale)
+        0.005  \set bid random(1, 1 * :scale)
+        0.002  \set tid random(1, 10 * :scale)
+        0.001  \set delta random(-5000, 5000)
+        0.326  BEGIN;
+        0.603  UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;
+        0.454  SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+        5.528  UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;
+        7.335  UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;
+        0.371  INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);
+        1.212  END;
 </screen>
   </para>
 
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index d7479925cb..49a7cabf0f 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -321,20 +321,13 @@ typedef struct SimpleStats
 	double		sum2;			/* sum of squared values */
 } SimpleStats;
 
-/*
- * The instr_time type is expensive when dealing with time arithmetic.  Define
- * a type to hold microseconds instead.  Type int64 is good enough for about
- * 584500 years.
- */
-typedef int64 pg_time_usec_t;
-
 /*
  * Data structure to hold various statistics: per-thread and per-script stats
  * are maintained and merged together.
  */
 typedef struct StatsData
 {
-	pg_time_usec_t start_time;	/* interval start time, for aggregates */
+	time_t		start_time;		/* interval start time, for aggregates */
 	int64		cnt;			/* number of transactions, including skipped */
 	int64		skipped;		/* number of transactions skipped under --rate
 								 * and --latency-limit */
@@ -463,11 +456,11 @@ typedef struct
 	int			nvariables;		/* number of variables */
 	bool		vars_sorted;	/* are variables sorted by name? */
 
-	/* various times about current transaction in microseconds */
-	pg_time_usec_t txn_scheduled;	/* scheduled start time of transaction */
-	pg_time_usec_t sleep_until; /* scheduled start time of next cmd */
-	pg_time_usec_t txn_begin;	/* used for measuring schedule lag times */
-	pg_time_usec_t stmt_begin;	/* used for measuring statement latencies */
+	/* various times about current transaction */
+	int64		txn_scheduled;	/* scheduled start time of transaction (usec) */
+	int64		sleep_until;	/* scheduled start time of next cmd (usec) */
+	instr_time	txn_begin;		/* used for measuring schedule lag times */
+	instr_time	stmt_begin;		/* used for measuring statement latencies */
 
 	bool		prepared[MAX_SCRIPTS];	/* whether client prepared the script */
 
@@ -497,15 +490,11 @@ typedef struct
 	int64		throttle_trigger;	/* previous/next throttling (us) */
 	FILE	   *logfile;		/* where to log, or NULL */
 
-	/* per thread collected stats in microseconds */
-	pg_time_usec_t create_time; /* thread creation time */
-	pg_time_usec_t started_time;	/* thread is running */
-	pg_time_usec_t bench_start; /* thread is benchmarking */
-	pg_time_usec_t conn_duration;	/* cumulated connection and deconnection
-									 * delays */
-
+	/* per thread collected stats */
+	instr_time	start_time;		/* thread start time */
+	instr_time	conn_time;
 	StatsData	stats;
-	int64		latency_late;	/* count executed but late transactions */
+	int64		latency_late;	/* executed but late transactions */
 } TState;
 
 /*
@@ -647,10 +636,10 @@ static void setIntValue(PgBenchValue *pv, int64 ival);
 static void setDoubleValue(PgBenchValue *pv, double dval);
 static bool evaluateExpr(CState *st, PgBenchExpr *expr,
 						 PgBenchValue *retval);
-static ConnectionStateEnum executeMetaCommand(CState *st, pg_time_usec_t *now);
+static ConnectionStateEnum executeMetaCommand(CState *st, instr_time *now);
 static void doLog(TState *thread, CState *st,
 				  StatsData *agg, bool skipped, double latency, double lag);
-static void processXactStats(TState *thread, CState *st, pg_time_usec_t *now,
+static void processXactStats(TState *thread, CState *st, instr_time *now,
 							 bool skipped, StatsData *agg);
 static void addScript(ParsedScript script);
 static THREAD_FUNC_RETURN_TYPE THREAD_FUNC_CC threadRun(void *arg);
@@ -669,24 +658,6 @@ static const PsqlScanCallbacks pgbench_callbacks = {
 	NULL,						/* don't need get_variable functionality */
 };
 
-static inline pg_time_usec_t
-pg_time_now(void)
-{
-	instr_time	now;
-
-	INSTR_TIME_SET_CURRENT(now);
-
-	return (pg_time_usec_t) INSTR_TIME_GET_MICROSEC(now);
-}
-
-static inline void
-pg_time_now_lazy(pg_time_usec_t *now)
-{
-	if ((*now) == 0)
-		(*now) = pg_time_now();
-}
-
-#define PG_TIME_GET_DOUBLE(t) (0.000001 * (t))
 
 static void
 usage(void)
@@ -1279,9 +1250,9 @@ mergeSimpleStats(SimpleStats *acc, SimpleStats *ss)
  * the given value.
  */
 static void
-initStats(StatsData *sd, pg_time_usec_t start)
+initStats(StatsData *sd, time_t start_time)
 {
-	sd->start_time = start;
+	sd->start_time = start_time;
 	sd->cnt = 0;
 	sd->skipped = 0;
 	initSimpleStats(&sd->latency);
@@ -3118,6 +3089,7 @@ evaluateSleep(CState *st, int argc, char **argv, int *usecs)
 static void
 advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 {
+	instr_time	now;
 
 	/*
 	 * gettimeofday() isn't free, so we get the current timestamp lazily the
@@ -3127,7 +3099,7 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 	 * means "not set yet".  Reset "now" when we execute shell commands or
 	 * expressions, which might take a non-negligible amount of time, though.
 	 */
-	pg_time_usec_t now = 0;
+	INSTR_TIME_SET_ZERO(now);
 
 	/*
 	 * Loop in the state machine, until we have to wait for a result from the
@@ -3162,30 +3134,29 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 
 				/* Start new transaction (script) */
 			case CSTATE_START_TX:
-				pg_time_now_lazy(&now);
 
 				/* establish connection if needed, i.e. under --connect */
 				if (st->con == NULL)
 				{
-					pg_time_usec_t start = now;
+					instr_time	start;
 
+					INSTR_TIME_SET_CURRENT_LAZY(now);
+					start = now;
 					if ((st->con = doConnect()) == NULL)
 					{
 						pg_log_error("client %d aborted while establishing connection", st->id);
 						st->state = CSTATE_ABORTED;
 						break;
 					}
-
-					/* reset now after connection */
-					now = pg_time_now();
-
-					thread->conn_duration += now - start;
+					INSTR_TIME_SET_CURRENT(now);
+					INSTR_TIME_ACCUM_DIFF(thread->conn_time, now, start);
 
 					/* Reset session-local state */
 					memset(st->prepared, 0, sizeof(st->prepared));
 				}
 
 				/* record transaction start time */
+				INSTR_TIME_SET_CURRENT_LAZY(now);
 				st->txn_begin = now;
 
 				/*
@@ -3193,7 +3164,7 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 				 * scheduled start time.
 				 */
 				if (!throttle_delay)
-					st->txn_scheduled = now;
+					st->txn_scheduled = INSTR_TIME_GET_MICROSEC(now);
 
 				/* Begin with the first command */
 				st->state = CSTATE_START_COMMAND;
@@ -3229,9 +3200,12 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 				 */
 				if (latency_limit)
 				{
-					pg_time_now_lazy(&now);
+					int64		now_us;
+
+					INSTR_TIME_SET_CURRENT_LAZY(now);
+					now_us = INSTR_TIME_GET_MICROSEC(now);
 
-					while (thread->throttle_trigger < now - latency_limit &&
+					while (thread->throttle_trigger < now_us - latency_limit &&
 						   (nxacts <= 0 || st->cnt < nxacts))
 					{
 						processXactStats(thread, st, &now, true, agg);
@@ -3264,9 +3238,9 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 				 * Wait until it's time to start next transaction.
 				 */
 			case CSTATE_THROTTLE:
-				pg_time_now_lazy(&now);
+				INSTR_TIME_SET_CURRENT_LAZY(now);
 
-				if (now < st->txn_scheduled)
+				if (INSTR_TIME_GET_MICROSEC(now) < st->txn_scheduled)
 					return;		/* still sleeping, nothing to do here */
 
 				/* done sleeping, but don't start transaction if we're done */
@@ -3289,7 +3263,7 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 				/* record begin time of next command, and initiate it */
 				if (report_per_command)
 				{
-					pg_time_now_lazy(&now);
+					INSTR_TIME_SET_CURRENT_LAZY(now);
 					st->stmt_begin = now;
 				}
 
@@ -3485,8 +3459,8 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 				 * instead of CSTATE_START_TX.
 				 */
 			case CSTATE_SLEEP:
-				pg_time_now_lazy(&now);
-				if (now < st->sleep_until)
+				INSTR_TIME_SET_CURRENT_LAZY(now);
+				if (INSTR_TIME_GET_MICROSEC(now) < st->sleep_until)
 					return;		/* still sleeping, nothing to do here */
 				/* Else done sleeping. */
 				st->state = CSTATE_END_COMMAND;
@@ -3506,12 +3480,13 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 				{
 					Command    *command;
 
-					pg_time_now_lazy(&now);
+					INSTR_TIME_SET_CURRENT_LAZY(now);
 
 					command = sql_script[st->use_file].commands[st->command];
 					/* XXX could use a mutex here, but we choose not to */
 					addToSimpleStats(&command->stats,
-									 PG_TIME_GET_DOUBLE(now - st->stmt_begin));
+									 INSTR_TIME_GET_DOUBLE(now) -
+									 INSTR_TIME_GET_DOUBLE(st->stmt_begin));
 				}
 
 				/* Go ahead with next command, to be executed or skipped */
@@ -3537,7 +3512,7 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 				if (is_connect)
 				{
 					finishCon(st);
-					now = 0;
+					INSTR_TIME_SET_ZERO(now);
 				}
 
 				if ((st->cnt >= nxacts && duration <= 0) || timer_exceeded)
@@ -3575,7 +3550,7 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
  * take no time to execute.
  */
 static ConnectionStateEnum
-executeMetaCommand(CState *st, pg_time_usec_t *now)
+executeMetaCommand(CState *st, instr_time *now)
 {
 	Command    *command = sql_script[st->use_file].commands[st->command];
 	int			argc;
@@ -3617,8 +3592,8 @@ executeMetaCommand(CState *st, pg_time_usec_t *now)
 			return CSTATE_ABORTED;
 		}
 
-		pg_time_now_lazy(now);
-		st->sleep_until = (*now) + usec;
+		INSTR_TIME_SET_CURRENT_LAZY(*now);
+		st->sleep_until = INSTR_TIME_GET_MICROSEC(*now) + usec;
 		return CSTATE_SLEEP;
 	}
 	else if (command->meta == META_SET)
@@ -3760,7 +3735,7 @@ executeMetaCommand(CState *st, pg_time_usec_t *now)
 	 * executing the expression or shell command might have taken a
 	 * non-negligible amount of time, so reset 'now'
 	 */
-	*now = 0;
+	INSTR_TIME_SET_ZERO(*now);
 
 	return CSTATE_END_COMMAND;
 }
@@ -3770,15 +3745,14 @@ executeMetaCommand(CState *st, pg_time_usec_t *now)
  *
  * We print Unix-epoch timestamps in the log, so that entries can be
  * correlated against other logs.  On some platforms this could be obtained
- * from the caller, but rather than get entangled with that, we just eat
- * the cost of an extra syscall in all cases.
+ * from the instr_time reading the caller has, but rather than get entangled
+ * with that, we just eat the cost of an extra syscall in all cases.
  */
 static void
 doLog(TState *thread, CState *st,
 	  StatsData *agg, bool skipped, double latency, double lag)
 {
 	FILE	   *logfile = thread->logfile;
-	pg_time_usec_t now = pg_time_now();
 
 	Assert(use_log);
 
@@ -3798,12 +3772,13 @@ doLog(TState *thread, CState *st,
 		 * any empty intervals in between (this may happen with very low tps,
 		 * e.g. --rate=0.1).
 		 */
+		time_t		now = time(NULL);
 
 		while (agg->start_time + agg_interval <= now)
 		{
 			/* print aggregated report to logfile */
-			fprintf(logfile, INT64_FORMAT " " INT64_FORMAT " %.0f %.0f %.0f %.0f",
-					agg->start_time,
+			fprintf(logfile, "%ld " INT64_FORMAT " %.0f %.0f %.0f %.0f",
+					(long) agg->start_time,
 					agg->cnt,
 					agg->latency.sum,
 					agg->latency.sum2,
@@ -3831,15 +3806,17 @@ doLog(TState *thread, CState *st,
 	else
 	{
 		/* no, print raw transactions */
+		struct timeval tv;
+
+		gettimeofday(&tv, NULL);
 		if (skipped)
-			fprintf(logfile, "%d " INT64_FORMAT " skipped %d " INT64_FORMAT " "
-					INT64_FORMAT,
-					st->id, st->cnt, st->use_file, now / 1000000, now % 1000000);
+			fprintf(logfile, "%d " INT64_FORMAT " skipped %d %ld %ld",
+					st->id, st->cnt, st->use_file,
+					(long) tv.tv_sec, (long) tv.tv_usec);
 		else
-			fprintf(logfile, "%d " INT64_FORMAT " %.0f %d " INT64_FORMAT " "
-					INT64_FORMAT,
+			fprintf(logfile, "%d " INT64_FORMAT " %.0f %d %ld %ld",
 					st->id, st->cnt, latency, st->use_file,
-					now / 1000000, now % 1000000);
+					(long) tv.tv_sec, (long) tv.tv_usec);
 		if (throttle_delay)
 			fprintf(logfile, " %.0f", lag);
 		fputc('\n', logfile);
@@ -3853,7 +3830,7 @@ doLog(TState *thread, CState *st,
  * Note that even skipped transactions are counted in the "cnt" fields.)
  */
 static void
-processXactStats(TState *thread, CState *st, pg_time_usec_t *now,
+processXactStats(TState *thread, CState *st, instr_time *now,
 				 bool skipped, StatsData *agg)
 {
 	double		latency = 0.0,
@@ -3863,11 +3840,11 @@ processXactStats(TState *thread, CState *st, pg_time_usec_t *now,
 
 	if (detailed && !skipped)
 	{
-		pg_time_now_lazy(now);
+		INSTR_TIME_SET_CURRENT_LAZY(*now);
 
 		/* compute latency & lag */
-		latency = (*now) - st->txn_scheduled;
-		lag = st->txn_begin - st->txn_scheduled;
+		latency = INSTR_TIME_GET_MICROSEC(*now) - st->txn_scheduled;
+		lag = INSTR_TIME_GET_MICROSEC(st->txn_begin) - st->txn_scheduled;
 	}
 
 	if (thread_details)
@@ -4118,7 +4095,10 @@ initGenerateDataClientSide(PGconn *con)
 	int64		k;
 
 	/* used to track elapsed time and estimate of the remaining time */
-	pg_time_usec_t start;
+	instr_time	start,
+				diff;
+	double		elapsed_sec,
+				remaining_sec;
 	int			log_interval = 1;
 
 	/* Stay on the same line if reporting to a terminal */
@@ -4170,7 +4150,7 @@ initGenerateDataClientSide(PGconn *con)
 	}
 	PQclear(res);
 
-	start = pg_time_now();
+	INSTR_TIME_SET_CURRENT(start);
 
 	for (k = 0; k < (int64) naccounts * scale; k++)
 	{
@@ -4195,8 +4175,11 @@ initGenerateDataClientSide(PGconn *con)
 		 */
 		if ((!use_quiet) && (j % 100000 == 0))
 		{
-			double		elapsed_sec = PG_TIME_GET_DOUBLE(pg_time_now() - start);
-			double		remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
+			INSTR_TIME_SET_CURRENT(diff);
+			INSTR_TIME_SUBTRACT(diff, start);
+
+			elapsed_sec = INSTR_TIME_GET_DOUBLE(diff);
+			remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
 
 			fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s)%c",
 					j, (int64) naccounts * scale,
@@ -4206,8 +4189,11 @@ initGenerateDataClientSide(PGconn *con)
 		/* let's not call the timing for each row, but only each 100 rows */
 		else if (use_quiet && (j % 100 == 0))
 		{
-			double		elapsed_sec = PG_TIME_GET_DOUBLE(pg_time_now() - start);
-			double		remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
+			INSTR_TIME_SET_CURRENT(diff);
+			INSTR_TIME_SUBTRACT(diff, start);
+
+			elapsed_sec = INSTR_TIME_GET_DOUBLE(diff);
+			remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
 
 			/* have we reached the next interval (or end)? */
 			if ((j == scale * naccounts) || (elapsed_sec >= log_interval * LOG_STEP_SECONDS))
@@ -4412,8 +4398,10 @@ runInitSteps(const char *initialize_steps)
 
 	for (step = initialize_steps; *step != '\0'; step++)
 	{
+		instr_time	start;
 		char	   *op = NULL;
-		pg_time_usec_t start = pg_time_now();
+
+		INSTR_TIME_SET_CURRENT(start);
 
 		switch (*step)
 		{
@@ -4455,7 +4443,12 @@ runInitSteps(const char *initialize_steps)
 
 		if (op != NULL)
 		{
-			double		elapsed_sec = PG_TIME_GET_DOUBLE(pg_time_now() - start);
+			instr_time	diff;
+			double		elapsed_sec;
+
+			INSTR_TIME_SET_CURRENT(diff);
+			INSTR_TIME_SUBTRACT(diff, start);
+			elapsed_sec = INSTR_TIME_GET_DOUBLE(diff);
 
 			if (!first)
 				appendPQExpBufferStr(&stats, ", ");
@@ -5403,12 +5396,12 @@ addScript(ParsedScript script)
  * progress report.  On exit, they are updated with the new stats.
  */
 static void
-printProgressReport(TState *threads, int64 test_start, pg_time_usec_t now,
+printProgressReport(TState *threads, int64 test_start, int64 now,
 					StatsData *last, int64 *last_report)
 {
 	/* generate and show report */
-	pg_time_usec_t run = now - *last_report;
-	int64		ntx;
+	int64		run = now - *last_report,
+				ntx;
 	double		tps,
 				total_run,
 				latency,
@@ -5455,7 +5448,16 @@ printProgressReport(TState *threads, int64 test_start, pg_time_usec_t now,
 
 	if (progress_timestamp)
 	{
-		snprintf(tbuf, sizeof(tbuf), "%.3f s", PG_TIME_GET_DOUBLE(now));
+		/*
+		 * On some platforms the current system timestamp is available in
+		 * now_time, but rather than get entangled with that, we just eat the
+		 * cost of an extra syscall in all cases.
+		 */
+		struct timeval tv;
+
+		gettimeofday(&tv, NULL);
+		snprintf(tbuf, sizeof(tbuf), "%ld.%03ld s",
+				 (long) tv.tv_sec, (long) (tv.tv_usec / 1000));
 	}
 	else
 	{
@@ -5495,18 +5497,21 @@ printSimpleStats(const char *prefix, SimpleStats *ss)
 
 /* print out results */
 static void
-printResults(StatsData *total,
-			 pg_time_usec_t total_duration, /* benchmarking time */
-			 pg_time_usec_t conn_total_duration,	/* is_connect */
-			 pg_time_usec_t conn_elapsed_duration,	/* !is_connect */
-			 int64 latency_late)
+printResults(StatsData *total, instr_time total_time,
+			 instr_time conn_total_time, int64 latency_late)
 {
-	/* tps is about actually executed transactions during benchmarking */
+	double		time_include,
+				tps_include,
+				tps_exclude;
 	int64		ntx = total->cnt - total->skipped;
-	double		bench_duration = PG_TIME_GET_DOUBLE(total_duration);
-	double		tps = ntx / bench_duration;
 
-	printf("pgbench (PostgreSQL) %d.%d\n", PG_VERSION_NUM / 10000, PG_VERSION_NUM % 100);
+	time_include = INSTR_TIME_GET_DOUBLE(total_time);
+
+	/* tps is about actually executed transactions */
+	tps_include = ntx / time_include;
+	tps_exclude = ntx /
+		(time_include - (INSTR_TIME_GET_DOUBLE(conn_total_time) / nclients));
+
 	/* Report test parameters. */
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
@@ -5537,7 +5542,8 @@ printResults(StatsData *total,
 
 	if (throttle_delay && latency_limit)
 		printf("number of transactions skipped: " INT64_FORMAT " (%.3f %%)\n",
-			   total->skipped, 100.0 * total->skipped / total->cnt);
+			   total->skipped,
+			   100.0 * total->skipped / total->cnt);
 
 	if (latency_limit)
 		printf("number of transactions above the %.1f ms latency limit: " INT64_FORMAT "/" INT64_FORMAT " (%.3f %%)\n",
@@ -5550,7 +5556,7 @@ printResults(StatsData *total,
 	{
 		/* no measurement, show average latency computed from run time */
 		printf("latency average = %.3f ms\n",
-			   0.001 * total_duration * nclients / total->cnt);
+			   1000.0 * time_include * nclients / total->cnt);
 	}
 
 	if (throttle_delay)
@@ -5565,25 +5571,8 @@ printResults(StatsData *total,
 			   0.001 * total->lag.sum / total->cnt, 0.001 * total->lag.max);
 	}
 
-	/*
-	 * Under -C/--connect, each transaction incurs a significant connection
-	 * cost, it would not make much sense to ignore it in tps, and it would
-	 * not be tps anyway.
-	 *
-	 * Otherwise connections are made just once at the beginning of the run
-	 * and should not impact performance but for very short run, so they are
-	 * (right)fully ignored in tps.
-	 */
-	if (is_connect)
-	{
-		printf("average connection time = %.3f ms\n", 0.001 * conn_total_duration / total->cnt);
-		printf("tps = %f (including reconnection times)\n", tps);
-	}
-	else
-	{
-		printf("initial connection time = %.3f ms\n", 0.001 * conn_elapsed_duration);
-		printf("tps = %f (without initial connection time)\n", tps);
-	}
+	printf("tps = %f (including connections establishing)\n", tps_include);
+	printf("tps = %f (excluding connections establishing)\n", tps_exclude);
 
 	/* Report per-script/command statistics */
 	if (per_script_stats || report_per_command)
@@ -5604,7 +5593,7 @@ printResults(StatsData *total,
 					   100.0 * sql_script[i].weight / total_weight,
 					   sstats->cnt,
 					   100.0 * sstats->cnt / total->cnt,
-					   (sstats->cnt - sstats->skipped) / bench_duration);
+					   (sstats->cnt - sstats->skipped) / time_include);
 
 				if (throttle_delay && latency_limit && sstats->cnt > 0)
 					printf(" - number of transactions skipped: " INT64_FORMAT " (%.3f%%)\n",
@@ -5652,7 +5641,10 @@ set_random_seed(const char *seed)
 	if (seed == NULL || strcmp(seed, "time") == 0)
 	{
 		/* rely on current time */
-		iseed = pg_time_now();
+		instr_time	now;
+
+		INSTR_TIME_SET_CURRENT(now);
+		iseed = (uint64) INSTR_TIME_GET_MICROSEC(now);
 	}
 	else if (strcmp(seed, "rand") == 0)
 	{
@@ -5755,11 +5747,9 @@ main(int argc, char **argv)
 	CState	   *state;			/* status of clients */
 	TState	   *threads;		/* array of thread */
 
-	pg_time_usec_t
-				start_time,		/* start up time */
-				bench_start = 0,	/* first recorded benchmarking time */
-				conn_total_duration;	/* cumulated connection time in
-										 * threads */
+	instr_time	start_time;		/* start up time */
+	instr_time	total_time;
+	instr_time	conn_total_time;
 	int64		latency_late = 0;
 	StatsData	stats;
 	int			weight;
@@ -6428,55 +6418,67 @@ main(int argc, char **argv)
 	/* all clients must be assigned to a thread */
 	Assert(nclients_dealt == nclients);
 
-	/* get start up time for the whole computation */
-	start_time = pg_time_now();
+	/* get start up time */
+	INSTR_TIME_SET_CURRENT(start_time);
 
 	/* set alarm if duration is specified. */
 	if (duration > 0)
 		setalarm(duration);
 
+	/* set up the barrier that we'll use to synchronize threads */
 	errno = THREAD_BARRIER_INIT(&barrier, nthreads);
 	if (errno != 0)
 		pg_log_fatal("could not initialize barrier: %m");
 
+	/* start threads */
 #ifdef ENABLE_THREAD_SAFETY
-	/* start all threads but thread 0 which is executed directly later */
-	for (i = 1; i < nthreads; i++)
+	for (i = 0; i < nthreads; i++)
 	{
 		TState	   *thread = &threads[i];
 
-		thread->create_time = pg_time_now();
-		errno = THREAD_CREATE(&thread->thread, threadRun, thread);
+		INSTR_TIME_SET_CURRENT(thread->start_time);
+
+		/* compute when to stop */
+		if (duration > 0)
+			end_time = INSTR_TIME_GET_MICROSEC(thread->start_time) +
+				(int64) 1000000 * duration;
 
-		if (errno != 0)
+		/* the first thread (i = 0) is executed by main thread */
+		if (i > 0)
 		{
-			pg_log_fatal("could not create thread: %m");
-			exit(1);
+			errno = THREAD_CREATE(&thread->thread, threadRun, thread);
+
+			if (errno != 0)
+			{
+				pg_log_fatal("could not create thread: %m");
+				exit(1);
+			}
 		}
 	}
 #else
-	Assert(nthreads == 1);
-#endif							/* ENABLE_THREAD_SAFETY */
-
+	INSTR_TIME_SET_CURRENT(threads[0].start_time);
 	/* compute when to stop */
-	threads[0].create_time = pg_time_now();
 	if (duration > 0)
-		end_time = threads[0].create_time + (int64) 1000000 * duration;
-
-	/* run thread 0 directly */
-	(void) threadRun(&threads[0]);
+		end_time = INSTR_TIME_GET_MICROSEC(threads[0].start_time) +
+			(int64) 1000000 * duration;
+#endif							/* ENABLE_THREAD_SAFETY */
 
-	/* wait for other threads and accumulate results */
+	/* wait for threads and accumulate results */
 	initStats(&stats, 0);
-	conn_total_duration = 0;
-
+	INSTR_TIME_SET_ZERO(conn_total_time);
 	for (i = 0; i < nthreads; i++)
 	{
 		TState	   *thread = &threads[i];
 
 #ifdef ENABLE_THREAD_SAFETY
-		if (i > 0)
+		if (i == 0)
+			/* actually run this thread directly in the main thread */
+			(void) threadRun(thread);
+		else
+			/* wait of other threads. should check that 0 is returned? */
 			THREAD_JOIN(thread->thread);
+#else
+		(void) threadRun(thread);
 #endif							/* ENABLE_THREAD_SAFETY */
 
 		for (int j = 0; j < thread->nstate; j++)
@@ -6489,25 +6491,23 @@ main(int argc, char **argv)
 		stats.cnt += thread->stats.cnt;
 		stats.skipped += thread->stats.skipped;
 		latency_late += thread->latency_late;
-		conn_total_duration += thread->conn_duration;
-
-		/* first recorded benchmarking start time */
-		if (bench_start == 0 || thread->bench_start < bench_start)
-			bench_start = thread->bench_start;
+		INSTR_TIME_ADD(conn_total_time, thread->conn_time);
 	}
-
-	/* XXX should this be connection time? */
 	disconnect_all(state, nclients);
 
 	/*
-	 * Beware that performance of short benchmarks with many threads and
-	 * possibly long transactions can be deceptive because threads do not
-	 * start and finish at the exact same time. The total duration computed
-	 * here encompasses all transactions so that tps shown is somehow slightly
-	 * underestimated.
+	 * XXX We compute results as though every client of every thread started
+	 * and finished at the same time.  That model can diverge noticeably from
+	 * reality for a short benchmark run involving relatively many threads.
+	 * The first thread may process notably many transactions before the last
+	 * thread begins.  Improving the model alone would bring limited benefit,
+	 * because performance during those periods of partial thread count can
+	 * easily exceed steady state performance.  This is one of the many ways
+	 * short runs convey deceptive performance figures.
 	 */
-	printResults(&stats, pg_time_now() - bench_start, conn_total_duration,
-				 bench_start - start_time, latency_late);
+	INSTR_TIME_SET_CURRENT(total_time);
+	INSTR_TIME_SUBTRACT(total_time, start_time);
+	printResults(&stats, total_time, conn_total_time, latency_late);
 
 	THREAD_BARRIER_DESTROY(&barrier);
 
@@ -6522,16 +6522,34 @@ threadRun(void *arg)
 {
 	TState	   *thread = (TState *) arg;
 	CState	   *state = thread->state;
-	pg_time_usec_t start;
+	instr_time	start,
+				end;
 	int			nstate = thread->nstate;
 	int			remains = nstate;	/* number of remaining clients */
 	socket_set *sockets = alloc_socket_set(nstate);
-	int64		thread_start,
-				last_report,
-				next_report;
+	int			i;
+
+	/* for reporting progress: */
+	int64		thread_start = INSTR_TIME_GET_MICROSEC(thread->start_time);
+	int64		last_report = thread_start;
+	int64		next_report = last_report + (int64) progress * 1000000;
 	StatsData	last,
 				aggs;
 
+	/*
+	 * Initialize throttling rate target for all of the thread's clients.  It
+	 * might be a little more accurate to reset thread->start_time here too.
+	 * The possible drift seems too small relative to typical throttle delay
+	 * times to worry about it.
+	 */
+	INSTR_TIME_SET_CURRENT(start);
+	thread->throttle_trigger = INSTR_TIME_GET_MICROSEC(start);
+
+	INSTR_TIME_SET_ZERO(thread->conn_time);
+
+	initStats(&aggs, time(NULL));
+	last = aggs;
+
 	/* open log file if requested */
 	if (use_log)
 	{
@@ -6552,23 +6570,10 @@ threadRun(void *arg)
 		}
 	}
 
-	/* explicitly initialize the state machines */
-	for (int i = 0; i < nstate; i++)
-		state[i].state = CSTATE_CHOOSE_SCRIPT;
-
-	/* READY */
-	THREAD_BARRIER_WAIT(&barrier);
-
-	thread_start = pg_time_now();
-	thread->started_time = thread_start;
-	last_report = thread_start;
-	next_report = last_report + (int64) 1000000 * progress;
-
-	/* STEADY */
 	if (!is_connect)
 	{
 		/* make connections to the database before starting */
-		for (int i = 0; i < nstate; i++)
+		for (i = 0; i < nstate; i++)
 		{
 			if ((state[i].con = doConnect()) == NULL)
 			{
@@ -6584,32 +6589,26 @@ threadRun(void *arg)
 				goto done;
 			}
 		}
-
-		/* compute connection delay */
-		thread->conn_duration = pg_time_now() - thread->started_time;
-	}
-	else
-	{
-		/* no connection delay to record */
-		thread->conn_duration = 0;
 	}
 
-	/* GO */
-	THREAD_BARRIER_WAIT(&barrier);
+	/* time after thread and connections set up */
+	INSTR_TIME_SET_CURRENT(thread->conn_time);
+	INSTR_TIME_SUBTRACT(thread->conn_time, thread->start_time);
 
-	start = pg_time_now();
-	thread->bench_start = start;
-	thread->throttle_trigger = start;
+	THREAD_BARRIER_WAIT(&barrier);
 
-	initStats(&aggs, start);
-	last = aggs;
+	/* explicitly initialize the state machines */
+	for (i = 0; i < nstate; i++)
+	{
+		state[i].state = CSTATE_CHOOSE_SCRIPT;
+	}
 
 	/* loop till all clients have terminated */
 	while (remains > 0)
 	{
 		int			nsocks;		/* number of sockets to be waited for */
-		pg_time_usec_t min_usec;
-		pg_time_usec_t now = 0; /* set this only if needed */
+		int64		min_usec;
+		int64		now_usec = 0;	/* set this only if needed */
 
 		/*
 		 * identify which client sockets should be checked for input, and
@@ -6618,21 +6617,27 @@ threadRun(void *arg)
 		clear_socket_set(sockets);
 		nsocks = 0;
 		min_usec = PG_INT64_MAX;
-		for (int i = 0; i < nstate; i++)
+		for (i = 0; i < nstate; i++)
 		{
 			CState	   *st = &state[i];
 
 			if (st->state == CSTATE_SLEEP || st->state == CSTATE_THROTTLE)
 			{
 				/* a nap from the script, or under throttling */
-				pg_time_usec_t this_usec;
+				int64		this_usec;
 
 				/* get current time if needed */
-				pg_time_now_lazy(&now);
+				if (now_usec == 0)
+				{
+					instr_time	now;
+
+					INSTR_TIME_SET_CURRENT(now);
+					now_usec = INSTR_TIME_GET_MICROSEC(now);
+				}
 
 				/* min_usec should be the minimum delay across all clients */
 				this_usec = (st->state == CSTATE_SLEEP ?
-							 st->sleep_until : st->txn_scheduled) - now;
+							 st->sleep_until : st->txn_scheduled) - now_usec;
 				if (min_usec > this_usec)
 					min_usec = this_usec;
 			}
@@ -6667,12 +6672,19 @@ threadRun(void *arg)
 		/* also wake up to print the next progress report on time */
 		if (progress && min_usec > 0 && thread->tid == 0)
 		{
-			pg_time_now_lazy(&now);
+			/* get current time if needed */
+			if (now_usec == 0)
+			{
+				instr_time	now;
 
-			if (now >= next_report)
+				INSTR_TIME_SET_CURRENT(now);
+				now_usec = INSTR_TIME_GET_MICROSEC(now);
+			}
+
+			if (now_usec >= next_report)
 				min_usec = 0;
-			else if ((next_report - now) < min_usec)
-				min_usec = next_report - now;
+			else if ((next_report - now_usec) < min_usec)
+				min_usec = next_report - now_usec;
 		}
 
 		/*
@@ -6721,7 +6733,7 @@ threadRun(void *arg)
 
 		/* ok, advance the state machine of each connection */
 		nsocks = 0;
-		for (int i = 0; i < nstate; i++)
+		for (i = 0; i < nstate; i++)
 		{
 			CState	   *st = &state[i];
 
@@ -6759,8 +6771,11 @@ threadRun(void *arg)
 		/* progress report is made by thread 0 for all threads */
 		if (progress && thread->tid == 0)
 		{
-			pg_time_usec_t now = pg_time_now();
+			instr_time	now_time;
+			int64		now;
 
+			INSTR_TIME_SET_CURRENT(now_time);
+			now = INSTR_TIME_GET_MICROSEC(now_time);
 			if (now >= next_report)
 			{
 				/*
@@ -6778,17 +6793,17 @@ threadRun(void *arg)
 				 */
 				do
 				{
-					next_report += (int64) 1000000 * progress;
+					next_report += (int64) progress * 1000000;
 				} while (now >= next_report);
 			}
 		}
 	}
 
 done:
-	start = pg_time_now();
+	INSTR_TIME_SET_CURRENT(start);
 	disconnect_all(state, nstate);
-	thread->conn_duration += pg_time_now() - start;
-
+	INSTR_TIME_SET_CURRENT(end);
+	INSTR_TIME_ACCUM_DIFF(thread->conn_time, end, start);
 	if (thread->logfile)
 	{
 		if (agg_interval > 0)
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index abdb08319c..ce7963a674 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3347,7 +3347,6 @@ pg_sha512_ctx
 pg_snapshot
 pg_stack_base_t
 pg_time_t
-pg_time_usec_t
 pg_tz
 pg_tz_cache
 pg_tzenum
-- 
2.30.2

Re: pgbench logging broken by time logic changes

Reply via email to