Robert Haas wrote:
I took a look at this and it looks generally good, but I'm wondering
why md.c is converting the results from an exact value to a floating
point, only to have xlog.c turn around and convert back to an integer.
 I think it could just return milliseconds directly, or if you're
worried about a checkpoint that takes more than 24 days to complete,
seconds and microseconds.

Attached patch now does something like this, except without the roll-over concern. INSTR_TIME_GET_MICROSEC returns a uint64 value. I just made that the storage format for all these values until they're converted for display. Test output:

LOG:  checkpoint starting: xlog
DEBUG:  checkpoint sync: number=1 file=base/16385/16480 time=10422.859 msec
DEBUG: checkpoint sync: number=2 file=base/16385/16475_vm time=2896.614 msec
DEBUG:  checkpoint sync: number=3 file=base/16385/16475.1 time=57.836 msec
DEBUG:  checkpoint sync: number=4 file=base/16385/16466 time=20.080 msec
DEBUG:  checkpoint sync: number=5 file=base/16385/16463 time=74.926 msec
DEBUG:  checkpoint sync: number=6 file=base/16385/16482 time=74.263 msec
DEBUG:  checkpoint sync: number=7 file=base/16385/16475_fsm time=7.062 msec
DEBUG:  checkpoint sync: number=8 file=base/16385/16475 time=35.164 msec
LOG: checkpoint complete: wrote 2143 buffers (52.3%); 0 transaction log file(s) added, 0 removed, 3 recycled; write=1.213 s, sync=13.589 s, total=24.744 s; sync files=8, longest=10.422 s, average=1.698 s

This shows the hard truncation used, so 10422.859 msec becomes 10.422 s. I don't think allowing up to 0.999ms of error there is a problem given the expected scale. But since none of the precision is lost until the end, that could be changed with only touching the final display formatting conversion of the value. Following your general idea further, why throw away any resolution inside of md.c; let xlog.c decide how to show it.

Note that I also fixed the DEBUG level lines to only show their actual precision. Before that was printing 6 digits to the right of the decimal point each time, the last three of which were always 0.

--
Greg Smith   2ndQuadrant US    g...@2ndquadrant.com   Baltimore, MD
PostgreSQL Training, Services and Support        www.2ndQuadrant.us
"PostgreSQL 9.0 High Performance": http://www.2ndQuadrant.com/books

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 1ed9687..c9778df 100644
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
*************** LogCheckpointEnd(bool restartpoint)
*** 6955,6964 ****
  {
  	long		write_secs,
  				sync_secs,
! 				total_secs;
  	int			write_usecs,
  				sync_usecs,
! 				total_usecs;
  
  	CheckpointStats.ckpt_end_t = GetCurrentTimestamp();
  
--- 6955,6969 ----
  {
  	long		write_secs,
  				sync_secs,
! 				total_secs,
! 				longest_secs,
! 				average_secs;
  	int			write_usecs,
  				sync_usecs,
! 				total_usecs,
! 				longest_usecs,
! 				average_usecs;
! 	uint64		average_sync_time;
  
  	CheckpointStats.ckpt_end_t = GetCurrentTimestamp();
  
*************** LogCheckpointEnd(bool restartpoint)
*** 6974,6991 ****
  						CheckpointStats.ckpt_sync_end_t,
  						&sync_secs, &sync_usecs);
  
  	if (restartpoint)
  		elog(LOG, "restartpoint complete: wrote %d buffers (%.1f%%); "
! 			 "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s",
  			 CheckpointStats.ckpt_bufs_written,
  			 (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
  			 write_secs, write_usecs / 1000,
  			 sync_secs, sync_usecs / 1000,
! 			 total_secs, total_usecs / 1000);
  	else
  		elog(LOG, "checkpoint complete: wrote %d buffers (%.1f%%); "
  			 "%d transaction log file(s) added, %d removed, %d recycled; "
! 			 "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s",
  			 CheckpointStats.ckpt_bufs_written,
  			 (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
  			 CheckpointStats.ckpt_segs_added,
--- 6979,7017 ----
  						CheckpointStats.ckpt_sync_end_t,
  						&sync_secs, &sync_usecs);
  
+ 	/*
+ 	 * Timing values returned from CheckpointStats are in microseconds.
+ 	 * Convert to the second plus microsecond form that TimestampDifference
+ 	 * returns for homogeneous printing.
+ 	 */
+ 	longest_secs = (long) (CheckpointStats.ckpt_longest_sync / 1000000);
+ 	longest_usecs = CheckpointStats.ckpt_longest_sync -
+ 		(uint64) longest_secs * 1000000;
+ 
+ 	average_sync_time = 0;
+ 	if (CheckpointStats.ckpt_sync_rels > 0) 
+ 		average_sync_time = CheckpointStats.ckpt_agg_sync_time /
+ 			CheckpointStats.ckpt_sync_rels;
+ 	average_secs = (long) (average_sync_time / 1000000);
+ 	average_usecs = average_sync_time - (uint64) average_secs * 1000000;
+ 
  	if (restartpoint)
  		elog(LOG, "restartpoint complete: wrote %d buffers (%.1f%%); "
! 			 "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s; "
! 			 "sync files=%d, longest=%ld.%03d s, average=%ld.%03d s",
  			 CheckpointStats.ckpt_bufs_written,
  			 (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
  			 write_secs, write_usecs / 1000,
  			 sync_secs, sync_usecs / 1000,
! 			 total_secs, total_usecs / 1000,
! 			 CheckpointStats.ckpt_sync_rels,
! 			 longest_secs, longest_usecs / 1000,
! 			 average_secs, average_usecs / 1000);
  	else
  		elog(LOG, "checkpoint complete: wrote %d buffers (%.1f%%); "
  			 "%d transaction log file(s) added, %d removed, %d recycled; "
! 			 "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s; "
! 			 "sync files=%d, longest=%ld.%03d s, average=%ld.%03d s",
  			 CheckpointStats.ckpt_bufs_written,
  			 (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
  			 CheckpointStats.ckpt_segs_added,
*************** LogCheckpointEnd(bool restartpoint)
*** 6993,6999 ****
  			 CheckpointStats.ckpt_segs_recycled,
  			 write_secs, write_usecs / 1000,
  			 sync_secs, sync_usecs / 1000,
! 			 total_secs, total_usecs / 1000);
  }
  
  /*
--- 7019,7028 ----
  			 CheckpointStats.ckpt_segs_recycled,
  			 write_secs, write_usecs / 1000,
  			 sync_secs, sync_usecs / 1000,
! 			 total_secs, total_usecs / 1000,
! 			 CheckpointStats.ckpt_sync_rels,
! 			 longest_secs, longest_usecs / 1000,
! 			 average_secs, average_usecs / 1000);
  }
  
  /*
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 604db21..64f9027 100644
*** a/src/backend/storage/smgr/md.c
--- b/src/backend/storage/smgr/md.c
***************
*** 20,25 ****
--- 20,26 ----
  
  #include "catalog/catalog.h"
  #include "miscadmin.h"
+ #include "portability/instr_time.h"
  #include "postmaster/bgwriter.h"
  #include "storage/fd.h"
  #include "storage/bufmgr.h"
*************** mdsync(void)
*** 927,932 ****
--- 928,940 ----
  	PendingOperationEntry *entry;
  	int			absorb_counter;
  
+ 	/* Statistics on sync times */
+ 	int processed = 0;
+ 	instr_time sync_start, sync_end, sync_diff; 
+ 	uint64 elapsed;
+ 	uint64 longest = 0;
+ 	uint64 total_elapsed = 0;
+ 
  	/*
  	 * This is only called during checkpoints, and checkpoints should only
  	 * occur in processes that have created a pendingOpsTable.
*************** mdsync(void)
*** 1069,1077 ****
--- 1077,1107 ----
  				seg = _mdfd_getseg(reln, entry->tag.forknum,
  							  entry->tag.segno * ((BlockNumber) RELSEG_SIZE),
  								   false, EXTENSION_RETURN_NULL);
+ 
+ 				if (log_checkpoints)
+ 					INSTR_TIME_SET_CURRENT(sync_start);
+ 				else
+ 					INSTR_TIME_SET_ZERO(sync_start);
+ 
  				if (seg != NULL &&
  					FileSync(seg->mdfd_vfd) >= 0)
+ 				{
+ 					if (log_checkpoints && (! INSTR_TIME_IS_ZERO(sync_start)))
+ 					{
+ 						INSTR_TIME_SET_CURRENT(sync_end);
+ 						sync_diff = sync_end;
+ 						INSTR_TIME_SUBTRACT(sync_diff, sync_start);
+ 						elapsed = INSTR_TIME_GET_MICROSEC(sync_diff);
+ 						if (elapsed > longest)
+ 							longest = elapsed;
+ 						total_elapsed += elapsed;
+ 						processed++;
+ 						elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f msec", 
+ 							processed, FilePathName(seg->mdfd_vfd), (double) elapsed / 1000);
+ 					}
+ 
  					break;		/* success; break out of retry loop */
+ 				}
  
  				/*
  				 * XXX is there any point in allowing more than one retry?
*************** mdsync(void)
*** 1113,1118 ****
--- 1143,1153 ----
  			elog(ERROR, "pendingOpsTable corrupted");
  	}							/* end loop over hashtable entries */
  
+ 	/* Return sync performance metrics for report at checkpoint end */
+ 	CheckpointStats.ckpt_sync_rels = processed;
+ 	CheckpointStats.ckpt_longest_sync = longest;
+ 	CheckpointStats.ckpt_agg_sync_time = total_elapsed;
+ 
  	/* Flag successful completion of mdsync */
  	mdsync_in_progress = false;
  }
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index fa7ae2a..484cb6c 100644
*** a/src/include/access/xlog.h
--- b/src/include/access/xlog.h
*************** typedef struct CheckpointStatsData
*** 257,262 ****
--- 257,269 ----
  	int			ckpt_segs_added;	/* # of new xlog segments created */
  	int			ckpt_segs_removed;		/* # of xlog segments deleted */
  	int			ckpt_segs_recycled;		/* # of xlog segments recycled */
+ 
+ 	int			ckpt_sync_rels;		/* # of relations synced */
+ 	uint64		ckpt_longest_sync;	/* Longest sync for one relation */
+ 	uint64		ckpt_agg_sync_time;	/* The sum of all the individual sync
+ 									   times, which is not necessarily the
+ 									   same as the total elapsed time for
+ 									   the entire sync phase. */
  } CheckpointStatsData;
  
  extern CheckpointStatsData CheckpointStats;
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to