Attached is the pg_test_timing utility portion of this submission, broken out into its own patch. It's a contrib module modeled on pg_test_fsync.

The documentation is still a bit rough, I'm not done with that yet. I have included an example of good timing results, switching to a bad clock source, and the resulting bad results. Code review found some formatting things to nitpick I've already fixed: non-standard brace locations and not including enough spaces in expressions were the main two.

This is now referenced by the existing cryptic documentation comment around EXPLAIN ANALYZE, which says that overhead can be high because gettimeofday is slow on some systems. Since this utility measures that directly, I think it's a clear win to include it just for that purpose. The fact that there are more places coming where timing overhead matters is also true. But this existing one is already bad enough to justify shipping something to help measure/manage it in my mind.

--
Greg Smith   2ndQuadrant US    g...@2ndquadrant.com   Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support www.2ndQuadrant.com

diff --git a/contrib/Makefile b/contrib/Makefile
index 0c238aa..45b601c 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -35,6 +35,7 @@ SUBDIRS = \
 		pg_standby	\
 		pg_stat_statements \
 		pg_test_fsync	\
+		pg_test_timing	\
 		pg_trgm		\
 		pg_upgrade	\
 		pg_upgrade_support \
diff --git a/contrib/pg_test_timing/Makefile b/contrib/pg_test_timing/Makefile
new file mode 100644
index 0000000..b8b266a
--- /dev/null
+++ b/contrib/pg_test_timing/Makefile
@@ -0,0 +1,18 @@
+# contrib/pg_test_timing/Makefile
+
+PGFILEDESC = "pg_test_timing - test timing overhead"
+PGAPPICON = win32
+
+PROGRAM  = pg_test_timing
+OBJS = pg_test_timing.o
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pg_test_timing
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/pg_test_timing/pg_test_timing.c b/contrib/pg_test_timing/pg_test_timing.c
new file mode 100644
index 0000000..bcf3c3a
--- /dev/null
+++ b/contrib/pg_test_timing/pg_test_timing.c
@@ -0,0 +1,157 @@
+/*
+ *	pg_test_timing.c
+ *		tests overhead of timing calls and their monoticity:  that
+ * 		they always move forward
+ */
+
+#include "postgres_fe.h"
+
+#include "getopt_long.h"
+#include "portability/instr_time.h"
+
+static const char *progname;
+
+static int32 test_duration = 3;
+
+static void handle_args(int argc, char *argv[]);
+static void test_timing(int32);
+
+int
+main(int argc, char *argv[])
+{
+	progname = get_progname(argv[0]);
+
+	handle_args(argc, argv);
+
+	test_timing(test_duration);
+
+	return 0;
+}
+
+static void
+handle_args(int argc, char *argv[])
+{
+	static struct option long_options[] = {
+		{"duration", required_argument, NULL, 'd'},
+		{NULL, 0, NULL, 0}
+	};
+	int option;			/* Command line option */
+	int	optindex = 0;	/* used by getopt_long */
+
+	if (argc > 1)
+	{
+		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0 ||
+			strcmp(argv[1], "-?") == 0)
+		{
+			printf("Usage: %s [-d DURATION]\n", progname);
+			exit(0);
+		}
+		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
+		{
+			puts("pg_test_timing (PostgreSQL) " PG_VERSION);
+			exit(0);
+		}
+	}
+
+	while ((option = getopt_long(argc, argv, "d:",
+								 long_options, &optindex)) != -1)
+	{
+		switch (option)
+		{
+			case 'd':
+				test_duration = atoi(optarg);
+				break;
+
+			default:
+				fprintf(stderr, "Try \"%s --help\" for more information.\n",
+						progname);
+				exit(1);
+				break;
+		}
+	}
+
+	if (argc > optind)
+	{
+		fprintf(stderr,
+				"%s: too many command-line arguments (first is \"%s\")\n",
+				progname, argv[optind]);
+		fprintf(stderr, "Try \"%s --help\" for more information.\n",
+				progname);
+		exit(1);
+	}
+
+	if (test_duration > 0)
+	{
+		printf("Testing timing overhead for %d seconds.\n", test_duration);
+	}
+	else
+	{
+		printf("Testing timing was interrupted.\n");
+	}
+}
+
+static void
+test_timing(int32 duration)
+{
+	uint64 total_time;
+	int64 time_elapsed = 0;
+	uint64 loop_count = 0;
+	uint64 prev, cur;
+	int32 diff, i, bits, found;
+
+	instr_time start_time, end_time, temp;
+
+	static int64 histogram[32];
+
+	total_time = duration > 0 ? duration * 1000000 : 0;
+
+	INSTR_TIME_SET_CURRENT(start_time);
+	cur = INSTR_TIME_GET_MICROSEC(start_time);
+
+	while (time_elapsed < total_time)
+	{
+		prev = cur;
+		INSTR_TIME_SET_CURRENT(temp);
+		cur = INSTR_TIME_GET_MICROSEC(temp);
+		diff = cur - prev;
+
+		if (diff < 0)
+		{
+			printf("Detected clock going backwards in time.\n");
+			printf("Time warp: %d microseconds\n", diff);
+			exit(1);
+		}
+
+		bits = 0;
+		while (diff)
+		{
+			diff >>= 1;
+			bits++;
+		}
+		histogram[bits]++;
+
+		loop_count++;
+		INSTR_TIME_SUBTRACT(temp, start_time);
+		time_elapsed = INSTR_TIME_GET_MICROSEC(temp);
+	}
+
+	INSTR_TIME_SET_CURRENT(end_time);
+
+	INSTR_TIME_SUBTRACT(end_time, start_time);
+
+	printf("Per timing duration including loop overhead: %0.2f ns\n",
+			INSTR_TIME_GET_DOUBLE(end_time) * 1e9 / loop_count);
+	printf("Histogram of timing durations:\n");
+	printf("%9s: %10s %9s\n", "< usec", "count", "percent");
+
+	found = 0;
+    for (i = 31; i >= 0; i--)
+    {
+        if (found || histogram[i])
+        {
+            found = 1;
+            printf("%9ld: %10ld %8.5f%%\n", 1l << i, histogram[i],
+                (double) histogram[i] * 100 / loop_count);
+        }
+    }
+}
diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml
index adf09ca..b418688 100644
--- a/doc/src/sgml/contrib.sgml
+++ b/doc/src/sgml/contrib.sgml
@@ -121,6 +121,7 @@ CREATE EXTENSION <replaceable>module_name</> FROM unpackaged;
  &pgstatstatements;
  &pgstattuple;
  &pgtestfsync;
+ &pgtesttiming;
  &pgtrgm;
  &pgupgrade;
  &seg;
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index b96dd65..38c9334 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -129,6 +129,7 @@
 <!ENTITY pgstatstatements SYSTEM "pgstatstatements.sgml">
 <!ENTITY pgstattuple     SYSTEM "pgstattuple.sgml">
 <!ENTITY pgtestfsync     SYSTEM "pgtestfsync.sgml">
+<!ENTITY pgtesttiming    SYSTEM "pgtesttiming.sgml">
 <!ENTITY pgtrgm          SYSTEM "pgtrgm.sgml">
 <!ENTITY pgupgrade       SYSTEM "pgupgrade.sgml">
 <!ENTITY seg             SYSTEM "seg.sgml">
diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml
index 8e695fd..5a8c6fc 100644
--- a/doc/src/sgml/perform.sgml
+++ b/doc/src/sgml/perform.sgml
@@ -770,7 +770,9 @@ ROLLBACK;
     network transmission costs and I/O conversion costs are not included.
     Second, the measurement overhead added by <command>EXPLAIN
     ANALYZE</command> can be significant, especially on machines with slow
-    <function>gettimeofday()</> operating-system calls.
+    <function>gettimeofday()</> operating-system calls. You can use the
+    <xref linkend="pgtesttiming"> tool to measure the overhead of timing
+    on your system.
    </para>
 
    <para>
diff --git a/doc/src/sgml/pgtesttiming.sgml b/doc/src/sgml/pgtesttiming.sgml
new file mode 100644
index 0000000..f330265
--- /dev/null
+++ b/doc/src/sgml/pgtesttiming.sgml
@@ -0,0 +1,116 @@
+<!-- doc/src/sgml/pgtesttiming.sgml -->
+
+<sect1 id="pgtesttiming" xreflabel="pg_test_timing">
+ <title>pg_test_timing</title>
+
+ <indexterm zone="pgtesttiming">
+  <primary>pg_test_timing</primary>
+ </indexterm>
+
+ <para>
+  <application>pg_test_timing</> is a tool to measure the timing overhead
+  on your system and confirm that the system time never moves backwards.
+ </para>
+
+ <sect2>
+  <title>Usage</title>
+
+<synopsis>
+pg_test_timing [options]
+</synopsis>
+
+   <para>
+    <application>pg_test_timing</application> accepts the following
+    command-line options:
+
+    <variablelist>
+
+     <varlistentry>
+      <term><option>-d</option></term>
+      <term><option>--duration</option></term>
+      <listitem>
+       <para>
+        Specifies the number of seconds to run the test. Longer durations
+        give slightly better accuracy, and are more likely to discover
+        problems with the system clock moving backwards. The default
+        test duration is 3 seconds.
+       </para>
+      </listitem>
+     </varlistentry>
+
+    </variablelist>
+   </para>
+
+ </sect2>
+
+ <sect2>
+  <title>Interpreting results</title>
+  <para>
+        Collecting accurate timing information on 
+        Linux includes the options tsc, hpet, and acpi_pm 
+  </para>
+  <para>
+        Good results will show most individual timing calls take less
+        than one nanoseconds, such as this example from an Intel i7-860
+        system using the tsc clock source:
+
+<screen>
+Testing timing overhead for 3 seconds.
+Per timing duration including loop overhead: 35.96 ns
+Histogram of timing durations:
+   < usec:      count   percent
+       16:          2  0.00000%
+        8:         13  0.00002%
+        4:        126  0.00015%
+        2:    2999652  3.59518%
+        1:   80435604 96.40465%
+</screen>
+
+  </para>
+ </sect2>
+
+ <sect2>
+  <title>Changing time sources</title>
+  <para>
+        On Linux systems, it's possible to change the clock source
+        used to collect timing data at any time.  A second example
+        shows the slowdown possible from switching to the slower acpi_pm
+        time source, on the same system used for the fast results above:
+
+<screen>
+# cat /sys/devices/system/clocksource/clocksource0/available_clocksource 
+tsc hpet acpi_pm
+# echo acpi_pm > /sys/devices/system/clocksource/clocksource0/current_clocksource
+# pg_test_timing
+Per timing duration including loop overhead: 722.92 ns
+Histogram of timing durations:
+   < usec:      count   percent
+       16:          3  0.00007%
+        8:        563  0.01357%
+        4:       3241  0.07810%
+        2:    2990371 72.05956%
+        1:    1155682 27.84870%
+</screen>
+
+  </para>
+
+  <para>
+        The tsc clock source is the most accurate one available on current
+        generation CPUs.  However, trying to use it on older CPUs sometimes
+        resulted in the reported time being inconsistent among multiple cores.
+        Newer versions of Linux will check for this and switch to a slower,
+        more stable clock sources instead.  Older ones would allow using tsc
+        in situations where it's now known to be inaccurate, which can
+        result in system instability.
+  </para>
+ </sect2>
+
+ <sect2>
+  <title>Author</title>
+
+  <para>
+   Ants Aasma <email>ants.aa...@eesti.ee</email>
+  </para>
+ </sect2>
+
+</sect1>
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to