Attached is the pg_test_timing utility portion of this submission,
broken out into its own patch. It's a contrib module modeled on
pg_test_fsync.
The documentation is still a bit rough, I'm not done with that yet. I
have included an example of good timing results, switching to a bad
clock source, and the resulting bad results. Code review found some
formatting things to nitpick I've already fixed: non-standard brace
locations and not including enough spaces in expressions were the main two.
This is now referenced by the existing cryptic documentation comment
around EXPLAIN ANALYZE, which says that overhead can be high because
gettimeofday is slow on some systems. Since this utility measures that
directly, I think it's a clear win to include it just for that purpose.
The fact that there are more places coming where timing overhead matters
is also true. But this existing one is already bad enough to justify
shipping something to help measure/manage it in my mind.
--
Greg Smith 2ndQuadrant US g...@2ndquadrant.com Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support www.2ndQuadrant.com
diff --git a/contrib/Makefile b/contrib/Makefile
index 0c238aa..45b601c 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -35,6 +35,7 @@ SUBDIRS = \
pg_standby \
pg_stat_statements \
pg_test_fsync \
+ pg_test_timing \
pg_trgm \
pg_upgrade \
pg_upgrade_support \
diff --git a/contrib/pg_test_timing/Makefile b/contrib/pg_test_timing/Makefile
new file mode 100644
index 0000000..b8b266a
--- /dev/null
+++ b/contrib/pg_test_timing/Makefile
@@ -0,0 +1,18 @@
+# contrib/pg_test_timing/Makefile
+
+PGFILEDESC = "pg_test_timing - test timing overhead"
+PGAPPICON = win32
+
+PROGRAM = pg_test_timing
+OBJS = pg_test_timing.o
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pg_test_timing
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/pg_test_timing/pg_test_timing.c b/contrib/pg_test_timing/pg_test_timing.c
new file mode 100644
index 0000000..bcf3c3a
--- /dev/null
+++ b/contrib/pg_test_timing/pg_test_timing.c
@@ -0,0 +1,157 @@
+/*
+ * pg_test_timing.c
+ * tests overhead of timing calls and their monoticity: that
+ * they always move forward
+ */
+
+#include "postgres_fe.h"
+
+#include "getopt_long.h"
+#include "portability/instr_time.h"
+
+static const char *progname;
+
+static int32 test_duration = 3;
+
+static void handle_args(int argc, char *argv[]);
+static void test_timing(int32);
+
+int
+main(int argc, char *argv[])
+{
+ progname = get_progname(argv[0]);
+
+ handle_args(argc, argv);
+
+ test_timing(test_duration);
+
+ return 0;
+}
+
+static void
+handle_args(int argc, char *argv[])
+{
+ static struct option long_options[] = {
+ {"duration", required_argument, NULL, 'd'},
+ {NULL, 0, NULL, 0}
+ };
+ int option; /* Command line option */
+ int optindex = 0; /* used by getopt_long */
+
+ if (argc > 1)
+ {
+ if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0 ||
+ strcmp(argv[1], "-?") == 0)
+ {
+ printf("Usage: %s [-d DURATION]\n", progname);
+ exit(0);
+ }
+ if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
+ {
+ puts("pg_test_timing (PostgreSQL) " PG_VERSION);
+ exit(0);
+ }
+ }
+
+ while ((option = getopt_long(argc, argv, "d:",
+ long_options, &optindex)) != -1)
+ {
+ switch (option)
+ {
+ case 'd':
+ test_duration = atoi(optarg);
+ break;
+
+ default:
+ fprintf(stderr, "Try \"%s --help\" for more information.\n",
+ progname);
+ exit(1);
+ break;
+ }
+ }
+
+ if (argc > optind)
+ {
+ fprintf(stderr,
+ "%s: too many command-line arguments (first is \"%s\")\n",
+ progname, argv[optind]);
+ fprintf(stderr, "Try \"%s --help\" for more information.\n",
+ progname);
+ exit(1);
+ }
+
+ if (test_duration > 0)
+ {
+ printf("Testing timing overhead for %d seconds.\n", test_duration);
+ }
+ else
+ {
+ printf("Testing timing was interrupted.\n");
+ }
+}
+
+static void
+test_timing(int32 duration)
+{
+ uint64 total_time;
+ int64 time_elapsed = 0;
+ uint64 loop_count = 0;
+ uint64 prev, cur;
+ int32 diff, i, bits, found;
+
+ instr_time start_time, end_time, temp;
+
+ static int64 histogram[32];
+
+ total_time = duration > 0 ? duration * 1000000 : 0;
+
+ INSTR_TIME_SET_CURRENT(start_time);
+ cur = INSTR_TIME_GET_MICROSEC(start_time);
+
+ while (time_elapsed < total_time)
+ {
+ prev = cur;
+ INSTR_TIME_SET_CURRENT(temp);
+ cur = INSTR_TIME_GET_MICROSEC(temp);
+ diff = cur - prev;
+
+ if (diff < 0)
+ {
+ printf("Detected clock going backwards in time.\n");
+ printf("Time warp: %d microseconds\n", diff);
+ exit(1);
+ }
+
+ bits = 0;
+ while (diff)
+ {
+ diff >>= 1;
+ bits++;
+ }
+ histogram[bits]++;
+
+ loop_count++;
+ INSTR_TIME_SUBTRACT(temp, start_time);
+ time_elapsed = INSTR_TIME_GET_MICROSEC(temp);
+ }
+
+ INSTR_TIME_SET_CURRENT(end_time);
+
+ INSTR_TIME_SUBTRACT(end_time, start_time);
+
+ printf("Per timing duration including loop overhead: %0.2f ns\n",
+ INSTR_TIME_GET_DOUBLE(end_time) * 1e9 / loop_count);
+ printf("Histogram of timing durations:\n");
+ printf("%9s: %10s %9s\n", "< usec", "count", "percent");
+
+ found = 0;
+ for (i = 31; i >= 0; i--)
+ {
+ if (found || histogram[i])
+ {
+ found = 1;
+ printf("%9ld: %10ld %8.5f%%\n", 1l << i, histogram[i],
+ (double) histogram[i] * 100 / loop_count);
+ }
+ }
+}
diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml
index adf09ca..b418688 100644
--- a/doc/src/sgml/contrib.sgml
+++ b/doc/src/sgml/contrib.sgml
@@ -121,6 +121,7 @@ CREATE EXTENSION <replaceable>module_name</> FROM unpackaged;
&pgstatstatements;
&pgstattuple;
&pgtestfsync;
+ &pgtesttiming;
&pgtrgm;
&pgupgrade;
&seg;
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index b96dd65..38c9334 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -129,6 +129,7 @@
<!ENTITY pgstatstatements SYSTEM "pgstatstatements.sgml">
<!ENTITY pgstattuple SYSTEM "pgstattuple.sgml">
<!ENTITY pgtestfsync SYSTEM "pgtestfsync.sgml">
+<!ENTITY pgtesttiming SYSTEM "pgtesttiming.sgml">
<!ENTITY pgtrgm SYSTEM "pgtrgm.sgml">
<!ENTITY pgupgrade SYSTEM "pgupgrade.sgml">
<!ENTITY seg SYSTEM "seg.sgml">
diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml
index 8e695fd..5a8c6fc 100644
--- a/doc/src/sgml/perform.sgml
+++ b/doc/src/sgml/perform.sgml
@@ -770,7 +770,9 @@ ROLLBACK;
network transmission costs and I/O conversion costs are not included.
Second, the measurement overhead added by <command>EXPLAIN
ANALYZE</command> can be significant, especially on machines with slow
- <function>gettimeofday()</> operating-system calls.
+ <function>gettimeofday()</> operating-system calls. You can use the
+ <xref linkend="pgtesttiming"> tool to measure the overhead of timing
+ on your system.
</para>
<para>
diff --git a/doc/src/sgml/pgtesttiming.sgml b/doc/src/sgml/pgtesttiming.sgml
new file mode 100644
index 0000000..f330265
--- /dev/null
+++ b/doc/src/sgml/pgtesttiming.sgml
@@ -0,0 +1,116 @@
+<!-- doc/src/sgml/pgtesttiming.sgml -->
+
+<sect1 id="pgtesttiming" xreflabel="pg_test_timing">
+ <title>pg_test_timing</title>
+
+ <indexterm zone="pgtesttiming">
+ <primary>pg_test_timing</primary>
+ </indexterm>
+
+ <para>
+ <application>pg_test_timing</> is a tool to measure the timing overhead
+ on your system and confirm that the system time never moves backwards.
+ </para>
+
+ <sect2>
+ <title>Usage</title>
+
+<synopsis>
+pg_test_timing [options]
+</synopsis>
+
+ <para>
+ <application>pg_test_timing</application> accepts the following
+ command-line options:
+
+ <variablelist>
+
+ <varlistentry>
+ <term><option>-d</option></term>
+ <term><option>--duration</option></term>
+ <listitem>
+ <para>
+ Specifies the number of seconds to run the test. Longer durations
+ give slightly better accuracy, and are more likely to discover
+ problems with the system clock moving backwards. The default
+ test duration is 3 seconds.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </para>
+
+ </sect2>
+
+ <sect2>
+ <title>Interpreting results</title>
+ <para>
+ Collecting accurate timing information on
+ Linux includes the options tsc, hpet, and acpi_pm
+ </para>
+ <para>
+ Good results will show most individual timing calls take less
+ than one nanoseconds, such as this example from an Intel i7-860
+ system using the tsc clock source:
+
+<screen>
+Testing timing overhead for 3 seconds.
+Per timing duration including loop overhead: 35.96 ns
+Histogram of timing durations:
+ < usec: count percent
+ 16: 2 0.00000%
+ 8: 13 0.00002%
+ 4: 126 0.00015%
+ 2: 2999652 3.59518%
+ 1: 80435604 96.40465%
+</screen>
+
+ </para>
+ </sect2>
+
+ <sect2>
+ <title>Changing time sources</title>
+ <para>
+ On Linux systems, it's possible to change the clock source
+ used to collect timing data at any time. A second example
+ shows the slowdown possible from switching to the slower acpi_pm
+ time source, on the same system used for the fast results above:
+
+<screen>
+# cat /sys/devices/system/clocksource/clocksource0/available_clocksource
+tsc hpet acpi_pm
+# echo acpi_pm > /sys/devices/system/clocksource/clocksource0/current_clocksource
+# pg_test_timing
+Per timing duration including loop overhead: 722.92 ns
+Histogram of timing durations:
+ < usec: count percent
+ 16: 3 0.00007%
+ 8: 563 0.01357%
+ 4: 3241 0.07810%
+ 2: 2990371 72.05956%
+ 1: 1155682 27.84870%
+</screen>
+
+ </para>
+
+ <para>
+ The tsc clock source is the most accurate one available on current
+ generation CPUs. However, trying to use it on older CPUs sometimes
+ resulted in the reported time being inconsistent among multiple cores.
+ Newer versions of Linux will check for this and switch to a slower,
+ more stable clock sources instead. Older ones would allow using tsc
+ in situations where it's now known to be inaccurate, which can
+ result in system instability.
+ </para>
+ </sect2>
+
+ <sect2>
+ <title>Author</title>
+
+ <para>
+ Ants Aasma <email>ants.aa...@eesti.ee</email>
+ </para>
+ </sect2>
+
+</sect1>
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers