Hi Tom, On various Intel CPUs I got either steps close to single nanosecond or sometimes a little more on older ones
One specific CPU moved in in 2 tick increments while the ration to ns was 2,1/1 or 2100 ticks per microsecond. On Zen4 AMD the step seems to be 10 ns, even though the tick-to-ns ratio is 2.6 / 1 , so reading ticks directly gives 26, 54, ... Also, reading directly in ticks on M1 gave "loop time including overhead: 2.13 ns" (attached code works on Clang, not sure about GCC) I'll also take a look at the docs and try to propose something Do we also need tests for this one ? ---- Hannu On Tue, Jul 2, 2024 at 7:20 PM Tom Lane <t...@sss.pgh.pa.us> wrote: > > BTW, getting back to the original point of the thread: I duplicated > Hannu's result showing that on Apple M1 the clock tick seems to be > about 40ns. But look at what I got with the v2 patch on my main > workstation (full output attached): > > $ ./pg_test_timing > ... > Per loop time including overhead: 16.60 ns > ... > Timing durations less than 128 ns: > ns % of total running % count > 15 3.2738 3.2738 5914914 > 16 49.0772 52.3510 88668783 > 17 36.4662 88.8172 65884173 > 18 9.5639 98.3810 17279249 > 19 1.5746 99.9556 2844873 > 20 0.0416 99.9972 75125 > 21 0.0004 99.9976 757 > ... > > It sure looks like this is exact-to-the-nanosecond results, > since the modal values match the overall per-loop timing, > and there are no zero measurements. > > This is a Dell tower from 2021, running RHEL8 on an Intel Xeon W-2245. > Not exactly top-of-the-line stuff. > > regards, tom lane >
/* * pg_test_timing.c * tests overhead of timing calls and their monotonicity: that * they always move forward */ #include "postgres_fe.h" #include <limits.h> #include "getopt_long.h" #include "portability/instr_time.h" #include "port/pg_bitutils.h" static const char *progname; static unsigned int test_duration = 3; static void handle_args(int argc, char *argv[]); static uint64 test_timing(unsigned int duration); static void output(uint64 loop_count); /* record duration in powers of 2 nanoseconds */ long long int histogram[64]; /* record duration up to 64 ticks directly */ long long int direct_histogram[64]; /* Current CPU speed in ticks per second*/ uint64 ticks_per_sec; int main(int argc, char *argv[]) { uint64 loop_count; set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_timing")); progname = get_progname(argv[0]); handle_args(argc, argv); loop_count = test_timing(test_duration); output(loop_count); return 0; } static void handle_args(int argc, char *argv[]) { static struct option long_options[] = { {"duration", required_argument, NULL, 'd'}, {NULL, 0, NULL, 0} }; int option; /* Command line option */ int optindex = 0; /* used by getopt_long */ unsigned long optval; /* used for option parsing */ char *endptr; if (argc > 1) { if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) { printf(_("Usage: %s [-d DURATION]\n"), progname); exit(0); } if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) { puts("pg_test_timing (PostgreSQL) " PG_VERSION); exit(0); } } while ((option = getopt_long(argc, argv, "d:", long_options, &optindex)) != -1) { switch (option) { case 'd': errno = 0; optval = strtoul(optarg, &endptr, 10); if (endptr == optarg || *endptr != '\0' || errno != 0 || optval != (unsigned int) optval) { fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "--duration"); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); } test_duration = (unsigned int) optval; if (test_duration == 0) { fprintf(stderr, _("%s: %s must be in range %u..%u\n"), progname, "--duration", 1, UINT_MAX); exit(1); } break; default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); break; } } if (argc > optind) { fprintf(stderr, _("%s: too many command-line arguments (first is \"%s\")\n"), progname, argv[optind]); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); } printf(ngettext("Testing timing overhead for %u second.\n", "Testing timing overhead for %u seconds.\n", test_duration), test_duration); } inline uint64_t read_tsc(void) { return __builtin_readcyclecounter();; } static uint64 test_timing(unsigned int duration) { // uint64 total_time; int64 time_elapsed = 0; uint64 loop_count = 0; uint64 same_count = 0; uint64 min_diff = UINT64_MAX; uint64 ticks_elapsed = 0; uint64 start_tick, end_tick, total_ticks, prev, cur; instr_time start_time, end_time, temp; // total_time = duration > 0 ? duration * INT64CONST(1000000000) : 0; // measure tick timing INSTR_TIME_SET_CURRENT(start_time); start_tick = read_tsc(); while (time_elapsed < INT64CONST(1000000000)) { INSTR_TIME_SET_CURRENT(temp); INSTR_TIME_SUBTRACT(temp, start_time); time_elapsed = INSTR_TIME_GET_NANOSEC(temp); } end_tick = read_tsc(); INSTR_TIME_SET_CURRENT(end_time); INSTR_TIME_SUBTRACT(end_time, start_time); printf(_("Total %ld ticks in %ld ns, %0.6f ticks per ns\n"), end_tick - start_tick, INSTR_TIME_GET_NANOSEC(end_time), ((double)(end_tick - start_tick)) / INSTR_TIME_GET_DOUBLE(end_time)); ticks_per_sec = (end_tick - start_tick) * 1000000000 / INSTR_TIME_GET_NANOSEC(end_time); total_ticks = duration > 0 ? duration * ticks_per_sec : 0; printf("This CPU is running at %ld ticks / second, will run test for %ld ticks\n", ticks_per_sec, total_ticks); // return 0; INSTR_TIME_SET_CURRENT(start_time); cur = start_tick = read_tsc(); while (ticks_elapsed < total_ticks) { int64 diff; int32 bits; prev = cur; cur = read_tsc(); diff = cur - prev; /* Did time go backwards? */ if (unlikely(diff <= 0)) { if (likely (diff == 0 )) same_count ++; else { fprintf(stderr, _("Detected clock going backwards in time.\n")); fprintf(stderr, _("Time warp: %ld ms\n"), diff); exit(1); } } if (unlikely(min_diff > diff)) min_diff = diff; if(likely(diff < 64)) direct_histogram[diff]++; bits = pg_leftmost_one_pos64(diff+1); histogram[bits]++; loop_count++; ticks_elapsed = cur - start_tick; } end_tick = read_tsc(); INSTR_TIME_SET_CURRENT(end_time); INSTR_TIME_SUBTRACT(end_time, start_time); printf("loop_count %ld", loop_count); printf(_("Per loop time including overhead: %0.2f ns, min: %ld ticks (%0.1f ns), same: %ld\n"), INSTR_TIME_GET_DOUBLE(end_time) * 1e9 / loop_count, min_diff, (float) min_diff * 1000000000 / ticks_per_sec , same_count); printf(_("Total ticks in: %ld, in: %ld nr\n"), end_tick - start_tick, INSTR_TIME_GET_NANOSEC(end_time) ); return loop_count; } static void output(uint64 loop_count) { int64 max_bit = 63, i; char *header1 = _("<= ticks"); char *header1a = _("<= ns"); char *header2 = /* xgettext:no-c-format */ _("% of total"); char *header2a = /* xgettext:no-c-format */ _("running %"); char *header3 = _("count"); int len1 = strlen(header1); int len1a = strlen(header1a); int len2 = strlen(header2); int len2a = strlen(header2a); int len3 = strlen(header3); float rprct; /* find highest bit value */ while (max_bit > 0 && histogram[max_bit] == 0) max_bit--; printf(_("Log2(x+1) histogram of timing durations:\n")); printf("%*s (%*s) %*s %*s %*s\n", Max(8, len1), header1, Max(8, len1a), header1a, Max(10, len2), header2, Max(10, len2a), header2a, Max(8, len3), header3); for (i = 0, rprct=0; i <= max_bit; i++) { rprct += (double) histogram[i] * 100 / loop_count; printf("%*ld (%*.1f) %*.4f %*.4f %*lld\n", Max(8, len1), (1l << (i+1)) - 2, Max(8, len1a), (float) (1l << i) * 1000000000 / ticks_per_sec, Max(10, len2) - 1, (double) histogram[i] * 100 / loop_count, Max(10, len2) - 1, rprct, Max(10, len3), histogram[i]); } printf("First 64 ticks --\n"); for (i = 0, rprct=0; i < 64; i++) { rprct += (double) histogram[i] * 100 / loop_count; if (direct_histogram[i]) printf("%*ld (%*.1f) %*.4f %*.4f %*lld\n", Max(8, len1), (long int) i, Max(8, len1a), (float) (i) * 1000000000 / ticks_per_sec, Max(10, len2) - 1, (double) direct_histogram[i] * 100 / loop_count, Max(10, len2) - 1, rprct, Max(10, len3), direct_histogram[i]); } }