Hi Tom,

On various Intel CPUs I got either steps close to single nanosecond or
sometimes a little more on older ones

One specific CPU moved in in 2 tick increments while the ration to ns
was 2,1/1 or 2100 ticks per microsecond.

On Zen4 AMD the step seems to  be 10 ns, even though the tick-to-ns
ratio is 2.6 / 1 , so reading ticks directly gives 26, 54, ...

Also, reading directly in ticks on M1 gave "loop time including
overhead: 2.13 ns" (attached code works on Clang, not sure about GCC)


I'll also take a look at the docs and try to propose something

Do we also need tests for this one ?

----
Hannu



On Tue, Jul 2, 2024 at 7:20 PM Tom Lane <t...@sss.pgh.pa.us> wrote:
>
> BTW, getting back to the original point of the thread: I duplicated
> Hannu's result showing that on Apple M1 the clock tick seems to be
> about 40ns.  But look at what I got with the v2 patch on my main
> workstation (full output attached):
>
> $ ./pg_test_timing
> ...
> Per loop time including overhead: 16.60 ns
> ...
> Timing durations less than 128 ns:
>       ns   % of total  running %      count
>       15       3.2738     3.2738    5914914
>       16      49.0772    52.3510   88668783
>       17      36.4662    88.8172   65884173
>       18       9.5639    98.3810   17279249
>       19       1.5746    99.9556    2844873
>       20       0.0416    99.9972      75125
>       21       0.0004    99.9976        757
> ...
>
> It sure looks like this is exact-to-the-nanosecond results,
> since the modal values match the overall per-loop timing,
> and there are no zero measurements.
>
> This is a Dell tower from 2021, running RHEL8 on an Intel Xeon W-2245.
> Not exactly top-of-the-line stuff.
>
>                         regards, tom lane
>
/*
 *	pg_test_timing.c
 *		tests overhead of timing calls and their monotonicity:	that
 *		they always move forward
 */

#include "postgres_fe.h"

#include <limits.h>

#include "getopt_long.h"
#include "portability/instr_time.h"
#include "port/pg_bitutils.h"

static const char *progname;

static unsigned int test_duration = 3;

static void handle_args(int argc, char *argv[]);
static uint64 test_timing(unsigned int duration);
static void output(uint64 loop_count);

/* record duration in powers of 2 nanoseconds */
long long int histogram[64];

/* record duration up to 64 ticks directly */
long long int direct_histogram[64];

/* Current CPU speed in ticks per second*/
uint64 ticks_per_sec;

int
main(int argc, char *argv[])
{
	uint64		loop_count;

	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_timing"));
	progname = get_progname(argv[0]);

	handle_args(argc, argv);

	loop_count = test_timing(test_duration);

	output(loop_count);

	return 0;
}

static void
handle_args(int argc, char *argv[])
{
	static struct option long_options[] = {
		{"duration", required_argument, NULL, 'd'},
		{NULL, 0, NULL, 0}
	};

	int			option;			/* Command line option */
	int			optindex = 0;	/* used by getopt_long */
	unsigned long optval;		/* used for option parsing */
	char	   *endptr;

	if (argc > 1)
	{
		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
		{
			printf(_("Usage: %s [-d DURATION]\n"), progname);
			exit(0);
		}
		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
		{
			puts("pg_test_timing (PostgreSQL) " PG_VERSION);
			exit(0);
		}
	}

	while ((option = getopt_long(argc, argv, "d:",
								 long_options, &optindex)) != -1)
	{
		switch (option)
		{
			case 'd':
				errno = 0;
				optval = strtoul(optarg, &endptr, 10);

				if (endptr == optarg || *endptr != '\0' ||
					errno != 0 || optval != (unsigned int) optval)
				{
					fprintf(stderr, _("%s: invalid argument for option %s\n"),
							progname, "--duration");
					fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
					exit(1);
				}

				test_duration = (unsigned int) optval;
				if (test_duration == 0)
				{
					fprintf(stderr, _("%s: %s must be in range %u..%u\n"),
							progname, "--duration", 1, UINT_MAX);
					exit(1);
				}
				break;

			default:
				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
						progname);
				exit(1);
				break;
		}
	}

	if (argc > optind)
	{
		fprintf(stderr,
				_("%s: too many command-line arguments (first is \"%s\")\n"),
				progname, argv[optind]);
		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
				progname);
		exit(1);
	}


	printf(ngettext("Testing timing overhead for %u second.\n",
					"Testing timing overhead for %u seconds.\n",
					test_duration),
		   test_duration);
}

inline uint64_t read_tsc(void) {
    return __builtin_readcyclecounter();;
}

static uint64
test_timing(unsigned int duration)
{
//	uint64		total_time;
	int64		time_elapsed = 0;
	uint64		loop_count = 0;
	uint64		same_count = 0;
	uint64		min_diff = UINT64_MAX;
	uint64		ticks_elapsed = 0;
	uint64		start_tick,
	            end_tick,
				total_ticks,
	            prev,
				cur;
	instr_time	start_time,
				end_time,
				temp;

//	total_time = duration > 0 ? duration * INT64CONST(1000000000) : 0;

    // measure tick timing
	INSTR_TIME_SET_CURRENT(start_time);
	start_tick = read_tsc();
	while (time_elapsed < INT64CONST(1000000000))
	{
		INSTR_TIME_SET_CURRENT(temp);
		INSTR_TIME_SUBTRACT(temp, start_time);
		time_elapsed = INSTR_TIME_GET_NANOSEC(temp);
	}

    end_tick = read_tsc();

	INSTR_TIME_SET_CURRENT(end_time);
	INSTR_TIME_SUBTRACT(end_time, start_time);

	printf(_("Total %ld ticks in %ld ns, %0.6f ticks per ns\n"),
		   end_tick - start_tick, INSTR_TIME_GET_NANOSEC(end_time), ((double)(end_tick - start_tick)) / INSTR_TIME_GET_DOUBLE(end_time));

    ticks_per_sec = (end_tick - start_tick) * 1000000000 / INSTR_TIME_GET_NANOSEC(end_time);
    total_ticks = duration > 0 ? duration * ticks_per_sec : 0;

	printf("This CPU is running at %ld ticks / second, will run test for %ld ticks\n", ticks_per_sec, total_ticks);

//    return 0;


	INSTR_TIME_SET_CURRENT(start_time);

	cur = start_tick = read_tsc();

	while (ticks_elapsed < total_ticks)
	{
		int64		diff;
		int32		bits;

		prev = cur;
		cur = read_tsc();
		diff = cur - prev;

		/* Did time go backwards? */
		if (unlikely(diff <= 0))
		{
			if (likely (diff == 0 ))
				same_count ++;
			else
			{
			fprintf(stderr, _("Detected clock going backwards in time.\n"));
			fprintf(stderr, _("Time warp: %ld ms\n"), diff);
			exit(1);
			}
		}
		if (unlikely(min_diff > diff))
			min_diff = diff;

		if(likely(diff < 64))
			direct_histogram[diff]++;

        bits = pg_leftmost_one_pos64(diff+1);

		histogram[bits]++;


		loop_count++;
		ticks_elapsed = cur - start_tick;
	}

    end_tick = read_tsc();

	INSTR_TIME_SET_CURRENT(end_time);

	INSTR_TIME_SUBTRACT(end_time, start_time);

	printf("loop_count %ld", loop_count);

	printf(_("Per loop time including overhead: %0.2f ns, min: %ld ticks (%0.1f ns), same: %ld\n"),
		   INSTR_TIME_GET_DOUBLE(end_time) * 1e9 / loop_count, min_diff, (float) min_diff * 1000000000 / ticks_per_sec , same_count);

	printf(_("Total ticks in: %ld, in: %ld nr\n"),
		   end_tick - start_tick, INSTR_TIME_GET_NANOSEC(end_time) );


	return loop_count;
}

static void
output(uint64 loop_count)
{
	int64		max_bit = 63,
				i;
	char	   *header1 = _("<= ticks");
	char	   *header1a = _("<= ns");
	char	   *header2 = /* xgettext:no-c-format */ _("% of total");
	char	   *header2a = /* xgettext:no-c-format */ _("running %");
	char	   *header3 = _("count");
	int			len1 = strlen(header1);
	int			len1a = strlen(header1a);
	int			len2 = strlen(header2);
	int			len2a = strlen(header2a);
	int			len3 = strlen(header3);
	float       rprct;

	/* find highest bit value */
	while (max_bit > 0 && histogram[max_bit] == 0)
		max_bit--;

	printf(_("Log2(x+1) histogram of timing durations:\n"));
	printf("%*s (%*s)   %*s %*s %*s\n",
		   Max(8, len1), header1,
		   Max(8, len1a), header1a,
		   Max(10, len2), header2,
		   Max(10, len2a), header2a,
		   Max(8, len3), header3);

	for (i = 0, rprct=0; i <= max_bit; i++) {
	    rprct += (double) histogram[i] * 100 / loop_count;
		printf("%*ld (%*.1f)    %*.4f %*.4f %*lld\n",
			   Max(8, len1), (1l << (i+1)) - 2,
			   Max(8, len1a), (float) (1l << i) * 1000000000 / ticks_per_sec,
			   Max(10, len2) - 1, (double) histogram[i] * 100 / loop_count,
			   Max(10, len2) - 1, rprct,
			   Max(10, len3), histogram[i]);
	}
    printf("First 64 ticks --\n");
	for (i = 0, rprct=0; i < 64; i++) {
	    rprct += (double) histogram[i] * 100 / loop_count;
	    if (direct_histogram[i])
			printf("%*ld (%*.1f)    %*.4f %*.4f %*lld\n",
				Max(8, len1), (long int) i,
				Max(8, len1a), (float) (i) * 1000000000 / ticks_per_sec,
				Max(10, len2) - 1, (double) direct_histogram[i] * 100 / loop_count,
		 		Max(10, len2) - 1, rprct,
				Max(10, len3), direct_histogram[i]);
	}

}

Reply via email to