On Wed, Sep 14, 2016 at 14:53:14 +0100, Alex Bennée wrote: > Richard Henderson <r...@twiddle.net> writes: > > From: "Emilio G. Cota" <c...@braap.org> > > QEMU_CFLAGS += -I$(SRC_PATH)/tests > > @@ -465,6 +466,7 @@ tests/test-qdist$(EXESUF): tests/test-qdist.o > > $(test-util-obj-y) > > tests/test-qht$(EXESUF): tests/test-qht.o $(test-util-obj-y) > > tests/test-qht-par$(EXESUF): tests/test-qht-par.o tests/qht-bench$(EXESUF) > > $(test-util-obj-y) > > tests/qht-bench$(EXESUF): tests/qht-bench.o $(test-util-obj-y) > > +tests/atomic_add-bench$(EXESUF): tests/atomic_add-bench.o > > $(test-util-obj-y) > > This probably more properly lives in tests/tcg/generic or some such but > that needs the tcg/tests being rehabilitated into the build system so at > least here it gets built.
I didn't know where to put it; tests/ was easy enough :-) > > tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \ > > hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\ > > diff --git a/tests/atomic_add-bench.c b/tests/atomic_add-bench.c > > new file mode 100644 > > index 0000000..5bbecf6 > > --- /dev/null > > +++ b/tests/atomic_add-bench.c > > I wonder if this would be worth making atomic-bench and adding the other > atomic operations into the benchmark? I know given the current helper > overhead it's unlikely to show much difference between the ops but if we > move to backend support for the tcg atomics it would be a useful tool to > have. I'd rather add more ops later if necessary, but if you insist I can do it. (snip) > > +static void create_threads(void) > > +{ > > + unsigned int i; > > + > > + threads = g_new(QemuThread, n_threads); > > + th_info = g_new(struct thread_info, n_threads); > > + counts = qemu_memalign(64, sizeof(*counts) * range); > > This fails on my setup as AFAICT qemu_memalign doesn't give you zeroed > memory. I added a memset after to zero it out. Yes, I fixed this more than a month ago, among other things in this program, e.g., running for -d seconds instead of -n operations (much easier way to fairly measure throughput). Obviously forgot to tell anyone about it :/ sorry for making you waste time. I'm appending the appropriate delta -- just checked it applies cleanly over rth's atomic-3 branch on GitHub. Thanks, Emilio >From f4a1a6fe2ffcf9572353f0b85a21ed27cd1765e1 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" <c...@braap.org> Date: Tue, 9 Aug 2016 23:14:13 -0400 Subject: [PATCH] tests: fix atomic_add_bench Signed-off-by: Emilio G. 
Cota <c...@braap.org> --- tests/atomic_add-bench.c | 51 ++++++++++++++++-------------------------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/tests/atomic_add-bench.c b/tests/atomic_add-bench.c index 06300ba..dc97441 100644 --- a/tests/atomic_add-bench.c +++ b/tests/atomic_add-bench.c @@ -17,14 +17,14 @@ static struct thread_info *th_info; static unsigned int n_threads = 1; static unsigned int n_ready_threads; static struct count *counts; -static unsigned long n_ops = 10000; -static double duration; -static unsigned int range = 1; +static unsigned int duration = 1; +static unsigned int range = 1024; static bool test_start; +static bool test_stop; static const char commands_string[] = " -n = number of threads\n" - " -o = number of ops per thread\n" + " -d = duration in seconds\n" " -r = range (will be rounded up to pow2)"; static void usage_complete(char *argv[]) @@ -49,14 +49,13 @@ static uint64_t xorshift64star(uint64_t x) static void *thread_func(void *arg) { struct thread_info *info = arg; - unsigned long i; atomic_inc(&n_ready_threads); while (!atomic_mb_read(&test_start)) { cpu_relax(); } - for (i = 0; i < n_ops; i++) { + while (!atomic_read(&test_stop)) { unsigned int index; info->r = xorshift64star(info->r); @@ -66,32 +65,23 @@ static void *thread_func(void *arg) return NULL; } -static inline -uint64_t ts_subtract(const struct timespec *a, const struct timespec *b) -{ - uint64_t ns; - - ns = (b->tv_sec - a->tv_sec) * 1000000000ULL; - ns += (b->tv_nsec - a->tv_nsec); - return ns; -} - static void run_test(void) { + unsigned int remaining; unsigned int i; - struct timespec ts_start, ts_end; while (atomic_read(&n_ready_threads) != n_threads) { cpu_relax(); } atomic_mb_set(&test_start, true); + do { + remaining = sleep(duration); + } while (remaining); + atomic_mb_set(&test_stop, true); - clock_gettime(CLOCK_MONOTONIC, &ts_start); for (i = 0; i < n_threads; i++) { qemu_thread_join(&threads[i]); } - clock_gettime(CLOCK_MONOTONIC, 
&ts_end); - duration = ts_subtract(&ts_start, &ts_end) / 1e9; } static void create_threads(void) @@ -101,6 +91,7 @@ static void create_threads(void) threads = g_new(QemuThread, n_threads); th_info = g_new(struct thread_info, n_threads); counts = qemu_memalign(64, sizeof(*counts) * range); + memset(counts, 0, sizeof(*counts) * range); for (i = 0; i < n_threads; i++) { struct thread_info *info = &th_info[i]; @@ -115,7 +106,7 @@ static void pr_params(void) { printf("Parameters:\n"); printf(" # of threads: %u\n", n_threads); - printf(" n_ops: %lu\n", n_ops); + printf(" duration: %u\n", duration); printf(" ops' range: %u\n", range); } @@ -128,22 +119,20 @@ static void pr_stats(void) for (i = 0; i < range; i++) { val += counts[i].val; } - assert(val == n_threads * n_ops); tx = val / duration / 1e6; printf("Results:\n"); - printf("Duration: %.2f s\n", duration); + printf("Duration: %u s\n", duration); printf(" Throughput: %.2f Mops/s\n", tx); printf(" Throughput/thread: %.2f Mops/s/thread\n", tx / n_threads); } static void parse_args(int argc, char *argv[]) { - unsigned long long n_ops_ull; int c; for (;;) { - c = getopt(argc, argv, "hn:o:r:"); + c = getopt(argc, argv, "hd:n:r:"); if (c < 0) { break; } @@ -151,18 +140,12 @@ static void parse_args(int argc, char *argv[]) case 'h': usage_complete(argv); exit(0); + case 'd': + duration = atoi(optarg); + break; case 'n': n_threads = atoi(optarg); break; - case 'o': - n_ops_ull = atoll(optarg); - if (n_ops_ull > ULONG_MAX) { - fprintf(stderr, - "fatal: -o cannot be greater than %lu\n", ULONG_MAX); - exit(1); - } - n_ops = n_ops_ull; - break; case 'r': range = pow2ceil(atoi(optarg)); break; -- 2.5.0