From: Niklas Haas <g...@haasn.dev> Sometimes, when measuring very small functions, rdtsc is not accurate enough to give a reliable measurement. This patch increases the number of calls to the tested function inside each timed iteration of the benchmark loop from 4 to 32, which should help a lot. This matters less when using the more precise linux-perf API, but it is still useful.
There should be no user-visible change since the number of runs is adjusted to keep the total time spent measuring the same. --- tests/checkasm/checkasm.c | 2 +- tests/checkasm/checkasm.h | 24 +++++++++++++++++++----- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index 412b8b2cd1..87b75ec36c 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -624,7 +624,7 @@ static inline double avg_cycles_per_call(const CheckasmPerf *const p) if (p->iterations) { const double cycles = (double)(10 * p->cycles) / p->iterations - state.nop_time; if (cycles > 0.0) - return cycles / 4.0; /* 4 calls per iteration */ + return cycles / 32.0; /* 32 calls per iteration */ } return 0.0; } diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index ad239fb2a4..215d64e076 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -340,6 +340,22 @@ typedef struct CheckasmPerf { #define PERF_STOP(t) t = AV_READ_TIME() - t #endif +#define CALL4(...)\ + do {\ + tfunc(__VA_ARGS__); \ + tfunc(__VA_ARGS__); \ + tfunc(__VA_ARGS__); \ + tfunc(__VA_ARGS__); \ + } while (0) + +#define CALL16(...)\ + do {\ + CALL4(__VA_ARGS__); \ + CALL4(__VA_ARGS__); \ + CALL4(__VA_ARGS__); \ + CALL4(__VA_ARGS__); \ + } while (0) + /* Benchmark the function */ #define bench_new(...)\ do {\ @@ -350,14 +366,12 @@ typedef struct CheckasmPerf { uint64_t tsum = 0;\ uint64_t ti, tcount = 0;\ uint64_t t = 0; \ - const uint64_t truns = bench_runs;\ + const uint64_t truns = FFMAX(bench_runs >> 3, 1);\ checkasm_set_signal_handler_state(1);\ for (ti = 0; ti < truns; ti++) {\ PERF_START(t);\ - tfunc(__VA_ARGS__);\ - tfunc(__VA_ARGS__);\ - tfunc(__VA_ARGS__);\ - tfunc(__VA_ARGS__);\ + CALL16(__VA_ARGS__);\ + CALL16(__VA_ARGS__);\ PERF_STOP(t);\ if (t*tcount <= tsum*4 && ti > 0) {\ tsum += t;\ -- 2.49.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org 
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".