From: Niklas Haas <g...@haasn.dev>

Sometimes, when measuring very small functions, rdtsc is not accurate enough
to get a reliable measurement. This increases the number of runs inside the
inner loop from 4 to 32, which should help a lot. Less important when using
the more precise linux-perf API, but still useful.

There should be no user-visible change since the number of runs is adjusted
to keep the total time spent measuring the same.
---
 tests/checkasm/checkasm.c |  2 +-
 tests/checkasm/checkasm.h | 24 +++++++++++++++++++-----
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 412b8b2cd1..87b75ec36c 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -624,7 +624,7 @@ static inline double avg_cycles_per_call(const CheckasmPerf 
*const p)
     if (p->iterations) {
         const double cycles = (double)(10 * p->cycles) / p->iterations - 
state.nop_time;
         if (cycles > 0.0)
-            return cycles / 4.0; /* 4 calls per iteration */
+            return cycles / 32.0; /* 32 calls per iteration */
     }
     return 0.0;
 }
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index ad239fb2a4..215d64e076 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -340,6 +340,22 @@ typedef struct CheckasmPerf {
 #define PERF_STOP(t)  t = AV_READ_TIME() - t
 #endif
 
+#define CALL4(...)\
+    do {\
+        tfunc(__VA_ARGS__); \
+        tfunc(__VA_ARGS__); \
+        tfunc(__VA_ARGS__); \
+        tfunc(__VA_ARGS__); \
+    } while (0)
+
+#define CALL16(...)\
+    do {\
+        CALL4(__VA_ARGS__); \
+        CALL4(__VA_ARGS__); \
+        CALL4(__VA_ARGS__); \
+        CALL4(__VA_ARGS__); \
+    } while (0)
+
 /* Benchmark the function */
 #define bench_new(...)\
     do {\
@@ -350,14 +366,12 @@ typedef struct CheckasmPerf {
             uint64_t tsum = 0;\
             uint64_t ti, tcount = 0;\
             uint64_t t = 0; \
-            const uint64_t truns = bench_runs;\
+            const uint64_t truns = FFMAX(bench_runs >> 3, 1);\
             checkasm_set_signal_handler_state(1);\
             for (ti = 0; ti < truns; ti++) {\
                 PERF_START(t);\
-                tfunc(__VA_ARGS__);\
-                tfunc(__VA_ARGS__);\
-                tfunc(__VA_ARGS__);\
-                tfunc(__VA_ARGS__);\
+                CALL16(__VA_ARGS__);\
+                CALL16(__VA_ARGS__);\
                 PERF_STOP(t);\
                 if (t*tcount <= tsum*4 && ti > 0) {\
                     tsum += t;\
-- 
2.49.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to