The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=36ae5ce2f2fda35763c2655a19bf1b0ee22fdf3c

commit 36ae5ce2f2fda35763c2655a19bf1b0ee22fdf3c
Author:     Mark Johnston <ma...@freebsd.org>
AuthorDate: 2025-03-10 03:01:13 +0000
Commit:     Mark Johnston <ma...@freebsd.org>
CommitDate: 2025-03-10 03:01:13 +0000

    dtrace/arm64: Fix dtrace_gethrtime()
    
    This routine returns a monotonic count of the number of nanoseconds elapsed
    since the previous call.  On arm64 it uses the generic system timer.  The
    implementation multiplies the counter value by 10**9 then divides by the counter
    frequency, but this multiplication can overflow.  This can result in trace
    records with non-monotonic timestamps, which breaks libdtrace's temporal
    ordering algorithm.
    
    An easy fix is to reverse the order of operations, since the counter frequency
    will in general be smaller than 10**9.  (In fact, it's mandated to be 1GHz in
    ARMv9, which makes life simple.)  However, this can give a fair bit of error.
    Adopt the calculation used on amd64, with tweaks to handle frequencies as low as
    1MHz: the ARM generic timer documentation suggests that ARMv8 timers are
    typically in the 1MHz-50MHz range, which is true on arm64 systems that I have
    access to.
    
    MFC after:      2 weeks
    Sponsored by:   Innovate UK
    Differential Revision:  https://reviews.freebsd.org/D49244
---
 sys/cddl/dev/dtrace/aarch64/dtrace_subr.c | 33 +++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/sys/cddl/dev/dtrace/aarch64/dtrace_subr.c b/sys/cddl/dev/dtrace/aarch64/dtrace_subr.c
index 20418e3a475b..32e84d8fbfe9 100644
--- a/sys/cddl/dev/dtrace/aarch64/dtrace_subr.c
+++ b/sys/cddl/dev/dtrace/aarch64/dtrace_subr.c
@@ -151,6 +151,32 @@ dtrace_sync(void)
        dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL);
 }
 
+static uint64_t nsec_scale;
+
+#define SCALE_SHIFT    25
+
+/*
+ * Choose scaling factors which let us convert a cntvct_el0 value to nanoseconds
+ * without overflow, as in the amd64 implementation.
+ *
+ * Documentation for the ARM generic timer states that typical counter
+ * frequencies are in the range 1MHz-50MHz; in ARMv9 the frequency is fixed at
+ * 1GHz.  The lower bound of 1MHz forces the shift to be at most 25 bits.  At
+ * that frequency, the calculation (hi * scale) << (32 - shift) will not
+ * overflow for over 100 years, assuming that the counter value starts at 0 upon
+ * boot.
+ */
+static void
+dtrace_gethrtime_init(void *arg __unused)
+{
+       uint64_t freq;
+
+       freq = READ_SPECIALREG(cntfrq_el0);
+       nsec_scale = ((uint64_t)NANOSEC << SCALE_SHIFT) / freq;
+}
+SYSINIT(dtrace_gethrtime_init, SI_SUB_DTRACE, SI_ORDER_ANY,
+    dtrace_gethrtime_init, NULL);
+
 /*
  * DTrace needs a high resolution time function which can be called from a
  * probe context and guaranteed not to have instrumented with probes itself.
@@ -161,10 +187,13 @@ uint64_t
 dtrace_gethrtime(void)
 {
        uint64_t count, freq;
+       uint32_t lo, hi;
 
        count = READ_SPECIALREG(cntvct_el0);
-       freq = READ_SPECIALREG(cntfrq_el0);
-       return ((1000000000UL * count) / freq);
+       lo = count;
+       hi = count >> 32;
+       return (((lo * nsec_scale) >> SCALE_SHIFT) +
+           ((hi * nsec_scale) << (32 - SCALE_SHIFT)));
 }
 
 /*

Reply via email to