Timestamping in the hardware latency detector uses sched_clock() underneath and depends on CONFIG_GENERIC_SCHED_CLOCK=n because the sched clocks provided by the generic sched_clock subsystem are not NMI safe.
ktime_get_mono_fast_ns() is NMI safe and available on all architectures. Replace the time getter, get rid of the CONFIG_GENERIC_SCHED_CLOCK=n dependency and clean up the horrible macro maze which encapsulates u64 math in u64 macros. Signed-off-by: Thomas Gleixner <t...@linutronix.de> --- kernel/trace/trace_hwlat.c | 59 +++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 34 deletions(-) --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -131,29 +131,19 @@ static void trace_hwlat_sample(struct hw trace_buffer_unlock_commit_nostack(buffer, event); } -/* Macros to encapsulate the time capturing infrastructure */ -#define time_type u64 -#define time_get() trace_clock_local() -#define time_to_us(x) div_u64(x, 1000) -#define time_sub(a, b) ((a) - (b)) -#define init_time(a, b) (a = b) -#define time_u64(a) a - +/* + * Timestamping uses ktime_get_mono_fast_ns(), the NMI safe access to + * CLOCK_MONOTONIC. + */ void trace_hwlat_callback(bool enter) { if (smp_processor_id() != nmi_cpu) return; - /* - * Currently trace_clock_local() calls sched_clock() and the - * generic version is not NMI safe. - */ - if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) { - if (enter) - nmi_ts_start = time_get(); - else - nmi_total_ts += time_get() - nmi_ts_start; - } + if (enter) + nmi_ts_start = ktime_get_mono_fast_ns(); + else + nmi_total_ts += ktime_get_mono_fast_ns() - nmi_ts_start; if (enter) nmi_count++; @@ -165,20 +155,22 @@ void trace_hwlat_callback(bool enter) * Used to repeatedly capture the CPU TSC (or similar), looking for potential * hardware-induced latency. Called with interrupts disabled and with * hwlat_data.lock held. + * + * Use ktime_get_mono_fast_ns() here as well because it does not wait on the + * timekeeping seqcount like ktime_get(). 
*/ static int get_sample(void) { struct trace_array *tr = hwlat_trace; struct hwlat_sample s; - time_type start, t1, t2, last_t2; + u64 start, t1, t2, last_t2, thresh; s64 diff, outer_diff, total, last_total = 0; u64 sample = 0; - u64 thresh = tracing_thresh; u64 outer_sample = 0; int ret = -1; unsigned int count = 0; - do_div(thresh, NSEC_PER_USEC); /* modifies interval value */ + thresh = div_u64(tracing_thresh, NSEC_PER_USEC); nmi_cpu = smp_processor_id(); nmi_total_ts = 0; @@ -188,18 +180,20 @@ static int get_sample(void) trace_hwlat_callback_enabled = true; - init_time(last_t2, 0); - start = time_get(); /* start timestamp */ + /* start timestamp */ + start = ktime_get_mono_fast_ns(); outer_diff = 0; + last_t2 = 0; do { - t1 = time_get(); /* we'll look for a discontinuity */ - t2 = time_get(); + /* we'll look for a discontinuity */ + t1 = ktime_get_mono_fast_ns(); + t2 = ktime_get_mono_fast_ns(); - if (time_u64(last_t2)) { + if (last_t2) { /* Check the delta from outer loop (t2 to next t1) */ - outer_diff = time_to_us(time_sub(t1, last_t2)); + outer_diff = div_u64(t1 - last_t2, NSEC_PER_USEC); /* This shouldn't happen */ if (outer_diff < 0) { pr_err(BANNER "time running backwards\n"); @@ -210,7 +204,8 @@ static int get_sample(void) } last_t2 = t2; - total = time_to_us(time_sub(t2, start)); /* sample width */ + /* sample width */ + total = div_u64(t2 - start, NSEC_PER_USEC); /* Check for possible overflows */ if (total < last_total) { @@ -220,7 +215,7 @@ static int get_sample(void) last_total = total; /* This checks the inner loop (t1 to t2) */ - diff = time_to_us(time_sub(t2, t1)); /* current diff */ + diff = div_u64(t2 - t1, NSEC_PER_USEC); if (diff > thresh || outer_diff > thresh) { if (!count) @@ -251,15 +246,11 @@ static int get_sample(void) ret = 1; - /* We read in microseconds */ - if (nmi_total_ts) - do_div(nmi_total_ts, NSEC_PER_USEC); - hwlat_data.count++; s.seqnum = hwlat_data.count; s.duration = sample; s.outer_duration = outer_sample; - 
s.nmi_total_ts = nmi_total_ts; + s.nmi_total_ts = div_u64(nmi_total_ts, NSEC_PER_USEC); s.nmi_count = nmi_count; s.count = count; trace_hwlat_sample(&s);