Allow Ftrace, Perf, and LTTng to hook on the irqsoff tracer and trace
irqsoff and preemptoff critical sections.

How to setup the irqsoff tracer for this use-case (needed for use with
Ftrace, Perf, or LTTng): if the tracing_thresh value is set, all
durations beyond the specified threshold will emit a
"core_critical_timing_hit" tracepoint. If unset, then the max duration
will be tracked. The current max duration can be reset using the
tracing_max_latency file.

echo 0 > /sys/kernel/debug/tracing/options/function-trace  # don't need 
function tracing
echo 1 > /sys/kernel/debug/tracing/options/quiet       # silence ftrace buffers
echo preemptirqsoff > /sys/kernel/debug/tracing/current_tracer
echo 2 > /sys/kernel/debug/tracing/tracing_thresh      # set threshold (e.g. 2 
µs)
echo 1 > /sys/kernel/debug/tracing/tracing_on

The following two commands can also be useful to tweak the
irqsoff tracer:
echo 0 > /sys/kernel/debug/tracing/tracing_max_latency # reset max latency
echo 0 > /sys/kernel/debug/tracing/tracing_thresh      # disable threshold

*** An example usage of Ftrace hooking on this tracepoint: ***

echo 1 > /sys/kernel/debug/tracing/events/core/core_critical_timing_hit/enable
cat /sys/kernel/debug/tracing/trace_pipe
[...]
  <idle>-0      17d..3 189981402us : core_critical_timing_hit: 
ip=0xffffffff810fa2f9 parent_ip=0xffffffff8108c31b start_ip=0xffffffff8108c31b 
delta_ns=2193 irqs_disabled=1 preempt_disabled=1 in_softirq=0 in_irq=0 in_nmi=0

*** An example usage of Perf hooking on this tracepoint: ***

perf record -g -e core:core_critical_timing_hit -c 1 sleep 30
perf report -g

+  100.00%   100.00%  sleep    [kernel.vmlinux]  [k] check_critical_timing
+   92.75%     0.00%  sleep    [kernel.vmlinux]  [k] preempt_count_sub
+   92.75%     0.00%  sleep    [kernel.vmlinux]  [k] trace_preempt_on
+   65.22%     0.00%  sleep    [kernel.vmlinux]  [k] page_fault
+   65.22%     0.00%  sleep    [kernel.vmlinux]  [k] do_page_fault
+   63.77%     0.00%  sleep    [kernel.vmlinux]  [k] __do_page_fault
+   63.77%     0.00%  sleep    [kernel.vmlinux]  [k] handle_mm_fault
+   56.52%     0.00%  sleep    [kernel.vmlinux]  [k] _raw_spin_unlock
+   30.43%     0.00%  sleep    ld-2.19.so        [.] _dl_sysdep_start
[...]

*** An example usage of LTTng hooking on this tracepoint: ***

(as root)
lttng-sessiond -d

(then, as root or tracing group)
lttng create --live
lttng enable-event -k core_critical_timing_hit
lttng start
lttng view
[...]

[05:13:25.432692322] (+0.026847647) compudjdev core_critical_timing_hit: { 
cpu_id = 3 }, { ip = 0xFFFFFFFF810FA2F9, parent_ip = 0xFFFFFFFF8108C31B, 
start_ip = 0xFFFFFFFF8108C31B, delta_ns = 2060, irqs_disabled = 1, 
preempt_disabled = 1, in_softirq = 0, in_irq = 0, in_nmi = 0 }

Tracepoints core_critical_timing_start and core_critical_timing_stop are
also available. They trigger at a much higher rate than
core_critical_timing_hit.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoy...@efficios.com>
CC: Thomas Gleixner <t...@linutronix.de>
CC: Steven Rostedt <rost...@goodmis.org>
CC: Ingo Molnar <mi...@redhat.com>
CC: Peter Zijlstra <pet...@infradead.org>
---
Open question about perf use-case: we currently get
"  Warning: [core:core_critical_timing_hit] bad op token {"
warning when doing "perf record -g -e core:core_critical_timing_hit -c 1 sleep 
30".
This should be investigated.

Changes since v2:
- add start ip to hit event,
- rebase on per-tracer "quiet" flag commit.

Changes since v1:
- Remove unneeded tracepoint jump label bypass.
- Enhance TP_printk() formatting.
- Use new ftrace-buffer option in changelog.
- Add core_critical_timing_start/stop tracepoints.
---
 include/trace/events/core.h  | 98 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_irqsoff.c | 18 +++++---
 2 files changed, 111 insertions(+), 5 deletions(-)
 create mode 100644 include/trace/events/core.h

diff --git a/include/trace/events/core.h b/include/trace/events/core.h
new file mode 100644
index 0000000..76e0efb
--- /dev/null
+++ b/include/trace/events/core.h
@@ -0,0 +1,98 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM core
+
+#if !defined(_TRACE_CORE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_CORE_H
+
+#include <linux/tracepoint.h>
+#include <linux/irqflags.h>
+#include <linux/preempt.h>
+
+/*
+ * Tracepoint for critical timings max/threshold hit.
+ */
+TRACE_EVENT(core_critical_timing_hit,
+
+       TP_PROTO(unsigned long ip, unsigned long parent_ip,
+                       unsigned long start_ip, unsigned long flags,
+                       int preempt_cnt, cycles_t delta_ns),
+
+       TP_ARGS(ip, parent_ip, start_ip, flags, preempt_cnt, delta_ns),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long, ip               )
+               __field(        unsigned long, parent_ip        )
+               __field(        unsigned long, start_ip         )
+               __field(        cycles_t, delta_ns              )
+               __field(        unsigned long, flags            )
+               __field(        int, preempt_cnt                )
+       ),
+
+       TP_fast_assign(
+               __entry->ip             = ip;
+               __entry->parent_ip      = parent_ip;
+               __entry->start_ip       = start_ip;
+               __entry->delta_ns       = delta_ns;
+               __entry->flags          = flags;
+               __entry->preempt_cnt    = preempt_cnt;
+       ),
+
+       TP_printk("ip=0x%lx parent_ip=0x%lx start_ip=0x%lx delta_ns=%llu 
irqs_disabled=%u preempt_disabled=%u in_softirq=%u in_irq=%u in_nmi=%u",
+                 __entry->ip, __entry->parent_ip, __entry->start_ip,
+                 (unsigned long long) __entry->delta_ns,
+                 !!raw_irqs_disabled_flags(__entry->flags),
+                 !!(__entry->preempt_cnt & PREEMPT_MASK),
+                 !!(__entry->preempt_cnt & SOFTIRQ_MASK),
+                 !!(__entry->preempt_cnt & HARDIRQ_MASK),
+                 !!(__entry->preempt_cnt & NMI_MASK))
+);
+
+/*
+ * Tracepoint for critical timings start/stop.
+ */
+DECLARE_EVENT_CLASS(core_critical_timing,
+
+       TP_PROTO(unsigned long ip, unsigned long parent_ip,
+                       unsigned long flags, int preempt_cnt),
+
+       TP_ARGS(ip, parent_ip, flags, preempt_cnt),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long, ip               )
+               __field(        unsigned long, parent_ip        )
+               __field(        unsigned long, flags            )
+               __field(        int, preempt_cnt                )
+       ),
+
+       TP_fast_assign(
+               __entry->ip             = ip;
+               __entry->parent_ip      = parent_ip;
+               __entry->flags          = flags;
+               __entry->preempt_cnt    = preempt_cnt;
+       ),
+
+       TP_printk("ip=0x%lx parent_ip=0x%lx irqs_disabled=%u 
preempt_disabled=%u in_softirq=%u in_irq=%u in_nmi=%u",
+                 __entry->ip, __entry->parent_ip,
+                 !!raw_irqs_disabled_flags(__entry->flags),
+                 !!(__entry->preempt_cnt & PREEMPT_MASK),
+                 !!(__entry->preempt_cnt & SOFTIRQ_MASK),
+                 !!(__entry->preempt_cnt & HARDIRQ_MASK),
+                 !!(__entry->preempt_cnt & NMI_MASK))
+);
+
+DEFINE_EVENT(core_critical_timing, core_critical_timing_start,
+       TP_PROTO(unsigned long ip, unsigned long parent_ip,
+               unsigned long flags, int preempt_cnt),
+       TP_ARGS(ip, parent_ip, flags, preempt_cnt)
+);
+
+DEFINE_EVENT(core_critical_timing, core_critical_timing_stop,
+       TP_PROTO(unsigned long ip, unsigned long parent_ip,
+               unsigned long flags, int preempt_cnt),
+       TP_ARGS(ip, parent_ip, flags, preempt_cnt)
+);
+
+#endif /* _TRACE_CORE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 4ed348a..33295b6 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -14,6 +14,9 @@
 #include <linux/module.h>
 #include <linux/ftrace.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/core.h>
+
 #include "trace.h"
 
 static struct trace_array              *irqsoff_trace __read_mostly;
@@ -328,6 +331,8 @@ check_critical_timing(struct trace_array *tr,
                /* Skip 5 functions to get to the irq/preempt enable function */
                __trace_stack(tr, flags, 5, pc);
        }
+       trace_core_critical_timing_hit(CALLER_ADDR0, parent_ip,
+                       data->critical_start, flags, pc, delta);
 
        if (data->critical_sequence != max_sequence)
                goto out_unlock;
@@ -354,7 +359,7 @@ out:
 static inline void
 start_critical_timing(unsigned long ip, unsigned long parent_ip)
 {
-       int cpu;
+       int cpu, pc;
        struct trace_array *tr = irqsoff_trace;
        struct trace_array_cpu *data;
        unsigned long flags;
@@ -380,9 +385,10 @@ start_critical_timing(unsigned long ip, unsigned long 
parent_ip)
 
        local_save_flags(flags);
 
+       pc = preempt_count();
        if (!is_quiet(tr))
-               __trace_function(tr, ip, parent_ip, flags, preempt_count());
-
+               __trace_function(tr, ip, parent_ip, flags, pc);
+       trace_core_critical_timing_start(ip, parent_ip, flags, pc);
        per_cpu(tracing_cpu, cpu) = 1;
 
        atomic_dec(&data->disabled);
@@ -391,7 +397,7 @@ start_critical_timing(unsigned long ip, unsigned long 
parent_ip)
 static inline void
 stop_critical_timing(unsigned long ip, unsigned long parent_ip)
 {
-       int cpu;
+       int cpu, pc;
        struct trace_array *tr = irqsoff_trace;
        struct trace_array_cpu *data;
        unsigned long flags;
@@ -415,8 +421,10 @@ stop_critical_timing(unsigned long ip, unsigned long 
parent_ip)
        atomic_inc(&data->disabled);
 
        local_save_flags(flags);
+       pc = preempt_count();
        if (!is_quiet(tr))
-               __trace_function(tr, ip, parent_ip, flags, preempt_count());
+               __trace_function(tr, ip, parent_ip, flags, pc);
+       trace_core_critical_timing_stop(ip, parent_ip, flags, pc);
        check_critical_timing(tr, data, parent_ip ? : ip, cpu);
        data->critical_start = 0;
        atomic_dec(&data->disabled);
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to