Having a per-iterator polling work is wasteful when logging several trace_remote per_cpu/trace_pipe files in parallel. This result in one work running per-CPU, where only one would suffice.
Transition to a single per-remote polling work, scheduled on the first consumer creation and stopped when the last consuming iterator is freed. This blanket polls all CPUs, regardless of which ones are actually being read. This is acceptable because the poll consists of reading the meta-page, which is a fast operation. Also, it is more common to log all CPUs in the system than only one, so this use-case should be favoured. Signed-off-by: Vincent Donnefort <[email protected]> diff --git a/kernel/trace/trace_remote.c b/kernel/trace/trace_remote.c index 71f6cda0fbd4..2271d54eb3dd 100644 --- a/kernel/trace/trace_remote.c +++ b/kernel/trace/trace_remote.c @@ -26,7 +26,6 @@ enum tri_type { struct trace_remote_iterator { struct trace_remote *remote; struct trace_seq seq; - struct delayed_work poll_work; unsigned long lost_events; u64 ts; struct ring_buffer_iter *rb_iter; @@ -56,6 +55,8 @@ struct trace_remote { struct rw_semaphore *pcpu_reader_locks; unsigned int nr_readers; unsigned int poll_ms; + struct delayed_work poll_work; + unsigned int poll_cnt; bool tracing_on; }; @@ -350,17 +351,6 @@ static bool trace_remote_has_cpu(struct trace_remote *remote, int cpu) return ring_buffer_poll_remote(remote->trace_buffer, cpu) == 0; } -static void __poll_remote(struct work_struct *work) -{ - struct delayed_work *dwork = to_delayed_work(work); - struct trace_remote_iterator *iter; - - iter = container_of(dwork, struct trace_remote_iterator, poll_work); - ring_buffer_poll_remote(iter->remote->trace_buffer, iter->cpu); - schedule_delayed_work((struct delayed_work *)work, - msecs_to_jiffies(iter->remote->poll_ms)); -} - static void __free_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu) { if (cpu != RING_BUFFER_ALL_CPUS) { @@ -404,6 +394,36 @@ static int __alloc_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu) return 0; } +static void trace_remote_do_poll(struct trace_remote *remote) +{ + ring_buffer_poll_remote(remote->trace_buffer, RING_BUFFER_ALL_CPUS); + schedule_delayed_work(&remote->poll_work, msecs_to_jiffies(remote->poll_ms)); +} + +static void __poll_remote(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + + trace_remote_do_poll(container_of(dwork, struct trace_remote, poll_work)); +} + +static void trace_remote_inc_poll(struct trace_remote *remote) +{ + /* poll_cnt <= nr_readers, inherits its overflow protection */ + if (!remote->poll_cnt++) + trace_remote_do_poll(remote); +} + +static void trace_remote_dec_poll(struct trace_remote *remote) +{ + if (WARN_ON_ONCE(!remote->poll_cnt)) + return; + + remote->poll_cnt--; + if (!remote->poll_cnt) + cancel_delayed_work_sync(&remote->poll_work); +} + static struct trace_remote_iterator *trace_remote_iter(struct trace_remote *remote, int cpu, enum tri_type type) { @@ -433,9 +453,7 @@ static struct trace_remote_iterator switch (type) { case TRI_CONSUMING: - ring_buffer_poll_remote(remote->trace_buffer, cpu); - INIT_DELAYED_WORK(&iter->poll_work, __poll_remote); - schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms)); + trace_remote_inc_poll(remote); break; case TRI_NONCONSUMING: ret = __alloc_ring_buffer_iter(iter, cpu); @@ -469,7 +487,7 @@ static void trace_remote_iter_free(struct trace_remote_iterator *iter) switch (iter->type) { case TRI_CONSUMING: - cancel_delayed_work_sync(&iter->poll_work); + trace_remote_dec_poll(remote); break; case TRI_NONCONSUMING: __free_ring_buffer_iter(iter, iter->cpu); @@ -1002,6 +1020,7 @@ int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, remote->poll_ms = 100; mutex_init(&remote->lock); init_rwsem(&remote->reader_lock); + INIT_DELAYED_WORK(&remote->poll_work, __poll_remote); guard(mutex)(&trace_remotes_lock); -- 2.54.0.1032.g2f8565e1d1-goog
