Having a per-iterator polling work is wasteful when logging several
trace_remote per_cpu/trace_pipe files in parallel. This result in one
work running per-CPU, where only one would suffice.

Transition to a single per-remote polling work, scheduled on the first
consumer creation and stopped when the last consuming iterator is freed.

This blanket polls all CPUs, regardless of which ones are actually being
read. This is acceptable because the poll consists of reading the
meta-page, which is a fast operation. Also, it is more common to log all
CPUs in the system than only one, so this use-case should be favoured.

Signed-off-by: Vincent Donnefort <[email protected]>

diff --git a/kernel/trace/trace_remote.c b/kernel/trace/trace_remote.c
index 71f6cda0fbd4..2271d54eb3dd 100644
--- a/kernel/trace/trace_remote.c
+++ b/kernel/trace/trace_remote.c
@@ -26,7 +26,6 @@ enum tri_type {
 struct trace_remote_iterator {
        struct trace_remote             *remote;
        struct trace_seq                seq;
-       struct delayed_work             poll_work;
        unsigned long                   lost_events;
        u64                             ts;
        struct ring_buffer_iter         *rb_iter;
@@ -56,6 +55,8 @@ struct trace_remote {
        struct rw_semaphore             *pcpu_reader_locks;
        unsigned int                    nr_readers;
        unsigned int                    poll_ms;
+       struct delayed_work             poll_work;
+       unsigned int                    poll_cnt;
        bool                            tracing_on;
 };
 
@@ -350,17 +351,6 @@ static bool trace_remote_has_cpu(struct trace_remote 
*remote, int cpu)
        return ring_buffer_poll_remote(remote->trace_buffer, cpu) == 0;
 }
 
-static void __poll_remote(struct work_struct *work)
-{
-       struct delayed_work *dwork = to_delayed_work(work);
-       struct trace_remote_iterator *iter;
-
-       iter = container_of(dwork, struct trace_remote_iterator, poll_work);
-       ring_buffer_poll_remote(iter->remote->trace_buffer, iter->cpu);
-       schedule_delayed_work((struct delayed_work *)work,
-                             msecs_to_jiffies(iter->remote->poll_ms));
-}
-
 static void __free_ring_buffer_iter(struct trace_remote_iterator *iter, int 
cpu)
 {
        if (cpu != RING_BUFFER_ALL_CPUS) {
@@ -404,6 +394,36 @@ static int __alloc_ring_buffer_iter(struct 
trace_remote_iterator *iter, int cpu)
        return 0;
 }
 
+static void trace_remote_do_poll(struct trace_remote *remote)
+{
+       ring_buffer_poll_remote(remote->trace_buffer, RING_BUFFER_ALL_CPUS);
+       schedule_delayed_work(&remote->poll_work, 
msecs_to_jiffies(remote->poll_ms));
+}
+
+static void __poll_remote(struct work_struct *work)
+{
+       struct delayed_work *dwork = to_delayed_work(work);
+
+       trace_remote_do_poll(container_of(dwork, struct trace_remote, 
poll_work));
+}
+
+static void trace_remote_inc_poll(struct trace_remote *remote)
+{
+       /* poll_cnt <= nr_readers, inherits its overflow protection */
+       if (!remote->poll_cnt++)
+               trace_remote_do_poll(remote);
+}
+
+static void trace_remote_dec_poll(struct trace_remote *remote)
+{
+       if (WARN_ON_ONCE(!remote->poll_cnt))
+               return;
+
+       remote->poll_cnt--;
+       if (!remote->poll_cnt)
+               cancel_delayed_work_sync(&remote->poll_work);
+}
+
 static struct trace_remote_iterator
 *trace_remote_iter(struct trace_remote *remote, int cpu, enum tri_type type)
 {
@@ -433,9 +453,7 @@ static struct trace_remote_iterator
 
                switch (type) {
                case TRI_CONSUMING:
-                       ring_buffer_poll_remote(remote->trace_buffer, cpu);
-                       INIT_DELAYED_WORK(&iter->poll_work, __poll_remote);
-                       schedule_delayed_work(&iter->poll_work, 
msecs_to_jiffies(remote->poll_ms));
+                       trace_remote_inc_poll(remote);
                        break;
                case TRI_NONCONSUMING:
                        ret = __alloc_ring_buffer_iter(iter, cpu);
@@ -469,7 +487,7 @@ static void trace_remote_iter_free(struct 
trace_remote_iterator *iter)
 
        switch (iter->type) {
        case TRI_CONSUMING:
-               cancel_delayed_work_sync(&iter->poll_work);
+               trace_remote_dec_poll(remote);
                break;
        case TRI_NONCONSUMING:
                __free_ring_buffer_iter(iter, iter->cpu);
@@ -1002,6 +1020,7 @@ int trace_remote_register(const char *name, struct 
trace_remote_callbacks *cbs,
        remote->poll_ms = 100;
        mutex_init(&remote->lock);
        init_rwsem(&remote->reader_lock);
+       INIT_DELAYED_WORK(&remote->poll_work, __poll_remote);
 
        guard(mutex)(&trace_remotes_lock);
 
-- 
2.54.0.1032.g2f8565e1d1-goog


Reply via email to