This commit adds task isolation hooks as follows:

- __handle_domain_irq() and handle_domain_nmi() generate an
  isolation warning for the local task

- irq_work_queue_on() generates an isolation warning for the remote
  task being interrupted for irq_work (through
  __smp_call_single_queue())

- generic_exec_single() generates a remote isolation warning for
  the remote cpu being IPI'd (through __smp_call_single_queue())

- smp_call_function_many() generates a remote isolation warning for
  the set of remote cpus being IPI'd (through
  smp_call_function_many_cond())

- on_each_cpu_cond_mask() generates a remote isolation warning for
  the set of remote cpus being IPI'd (through
  smp_call_function_many_cond())

- __ttwu_queue_wakelist() generates a remote isolation warning for
  the remote cpu being IPI'd (through __smp_call_single_queue())

- nmi_enter(), __context_tracking_exit(), __handle_domain_irq(),
  handle_domain_nmi() and scheduler_ipi() clear low-level flags and
  synchronize CPUs by calling task_isolation_kernel_enter()

Calls to task_isolation_remote() or task_isolation_interrupt() can
be placed in platform-independent code, as is done here, when that
results in fewer lines changed; this is the case, for example, for
the users of the arch_send_call_function_*() APIs. Alternatively,
they can be placed in per-architecture code when there are many
callers, as is the case for smp_send_reschedule().

A further cleanup might be to create an intermediate layer, so that
for example smp_send_reschedule() is a single generic function that
just calls arch_smp_send_reschedule(), allowing generic code to be
called every time smp_send_reschedule() is invoked. But for now, we
just update either the callers or the callees, whichever makes the
most sense.

Calls to task_isolation_kernel_enter() are intended for early
kernel entry code. They may be placed in platform-independent or
platform-specific code.

It may be possible to clean up the low-level entry code and organize
the calls to task_isolation_kernel_enter() so that multiple
per-architecture or driver-specific calls are not needed. RCU
initialization may be a good reference point for those places in the
kernel (task_isolation_kernel_enter() should precede it); however,
right now it is not unified between architectures.

Signed-off-by: Chris Metcalf <cmetc...@mellanox.com>
[abel...@marvell.com: adapted for kernel 5.8, added low-level flags handling]
Signed-off-by: Alex Belits <abel...@marvell.com>
---
 include/linux/hardirq.h   |  2 ++
 include/linux/sched.h     |  2 ++
 kernel/context_tracking.c |  4 ++++
 kernel/irq/irqdesc.c      | 13 +++++++++++++
 kernel/smp.c              |  6 +++++-
 5 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 03c9fece7d43..5aab1d0a580e 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -7,6 +7,7 @@
 #include <linux/lockdep.h>
 #include <linux/ftrace_irq.h>
 #include <linux/vtime.h>
+#include <linux/isolation.h>
 #include <asm/hardirq.h>
 
 extern void synchronize_irq(unsigned int irq);
@@ -114,6 +115,7 @@ extern void rcu_nmi_exit(void);
 #define nmi_enter()                                            \
        do {                                                    \
                arch_nmi_enter();                               \
+               task_isolation_kernel_enter();                  \
                printk_nmi_enter();                             \
                lockdep_off();                                  \
                BUG_ON(in_nmi() == NMI_MASK);                   \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7fb7bb3fddaa..cacfa415dc59 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -32,6 +32,7 @@
 #include <linux/posix-timers.h>
 #include <linux/rseq.h>
 #include <linux/kcsan.h>
+#include <linux/isolation.h>
 
 /* task_struct member predeclarations (sorted alphabetically): */
 struct audit_context;
@@ -1743,6 +1744,7 @@ extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);
 #ifdef CONFIG_SMP
 static __always_inline void scheduler_ipi(void)
 {
+       task_isolation_kernel_enter();
        /*
         * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
         * TIF_NEED_RESCHED remotely (for the first time) will also send
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 36a98c48aedc..481a722ddbce 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -21,6 +21,7 @@
 #include <linux/hardirq.h>
 #include <linux/export.h>
 #include <linux/kprobes.h>
+#include <linux/isolation.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/context_tracking.h>
@@ -148,6 +149,8 @@ void noinstr __context_tracking_exit(enum ctx_state state)
        if (!context_tracking_recursion_enter())
                return;
 
+       task_isolation_kernel_enter();
+
        if (__this_cpu_read(context_tracking.state) == state) {
                if (__this_cpu_read(context_tracking.active)) {
                        /*
@@ -159,6 +162,7 @@ void noinstr __context_tracking_exit(enum ctx_state state)
                                instrumentation_begin();
                                vtime_user_exit(current);
                                trace_user_exit(0);
+                               task_isolation_user_exit();
                                instrumentation_end();
                        }
                }
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 1a7723604399..b351aac7732f 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -16,6 +16,7 @@
 #include <linux/bitmap.h>
 #include <linux/irqdomain.h>
 #include <linux/sysfs.h>
+#include <linux/isolation.h>
 
 #include "internals.h"
 
@@ -669,6 +670,8 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
        unsigned int irq = hwirq;
        int ret = 0;
 
+       task_isolation_kernel_enter();
+
        irq_enter();
 
 #ifdef CONFIG_IRQ_DOMAIN
@@ -676,6 +679,10 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
                irq = irq_find_mapping(domain, hwirq);
 #endif
 
+       task_isolation_interrupt((irq == hwirq) ?
+                                "irq %d (%s)" : "irq %d (%s hwirq %d)",
+                                irq, domain ? domain->name : "", hwirq);
+
        /*
         * Some hardware gives randomly wrong interrupts.  Rather
         * than crashing, do something sensible.
@@ -710,6 +717,8 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
        unsigned int irq;
        int ret = 0;
 
+       task_isolation_kernel_enter();
+
        /*
         * NMI context needs to be setup earlier in order to deal with tracing.
         */
@@ -717,6 +726,10 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
 
        irq = irq_find_mapping(domain, hwirq);
 
+       task_isolation_interrupt((irq == hwirq) ?
+                                "NMI irq %d (%s)" : "NMI irq %d (%s hwirq %d)",
+                                irq, domain ? domain->name : "", hwirq);
+
        /*
         * ack_bad_irq is not NMI-safe, just report
         * an invalid interrupt.
diff --git a/kernel/smp.c b/kernel/smp.c
index aa17eedff5be..6a6849783948 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -20,6 +20,7 @@
 #include <linux/sched.h>
 #include <linux/sched/idle.h>
 #include <linux/hypervisor.h>
+#include <linux/isolation.h>
 
 #include "smpboot.h"
 #include "sched/smp.h"
@@ -146,8 +147,10 @@ void __smp_call_single_queue(int cpu, struct llist_node *node)
         * locking and barrier primitives. Generic code isn't really
         * equipped to do the right thing...
         */
-       if (llist_add(node, &per_cpu(call_single_queue, cpu)))
+       if (llist_add(node, &per_cpu(call_single_queue, cpu))) {
+               task_isolation_remote(cpu, "IPI function");
                send_call_function_single_ipi(cpu);
+       }
 }
 
 /*
@@ -545,6 +548,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
        }
 
        /* Send a message to all CPUs in the map */
+       task_isolation_remote_cpumask(cfd->cpumask_ipi, "IPI function");
        arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
 
        if (wait) {
-- 
2.26.2

Reply via email to