Interrupts disturb real-time tasks on the CPUs they are affined to.
To ensure CPU isolation for real-time tasks, interrupt handling must
be adjusted accordingly.
Non-managed interrupts can be configured from userspace,
while managed interrupts require adjustments in kernelspace.

Adjust the status of managed interrupts according to changes of the
housekeeping CPUs to support dynamic CPU isolation.

Signed-off-by: Costa Shulyupin <costa.s...@redhat.com>

---

The following code is a proof of concept, provided to validate and
review the correctness of the approach.
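
The result can be spot-checked from userspace before and after a
housekeeping change. A possible sketch (it relies only on the existing
/proc/irq/<N>/effective_affinity_list files, where available, and is
not part of this patch):

/* Sketch: print each IRQ's effective affinity so it can be compared
 * by hand against the isolated CPU set.  Entries without an
 * effective_affinity_list file are silently skipped.
 */
#include <dirent.h>
#include <limits.h>
#include <stdio.h>

int main(void)
{
	DIR *d = opendir("/proc/irq");
	struct dirent *e;

	if (!d) {
		perror("/proc/irq");
		return 1;
	}
	while ((e = readdir(d)) != NULL) {
		char path[PATH_MAX], line[256];
		FILE *f;

		if (e->d_name[0] == '.')
			continue;
		snprintf(path, sizeof(path),
			 "/proc/irq/%s/effective_affinity_list", e->d_name);
		f = fopen(path, "r");
		if (!f)
			continue;
		if (fgets(line, sizeof(line), f))
			printf("IRQ %s: CPUs %s", e->d_name, line);
		fclose(f);
	}
	closedir(d);
	return 0;
}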

C++-style comments (//) denote temporary notes.
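
As background for the userspace/kernelspace split mentioned in the
commit message: the affinity of a non-managed interrupt is normally
adjusted from userspace through /proc/irq/<N>/smp_affinity, and the
kernel rejects such writes for managed interrupts, which is why they
have to be adjusted in kernelspace. A minimal sketch (the IRQ number
and the CPU mask are made-up examples, not part of this patch):

/* Sketch: retarget a non-managed IRQ to CPUs 0-1 from userspace.
 * The IRQ number (42) and the mask are arbitrary examples; for a
 * managed IRQ this write is expected to fail (typically with EIO),
 * which is the case handled in kernelspace by this series.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *path = "/proc/irq/42/smp_affinity";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	/* "3" = CPU0 | CPU1, written as a hexadecimal cpumask */
	if (fprintf(f, "3\n") < 0 || fclose(f) != 0) {
		fprintf(stderr, "%s: %s\n", path, strerror(errno));
		return 1;
	}
	return 0;
}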

v2:
- refactor irq_affinity_adjust():
  - add more comments
  - add managed_irq_isolate() derived from migrate_one_irq(),
    irq_needs_fixup() and irq_fixup_move_pending()
  - use irq_set_affinity() instead of irq_set_affinity_locked
- Addressed Gleixner's comments:
  - use `struct cpumask *` instead of `cpumask_var_t` in function signature
  - remove locking in irq_affinity_adjust()

v1:
- https://lore.kernel.org/lkml/20240516190437.3545310-5-costa.s...@redhat.com/
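
For context on what is expected to change the housekeeping cpumask at
run time: the cpuset.c hunk below adds HK_TYPE_MANAGED_IRQ to the
housekeeping flags handled by cpuset, so the trigger is presumably an
isolated cpuset partition being created or modified. A rough userspace
sketch of such a trigger via the cgroup v2 cpuset interface (the
cgroup name "rt", the mount point and the CPU number are assumptions
for illustration; depending on kernel version and hierarchy,
additional steps such as cpuset.cpus.exclusive may be required):

/* Sketch: carve CPU 3 into an isolated cpuset partition (cgroup v2),
 * the kind of run-time change that should now propagate to managed
 * interrupts.  Paths and the CPU number are examples only.
 */
#include <errno.h>
#include <stdio.h>
#include <sys/stat.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	if (fprintf(f, "%s", val) < 0 || fclose(f) != 0) {
		perror(path);
		return -1;
	}
	return 0;
}

int main(void)
{
	if (write_str("/sys/fs/cgroup/cgroup.subtree_control", "+cpuset"))
		return 1;
	if (mkdir("/sys/fs/cgroup/rt", 0755) && errno != EEXIST) {
		perror("/sys/fs/cgroup/rt");
		return 1;
	}
	if (write_str("/sys/fs/cgroup/rt/cpuset.cpus", "3"))
		return 1;
	return write_str("/sys/fs/cgroup/rt/cpuset.cpus.partition",
			 "isolated") ? 1 : 0;
}
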
---
 include/linux/irq.h      |  2 +
 kernel/cgroup/cpuset.c   |  1 +
 kernel/irq/cpuhotplug.c  | 95 ++++++++++++++++++++++++++++++++++++++++
 kernel/sched/isolation.c | 11 +++++-
 4 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index fa711f80957b6..4eb2e765dbd95 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -619,6 +619,8 @@ extern int irq_affinity_online_cpu(unsigned int cpu);
 # define irq_affinity_online_cpu       NULL
 #endif
 
+int managed_irq_affinity_adjust(struct cpumask *enable_mask);
+
 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
 void __irq_move_irq(struct irq_data *data);
 static inline void irq_move_irq(struct irq_data *data)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index bf60bdc973dd6..73b06b2cd91e3 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -90,6 +90,7 @@ static struct list_head remote_children;
 #define HOUSEKEEPING_FLAGS     (BIT(HK_TYPE_TIMER)  | BIT(HK_TYPE_RCU)  |\
                                 BIT(HK_TYPE_SCHED)  | BIT(HK_TYPE_MISC) |\
                                 BIT(HK_TYPE_DOMAIN) | BIT(HK_TYPE_WQ)   |\
+                                BIT(HK_TYPE_MANAGED_IRQ) |\
                                 BIT(HK_TYPE_KTHREAD))
 
 /*
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index ec2cdcd20bee7..adbe1f3e5bd22 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -252,3 +252,98 @@ int irq_affinity_online_cpu(unsigned int cpu)
 
        return 0;
 }
+
+/*
+ * managed_irq_isolate() - Deactivate managed interrupts if necessary
+ */
+// derived from migrate_one_irq, irq_needs_fixup, irq_fixup_move_pending
+static int managed_irq_isolate(struct irq_desc *desc)
+{
+       struct irq_data *d = irq_desc_get_irq_data(desc);
+       struct irq_chip *chip = irq_data_get_irq_chip(d);
+       const struct cpumask *a;
+       bool maskchip;
+       int err;
+
+       /*
+        * Deactivate if:
+        * - Interrupt is managed
+        * - Interrupt is not per cpu
+        * - Interrupt is started
+        * - Effective affinity mask includes isolated CPUs
+        */
+       if (!irqd_affinity_is_managed(d) || irqd_is_per_cpu(d) || !irqd_is_started(d)
+           || cpumask_subset(irq_data_get_effective_affinity_mask(d),
+                             housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
+               return 0;
+       // TBD: is this required?
+       /*
+        * Complete an eventually pending irq move cleanup. If this
+        * interrupt was moved in hard irq context, then the vectors need
+        * to be cleaned up. It can't wait until this interrupt actually
+        * happens and this CPU was involved.
+        */
+       irq_force_complete_move(desc);
+
+       /* Prefer a usable pending mask, else the affinity mask */
+       a = irq_data_get_affinity_mask(d);
+       if (irqd_is_setaffinity_pending(d)) {
+               irqd_clr_move_pending(d);
+               if (cpumask_intersects(desc->pending_mask,
+                   housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
+                       a = irq_desc_get_pending_mask(desc);
+       }
+
+       maskchip = chip->irq_mask && !irq_can_move_pcntxt(d) && !irqd_irq_masked(d);
+       if (maskchip)
+               chip->irq_mask(d);
+
+       if (!cpumask_intersects(a, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ))) {
+               /*
+                * Shut managed interrupt down and leave the affinity untouched.
+                * The effective affinity is reset to the first online CPU.
+                */
+               irqd_set_managed_shutdown(d);
+               irq_shutdown_and_deactivate(desc);
+               return 0;
+       }
+
+       /*
+        * Do not set the force argument of irq_do_set_affinity() as this
+        * disables the masking of offline CPUs from the supplied affinity
+        * mask and therefore might keep/reassign the irq to the outgoing
+        * CPU.
+        */
+       err = irq_do_set_affinity(d, a, false);
+       if (err)
+               pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n",
+                                   d->irq, err);
+
+       if (maskchip)
+               chip->irq_unmask(d);
+
+       return err;
+}
+
+/**
+ * managed_irq_affinity_adjust() - Deactivate or restore managed interrupts
+ *                                 according to changes of the housekeeping cpumask.
+ * @enable_mask:       CPUs for which interrupts should be restored
+ */
+int managed_irq_affinity_adjust(struct cpumask *enable_mask)
+{
+       unsigned int irq;
+
+       for_each_active_irq(irq) {
+               struct irq_desc *desc = irq_to_desc(irq);
+               unsigned int cpu;
+
+               for_each_cpu(cpu, enable_mask)
+                       irq_restore_affinity_of_irq(desc, cpu);
+               raw_spin_lock(&desc->lock);
+               managed_irq_isolate(desc);
+               raw_spin_unlock(&desc->lock);
+       }
+
+       return 0;
+}
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 3f24921b929a0..cd72300ec8b99 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -130,6 +130,8 @@ static void __init housekeeping_setup_type(enum hk_type type,
  */
 static int housekeeping_update(enum hk_type type, const struct cpumask *update)
 {
+       int err = 0;
+
        struct {
                struct cpumask changed;
                struct cpumask enable;
@@ -149,9 +151,16 @@ static int housekeeping_update(enum hk_type type, const struct cpumask *update)
        if (!static_branch_unlikely(&housekeeping_overridden))
                static_key_enable_cpuslocked(&housekeeping_overridden.key);
 
+       switch (type) {
+       case HK_TYPE_MANAGED_IRQ:
+               err = managed_irq_affinity_adjust(&masks->enable);
+               break;
+       default:
+               break;
+       }
        kfree(masks);
 
-       return 0;
+       return err;
 }
 
 static int __init housekeeping_setup(char *str, unsigned long flags)
-- 
2.45.0

