From: Michael Kelley <mhkli...@outlook.com>

Do the following:

1) Create an interrupt handler for VMBus channel interrupts by pulling
   out portions of vmbus_chan_sched() into vmbus_chan_handler(). The
   outer part of vmbus_chan_sched() that loops through the synic event
   page bitmap remains unchanged. But when a pending VMBus channel
   interrupt is found, call generic_handle_irq_desc() to invoke
   handle_simple_irq() and then vmbus_chan_handler() for the channel's
   IRQ. handle_simple_irq() does the IRQ stats for that channel's IRQ,
   so that per-channel interrupt counts appear in /proc/interrupts. The
   overall processing of VMBus channel interrupts is unchanged except
   for the intervening handle_simple_irq() that does the stats. No acks
   or EOIs are required for VMBus channel IRQs.

2) Update __vmbus_open() to call request_irq(), specifying the previously
   set up channel IRQ name and vmbus_chan_handler() as the interrupt
   handler. Set the IRQ affinity to the target_cpu assigned when the
   channel was created.

3) Update vmbus_isr() to return "false" if it only handles VMBus channel
   interrupts, which were passed to the channel IRQ handler. If
   vmbus_isr() handles one or more control message interrupts, then
   return "true". Update the related definitions to specify a boolean
   return value.

4) The callers of vmbus_isr() increment IRQ stats for the top-level
   IRQ only if "true" is returned. On x86, the caller is
   sysvec_hyperv_callback(), which manages the stats directly. On
   arm64, the caller is vmbus_percpu_isr(), which maps the boolean
   return value to IRQ_NONE ("false") or IRQ_HANDLED ("true").
   Then handle_percpu_demux_irq() conditionally updates the
   stats based on the return value from vmbus_percpu_isr().

With these changes, interrupts from VMBus channels are now
processed as Linux IRQs that are demultiplexed from the main
VMBus interrupt.

Signed-off-by: Michael Kelley <mhkli...@outlook.com>
---
 arch/x86/kernel/cpu/mshyperv.c |  9 ++--
 drivers/hv/channel.c           | 25 +++++++++-
 drivers/hv/hv_common.c         |  2 +-
 drivers/hv/vmbus_drv.c         | 84 +++++++++++++++++++---------------
 include/asm-generic/mshyperv.h |  3 +-
 5 files changed, 79 insertions(+), 44 deletions(-)

diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index e0fd57a8ba84..18bc282a99db 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -110,7 +110,7 @@ void hv_set_msr(unsigned int reg, u64 value)
 }
 EXPORT_SYMBOL_GPL(hv_set_msr);
 
-static void (*vmbus_handler)(void);
+static bool (*vmbus_handler)(void);
 static void (*hv_stimer0_handler)(void);
 static void (*hv_kexec_handler)(void);
 static void (*hv_crash_handler)(struct pt_regs *regs);
@@ -119,9 +119,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
 {
        struct pt_regs *old_regs = set_irq_regs(regs);
 
-       inc_irq_stat(irq_hv_callback_count);
-       if (vmbus_handler)
-               vmbus_handler();
+       if (vmbus_handler && vmbus_handler())
+               inc_irq_stat(irq_hv_callback_count);
 
        if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
                apic_eoi();
@@ -129,7 +128,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
        set_irq_regs(old_regs);
 }
 
-void hv_setup_vmbus_handler(void (*handler)(void))
+void hv_setup_vmbus_handler(bool (*handler)(void))
 {
        vmbus_handler = handler;
 }
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index fb8cd8469328..1aa020b538f1 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -638,6 +638,7 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
        struct vmbus_channel_open_channel *open_msg;
        struct vmbus_channel_msginfo *open_info = NULL;
        struct page *page = newchannel->ringbuffer_page;
+       u32 relid = newchannel->offermsg.child_relid;
        u32 send_pages, recv_pages;
        unsigned long flags;
        int err;
@@ -685,13 +686,31 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
        if (err)
                goto error_free_gpadl;
 
+       /* Request the IRQ and assign to target_cpu */
+       err = request_irq(newchannel->irq, vmbus_chan_handler, 0,
+                         newchannel->irq_name, newchannel);
+       if (err) {
+               pr_err("request_irq failed with %d for relid %d irq %d\n",
+                               err, relid, newchannel->irq);
+               goto error_free_gpadl;
+       }
+       err = irq_set_affinity_and_hint(newchannel->irq,
+                                 cpumask_of(newchannel->target_cpu));
+       if (err) {
+               pr_err("irq_set_affinity_and_hint failed with %d for relid %d irq %d\n",
+                               err, relid, newchannel->irq);
+               free_irq(newchannel->irq, newchannel);
+               goto error_free_gpadl;
+       }
+       newchannel->irq_requested = true;
+
        /* Create and init the channel open message */
        open_info = kzalloc(sizeof(*open_info) +
                           sizeof(struct vmbus_channel_open_channel),
                           GFP_KERNEL);
        if (!open_info) {
                err = -ENOMEM;
-               goto error_free_gpadl;
+               goto error_free_irq;
        }
 
        init_completion(&open_info->waitevent);
@@ -759,6 +778,10 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 error_free_info:
        kfree(open_info);
+error_free_irq:
+       irq_update_affinity_hint(newchannel->irq, NULL);
+       free_irq(newchannel->irq, newchannel);
+       newchannel->irq_requested = false;
 error_free_gpadl:
        vmbus_teardown_gpadl(newchannel, &newchannel->ringbuffer_gpadlhandle);
 error_clean_ring:
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 9c452bfbd571..38a23add721c 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -610,7 +610,7 @@ bool __weak hv_isolation_type_tdx(void)
 }
 EXPORT_SYMBOL_GPL(hv_isolation_type_tdx);
 
-void __weak hv_setup_vmbus_handler(void (*handler)(void))
+void __weak hv_setup_vmbus_handler(bool (*handler)(void))
 {
 }
 EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 8fd03d41e71a..b73be7c02d37 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1193,6 +1193,45 @@ static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
 }
 #endif /* CONFIG_PM_SLEEP */
 
+irqreturn_t vmbus_chan_handler(int irq, void *dev_id)
+{
+       void (*callback_fn)(void *context);
+       struct vmbus_channel *channel = dev_id;
+
+       /*
+        * Make sure that the ring buffer data structure doesn't get
+        * freed while we dereference the ring buffer pointer.  Test
+        * for the channel's onchannel_callback being NULL within a
+        * sched_lock critical section.  See also the inline comments
+        * in vmbus_reset_channel_cb().
+        */
+       spin_lock(&channel->sched_lock);
+
+       callback_fn = channel->onchannel_callback;
+       if (unlikely(callback_fn == NULL))
+               goto spin_unlock;
+
+       trace_vmbus_chan_sched(channel);
+
+       ++channel->interrupts;
+
+       switch (channel->callback_mode) {
+       case HV_CALL_ISR:
+               (*callback_fn)(channel->channel_callback_context);
+               break;
+
+       case HV_CALL_BATCHED:
+               hv_begin_read(&channel->inbound);
+               fallthrough;
+       case HV_CALL_DIRECT:
+               tasklet_schedule(&channel->callback_event);
+       }
+
+spin_unlock:
+       spin_unlock(&channel->sched_lock);
+       return IRQ_HANDLED;
+}
+
 /*
  * Schedule all channels with events pending
  */
@@ -1217,7 +1256,6 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
                return;
 
        for_each_set_bit(relid, recv_int_page, maxbits) {
-               void (*callback_fn)(void *context);
                struct vmbus_channel *channel;
                struct irq_desc *desc;
 
@@ -1244,43 +1282,14 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
                if (channel->rescind)
                        goto sched_unlock_rcu;
 
-               /*
-                * Make sure that the ring buffer data structure doesn't get
-                * freed while we dereference the ring buffer pointer.  Test
-                * for the channel's onchannel_callback being NULL within a
-                * sched_lock critical section.  See also the inline comments
-                * in vmbus_reset_channel_cb().
-                */
-               spin_lock(&channel->sched_lock);
-
-               callback_fn = channel->onchannel_callback;
-               if (unlikely(callback_fn == NULL))
-                       goto sched_unlock;
-
-               trace_vmbus_chan_sched(channel);
-
-               ++channel->interrupts;
-
-               switch (channel->callback_mode) {
-               case HV_CALL_ISR:
-                       (*callback_fn)(channel->channel_callback_context);
-                       break;
-
-               case HV_CALL_BATCHED:
-                       hv_begin_read(&channel->inbound);
-                       fallthrough;
-               case HV_CALL_DIRECT:
-                       tasklet_schedule(&channel->callback_event);
-               }
+               generic_handle_irq_desc(desc);
 
-sched_unlock:
-               spin_unlock(&channel->sched_lock);
 sched_unlock_rcu:
                rcu_read_unlock();
        }
 }
 
-static void vmbus_isr(void)
+static bool vmbus_isr(void)
 {
        struct hv_per_cpu_context *hv_cpu
                = this_cpu_ptr(hv_context.cpu_context);
@@ -1299,15 +1308,18 @@ static void vmbus_isr(void)
                        vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
                } else
                        tasklet_schedule(&hv_cpu->msg_dpc);
-       }
 
-       add_interrupt_randomness(vmbus_interrupt);
+               add_interrupt_randomness(vmbus_interrupt);
+               return true;
+       }
+       return false;
 }
 
 static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
 {
-       vmbus_isr();
-       return IRQ_HANDLED;
+       if (vmbus_isr())
+               return IRQ_HANDLED;
+       return IRQ_NONE;
 }
 
 int vmbus_irq_set_affinity(struct irq_data *data,
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 0488ff8b511f..0a5559b9d5f7 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -178,7 +178,7 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
 
 int hv_get_hypervisor_version(union hv_hypervisor_version_info *info);
 
-void hv_setup_vmbus_handler(void (*handler)(void));
+void hv_setup_vmbus_handler(bool (*handler)(void));
 void hv_remove_vmbus_handler(void);
 void hv_setup_stimer0_handler(void (*handler)(void));
 void hv_remove_stimer0_handler(void);
@@ -188,6 +188,7 @@ void hv_remove_kexec_handler(void);
 void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
 void hv_remove_crash_handler(void);
 
+extern irqreturn_t vmbus_chan_handler(int irq, void *dev_id);
 extern void vmbus_irq_mask(struct irq_data *data);
 extern void vmbus_irq_unmask(struct irq_data *data);
 extern int vmbus_irq_set_affinity(struct irq_data *data,
-- 
2.25.1


Reply via email to