From: Michael Kelley <mhkli...@outlook.com>

hv_synic_cleanup() currently prevents a CPU from going offline if any
VMBus channel IRQs are targeted at that CPU. However, current code has a
race in that an IRQ could be affinitized to the CPU after the check in
hv_synic_cleanup() and before the CPU is removed from cpu_online_mask.
Any channel interrupts could be lost and the channel would hang.

Fix this by adding a flag for each CPU indicating if the synic is online.
Filter the new affinity with these flags so that vmbus_irq_set_affinity()
doesn't pick a CPU where the synic is already offline.

Also add a spin lock so that vmbus_irq_set_affinity() changing the
channel target_cpu and sending the MODIFYCHANNEL message to Hyper-V
are atomic with respect to the checks made in hv_synic_cleanup().
hv_synic_cleanup() needs these operations to be atomic so that it
can correctly count the MODIFYCHANNEL messages that need to be
ack'ed by Hyper-V.

Signed-off-by: Michael Kelley <mhkli...@outlook.com>
---
 drivers/hv/connection.c   |  1 +
 drivers/hv/hv.c           | 22 ++++++++++++++++++++--
 drivers/hv/hyperv_vmbus.h |  2 ++
 drivers/hv/vmbus_drv.c    | 34 ++++++++++++++++++++++++++++------
 4 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index a105eecdeec2..b44ce3d39135 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -213,6 +213,7 @@ int vmbus_connect(void)
 
        INIT_LIST_HEAD(&vmbus_connection.chn_list);
        mutex_init(&vmbus_connection.channel_mutex);
+       spin_lock_init(&vmbus_connection.set_affinity_lock);
 
        /*
         * Setup the vmbus event connection for channel interrupt
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 76658dfc5008..89e491219129 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -338,6 +338,8 @@ int hv_synic_init(unsigned int cpu)
 {
        hv_synic_enable_regs(cpu);
 
+       cpumask_set_cpu(cpu, &vmbus_connection.synic_online);
+
        hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);
 
        return 0;
@@ -513,6 +515,17 @@ int hv_synic_cleanup(unsigned int cpu)
         * TODO: Re-bind the channels to different CPUs.
         */
        mutex_lock(&vmbus_connection.channel_mutex);
+       spin_lock(&vmbus_connection.set_affinity_lock);
+
+       /*
+        * Once the check for channels assigned to this CPU is complete, we
+        * must not allow a channel to be assigned to this CPU. So mark
+        * the synic as no longer online. This cpumask is checked in
+        * vmbus_irq_set_affinity() to prevent setting the affinity of
+        * an IRQ to such a CPU.
+        */
+       cpumask_clear_cpu(cpu, &vmbus_connection.synic_online);
+
        list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
                if (channel->target_cpu == cpu) {
                        channel_found = true;
@@ -527,10 +540,11 @@ int hv_synic_cleanup(unsigned int cpu)
                if (channel_found)
                        break;
        }
+       spin_unlock(&vmbus_connection.set_affinity_lock);
        mutex_unlock(&vmbus_connection.channel_mutex);
 
        if (channel_found)
-               return -EBUSY;
+               goto set_online;
 
        /*
         * channel_found == false means that any channels that were previously
@@ -547,7 +561,7 @@ int hv_synic_cleanup(unsigned int cpu)
                if (hv_synic_event_pending()) {
                        pr_err("Events pending when trying to offline CPU %d\n",
                                        cpu);
-                       return -EBUSY;
+                       goto set_online;
                }
        }
 
@@ -557,4 +571,8 @@ int hv_synic_cleanup(unsigned int cpu)
        hv_synic_disable_regs(cpu);
 
        return 0;
+
+set_online:
+       cpumask_set_cpu(cpu, &vmbus_connection.synic_online);
+       return -EBUSY;
 }
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 571b2955b38e..92ae5af10778 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -263,6 +263,8 @@ struct vmbus_connection {
        struct fwnode_handle *vmbus_fwnode;
        struct irq_domain *vmbus_irq_domain;
        struct irq_chip vmbus_irq_chip;
+       cpumask_t synic_online;
+       spinlock_t set_affinity_lock;
 
        /*
         * VM-wide counts of MODIFYCHANNEL messages sent and completed.
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 87f2f3436136..3430ad42d7ba 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1351,6 +1351,14 @@ int vmbus_irq_set_affinity(struct irq_data *data,
                return -EINVAL;
        }
 
+       /*
+        * The spin lock must be held so that checking synic_online, sending
+        * the MODIFYCHANNEL message, and setting channel->target_cpu are
+        * atomic with respect to hv_synic_cleanup() clearing the CPU in
+        * synic_online and doing the search.
+        */
+       spin_lock(&vmbus_connection.set_affinity_lock);
+
        /* Don't consider CPUs that are isolated */
        if (housekeeping_enabled(HK_TYPE_MANAGED_IRQ))
                cpumask_and(&tempmask, dest,
@@ -1367,30 +1375,39 @@ int vmbus_irq_set_affinity(struct irq_data *data,
        origin_cpu = channel->target_cpu;
        if (cpumask_test_cpu(origin_cpu, &tempmask)) {
                target_cpu = origin_cpu;
+               spin_unlock(&vmbus_connection.set_affinity_lock);
                goto update_effective;
        }
 
        /*
         * Pick a CPU from the new affinity mask. As a simple heuristic to
         * spread out the selection when the mask contains multiple CPUs,
-        * start with whatever CPU was last selected.
+        * start with whatever CPU was last selected. Also filter out any
+        * CPUs where synic_online isn't set -- these CPUs are in the process
+        * of going offline and must not have channel interrupts assigned
+        * to them.
         */
+       cpumask_and(&tempmask, &tempmask, &vmbus_connection.synic_online);
        target_cpu = cpumask_next_wrap(next_cpu, &tempmask, nr_cpu_ids, false);
-       if (target_cpu >= nr_cpu_ids)
-               return -EINVAL;
+       if (target_cpu >= nr_cpu_ids) {
+               ret = -EINVAL;
+               goto unlock;
+       }
        next_cpu = target_cpu;
 
        /*
         * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
         * avoid sending the message and fail here for such channels.
         */
-       if (channel->state != CHANNEL_OPENED_STATE)
-               return -EIO;
+       if (channel->state != CHANNEL_OPENED_STATE) {
+               ret = -EIO;
+               goto unlock;
+       }
 
        ret = vmbus_send_modifychannel(channel,
                                     hv_cpu_number_to_vp_number(target_cpu));
        if (ret)
-               return ret;
+               goto unlock;
 
        /*
         * Warning.  At this point, there is *no* guarantee that the host will
@@ -1408,6 +1425,7 @@ int vmbus_irq_set_affinity(struct irq_data *data,
         */
 
        channel->target_cpu = target_cpu;
+       spin_unlock(&vmbus_connection.set_affinity_lock);
 
        /* See init_vp_index(). */
        if (hv_is_perf_channel(channel))
@@ -1422,6 +1440,10 @@ int vmbus_irq_set_affinity(struct irq_data *data,
 update_effective:
        irq_data_update_effective_affinity(data, cpumask_of(target_cpu));
        return IRQ_SET_MASK_OK;
+
+unlock:
+       spin_unlock(&vmbus_connection.set_affinity_lock);
+       return ret;
 }
 
 /*
-- 
2.25.1


Reply via email to