mshv_irqfd_update() writes both irqfd_girq_ent and irqfd_lapic_irq as a
logical unit under seqcount write protection. Readers must snapshot these
fields inside the seqcount begin/retry loop to obtain a consistent
point-in-time view; otherwise a concurrent update can produce a torn read
where one field comes from the old state and the other from the new.
Both mshv_assert_irq_slow() and mshv_irqfd_wakeup() got this wrong: the
seqcount loop bodies were empty (just spinning until a stable sequence was
observed), and all reads of the protected fields happened after the loop
with no protection from concurrent writes. If mshv_irqfd_update() races
with interrupt assertion, the caller may use a stale or mixed
vector/apic_id/control combination, delivering an interrupt to the wrong
vCPU, with the wrong vector, or with the wrong trigger mode. This can
cause spurious or lost interrupts in the guest, or a stuck interrupt line
in the level-triggered case.
Fix mshv_assert_irq_slow() by snapshotting both irqfd_girq_ent and
irqfd_lapic_irq into local variables inside the seqcount loop, then using
those locals for the validity check, the resampler WARN_ON() and the
hypercall. Reorder the function so the seqcount loop runs first and every
subsequent read of the protected fields is satisfied from the snapshot.
Fix mshv_irqfd_wakeup() by snapshotting irqfd_lapic_irq inside its seqcount
loop and passing the snapshot to mshv_try_assert_irq_fast(), so the fast
path operates on the consistent copy rather than reading the field directly
outside seqcount protection.
Fixes: 621191d709b14 ("Drivers: hv: Introduce mshv_root module to expose
/dev/mshv to VMMs")
Signed-off-by: Stanislav Kinsburskii <[email protected]>
---
drivers/hv/mshv_eventfd.c | 44 +++++++++++++++++++++++++-------------------
1 file changed, 25 insertions(+), 19 deletions(-)
diff --git a/drivers/hv/mshv_eventfd.c b/drivers/hv/mshv_eventfd.c
index b398e58411dd7..25bdc5e678849 100644
--- a/drivers/hv/mshv_eventfd.c
+++ b/drivers/hv/mshv_eventfd.c
@@ -151,10 +151,10 @@ static int mshv_vp_irq_set_vector(struct mshv_vp *vp, u32
vector)
* Try to raise irq for guest via shared vector array. hyp does the actual
* inject of the interrupt.
*/
-static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd)
+static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd,
+ const struct mshv_lapic_irq *irq)
{
struct mshv_partition *partition = irqfd->irqfd_partn;
- struct mshv_lapic_irq *irq = &irqfd->irqfd_lapic_irq;
struct mshv_vp *vp;
if (!(ms_hyperv.ext_features &
@@ -191,7 +191,8 @@ static int mshv_try_assert_irq_fast(struct mshv_irqfd
*irqfd)
return 0;
}
#else /* CONFIG_X86_64 */
-static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd)
+static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd,
+ const struct mshv_lapic_irq *irq)
{
return -EOPNOTSUPP;
}
@@ -200,30 +201,33 @@ static int mshv_try_assert_irq_fast(struct mshv_irqfd
*irqfd)
static void mshv_assert_irq_slow(struct mshv_irqfd *irqfd)
{
struct mshv_partition *partition = irqfd->irqfd_partn;
- struct mshv_lapic_irq *irq = &irqfd->irqfd_lapic_irq;
+ struct mshv_guest_irq_ent girq_ent;
+ struct mshv_lapic_irq irq;
unsigned int seq;
int idx;
+ idx = srcu_read_lock(&partition->pt_irq_srcu);
+
+ do {
+ seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
+ girq_ent = irqfd->irqfd_girq_ent;
+ irq = irqfd->irqfd_lapic_irq;
+ } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
+
#if IS_ENABLED(CONFIG_X86)
WARN_ON(irqfd->irqfd_resampler &&
- !irq->lapic_control.level_triggered);
+ !irq.lapic_control.level_triggered);
#endif
- idx = srcu_read_lock(&partition->pt_irq_srcu);
- if (irqfd->irqfd_girq_ent.guest_irq_num) {
- if (!irqfd->irqfd_girq_ent.girq_entry_valid) {
- srcu_read_unlock(&partition->pt_irq_srcu, idx);
- return;
- }
-
- do {
- seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
- } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
+ if (girq_ent.guest_irq_num && !girq_ent.girq_entry_valid) {
+ srcu_read_unlock(&partition->pt_irq_srcu, idx);
+ return;
}
- hv_call_assert_virtual_interrupt(irqfd->irqfd_partn->pt_id,
- irq->lapic_vector, irq->lapic_apic_id,
- irq->lapic_control);
+ hv_call_assert_virtual_interrupt(partition->pt_id,
+ irq.lapic_vector, irq.lapic_apic_id,
+ irq.lapic_control);
+
srcu_read_unlock(&partition->pt_irq_srcu, idx);
}
@@ -309,16 +313,18 @@ static int mshv_irqfd_wakeup(wait_queue_entry_t *wait,
unsigned int mode,
int ret = 0;
if (flags & EPOLLIN) {
+ struct mshv_lapic_irq irq;
u64 cnt;
eventfd_ctx_do_read(irqfd->irqfd_eventfd_ctx, &cnt);
idx = srcu_read_lock(&pt->pt_irq_srcu);
do {
seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
+ irq = irqfd->irqfd_lapic_irq;
} while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
/* An event has been signaled, raise an interrupt */
- ret = mshv_try_assert_irq_fast(irqfd);
+ ret = mshv_try_assert_irq_fast(irqfd, &irq);
if (ret)
mshv_assert_irq_slow(irqfd);