irqfd interrupt injection had divergent seqcount snapshot scaffolding in
three places, and inconsistent validity checks between the fast and slow
assert paths:
1. mshv_irqfd_wakeup() snapshotted irqfd_lapic_irq under seqcount, then on
fast-path failure called mshv_assert_irq_slow(), which re-snapshotted both
irqfd_girq_ent and irqfd_lapic_irq under seqcount again — wasteful and
duplicative.
2. The girq_entry_valid check only existed in the slow path. The fast path
would happily accept a zero-initialised mshv_lapic_irq when routing was not
yet configured for the GSI, potentially injecting vector 0 to VP 0.
3. The slow path's validity predicate was 'guest_irq_num &&
!girq_entry_valid', which short-circuits for GSI 0 (guest_irq_num == 0) and
so bypasses the validity check entirely for that GSI.
4. mshv_irqfd_resampler_ack() read irqfd_lapic_irq.lapic_control without
seqcount protection, which could observe a stale or transient value while
mshv_irqfd_update() was concurrently rewriting irqfd_lapic_irq via
mshv_copy_girq_info()'s memset-and-fill sequence.
Introduce mshv_irqfd_snapshot() that takes a consistent snapshot of both
irqfd_girq_ent and irqfd_lapic_irq inside the seqcount loop; girq_ent is
optional so the resampler ack path can snapshot only the LAPIC IRQ.
Use the helper from mshv_irqfd_resampler_ack() (closes 4),
mshv_irqfd_wakeup() and mshv_irqfd_assign()'s EPOLLIN replay path,
replacing the three ad-hoc seqcount loops.
Move the validity check into mshv_irqfd_wakeup() before either injection
path runs, so the fast path no longer accepts an unrouted irqfd (closes
2). Use !girq_entry_valid as the condition (closes 3). Change
mshv_assert_irq_slow() to take a pre-snapshotted const struct
mshv_lapic_irq pointer, eliminating its internal seqcount and SRCU
scaffolding (closes 1).
Fixes: 621191d709b14 ("Drivers: hv: Introduce mshv_root module to expose
/dev/mshv to VMMs")
Signed-off-by: Stanislav Kinsburskii <[email protected]>
---
drivers/hv/mshv_eventfd.c | 90 ++++++++++++++++++++++++++-------------------
1 file changed, 52 insertions(+), 38 deletions(-)
diff --git a/drivers/hv/mshv_eventfd.c b/drivers/hv/mshv_eventfd.c
index 25bdc5e678849..c24069dff9702 100644
--- a/drivers/hv/mshv_eventfd.c
+++ b/drivers/hv/mshv_eventfd.c
@@ -74,6 +74,27 @@ static inline bool hv_should_clear_interrupt(enum
hv_interrupt_type type)
}
#endif
+/*
+ * Snapshot per-irqfd routing state under seqcount protection so callers
+ * see a consistent point-in-time view of irqfd_girq_ent and
+ * irqfd_lapic_irq even if mshv_irqfd_update() runs concurrently.
+ *
+ * @girq_ent may be NULL when the caller only needs the LAPIC IRQ.
+ */
+static void mshv_irqfd_snapshot(struct mshv_irqfd *irqfd,
+ struct mshv_guest_irq_ent *girq_ent,
+ struct mshv_lapic_irq *irq)
+{
+ unsigned int seq;
+
+ do {
+ seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
+ if (girq_ent)
+ *girq_ent = irqfd->irqfd_girq_ent;
+ *irq = irqfd->irqfd_lapic_irq;
+ } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
+}
+
static void mshv_irqfd_resampler_ack(struct mshv_irq_ack_notifier *mian)
{
struct mshv_irqfd_resampler *resampler;
@@ -90,7 +111,11 @@ static void mshv_irqfd_resampler_ack(struct
mshv_irq_ack_notifier *mian)
hlist_for_each_entry_srcu(irqfd, &resampler->rsmplr_irqfd_list,
irqfd_resampler_hnode,
srcu_read_lock_held(&partition->pt_irq_srcu)) {
- if
(hv_should_clear_interrupt(irqfd->irqfd_lapic_irq.lapic_control.interrupt_type))
+ struct mshv_lapic_irq irq;
+
+ mshv_irqfd_snapshot(irqfd, NULL, &irq);
+
+ if (hv_should_clear_interrupt(irq.lapic_control.interrupt_type))
hv_call_clear_virtual_interrupt(partition->pt_id);
eventfd_signal(irqfd->irqfd_resamplefd);
@@ -198,37 +223,14 @@ static int mshv_try_assert_irq_fast(struct mshv_irqfd
*irqfd,
}
#endif
-static void mshv_assert_irq_slow(struct mshv_irqfd *irqfd)
+static void mshv_assert_irq_slow(struct mshv_irqfd *irqfd,
+ const struct mshv_lapic_irq *irq)
{
struct mshv_partition *partition = irqfd->irqfd_partn;
- struct mshv_guest_irq_ent girq_ent;
- struct mshv_lapic_irq irq;
- unsigned int seq;
- int idx;
-
- idx = srcu_read_lock(&partition->pt_irq_srcu);
-
- do {
- seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
- girq_ent = irqfd->irqfd_girq_ent;
- irq = irqfd->irqfd_lapic_irq;
- } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
-
-#if IS_ENABLED(CONFIG_X86)
- WARN_ON(irqfd->irqfd_resampler &&
- !irq.lapic_control.level_triggered);
-#endif
-
- if (girq_ent.guest_irq_num && !girq_ent.girq_entry_valid) {
- srcu_read_unlock(&partition->pt_irq_srcu, idx);
- return;
- }
hv_call_assert_virtual_interrupt(partition->pt_id,
- irq.lapic_vector, irq.lapic_apic_id,
- irq.lapic_control);
-
- srcu_read_unlock(&partition->pt_irq_srcu, idx);
+ irq->lapic_vector, irq->lapic_apic_id,
+ irq->lapic_control);
}
static void mshv_irqfd_resampler_shutdown(struct mshv_irqfd *irqfd)
@@ -308,26 +310,31 @@ static int mshv_irqfd_wakeup(wait_queue_entry_t *wait,
unsigned int mode,
irqfd_wait);
__poll_t flags = key_to_poll(key);
int idx;
- unsigned int seq;
struct mshv_partition *pt = irqfd->irqfd_partn;
int ret = 0;
if (flags & EPOLLIN) {
+ struct mshv_guest_irq_ent girq_ent;
struct mshv_lapic_irq irq;
u64 cnt;
eventfd_ctx_do_read(irqfd->irqfd_eventfd_ctx, &cnt);
idx = srcu_read_lock(&pt->pt_irq_srcu);
- do {
- seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
- irq = irqfd->irqfd_lapic_irq;
- } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
+ mshv_irqfd_snapshot(irqfd, &girq_ent, &irq);
+
+ if (!girq_ent.girq_entry_valid)
+ goto out_unlock;
+
+#if IS_ENABLED(CONFIG_X86)
+ WARN_ON(irqfd->irqfd_resampler &&
+ !irq.lapic_control.level_triggered);
+#endif
/* An event has been signaled, raise an interrupt */
- ret = mshv_try_assert_irq_fast(irqfd, &irq);
- if (ret)
- mshv_assert_irq_slow(irqfd);
+ if (mshv_try_assert_irq_fast(irqfd, &irq))
+ mshv_assert_irq_slow(irqfd, &irq);
+out_unlock:
srcu_read_unlock(&pt->pt_irq_srcu, idx);
ret = 1;
@@ -517,8 +524,15 @@ static int mshv_irqfd_assign(struct mshv_partition *pt,
*/
events = vfs_poll(fd_file(f), &irqfd->irqfd_polltbl);
- if (events & EPOLLIN)
- mshv_assert_irq_slow(irqfd);
+ if (events & EPOLLIN) {
+ struct mshv_guest_irq_ent girq_ent;
+ struct mshv_lapic_irq irq;
+
+ mshv_irqfd_snapshot(irqfd, &girq_ent, &irq);
+
+ if (girq_ent.girq_entry_valid)
+ mshv_assert_irq_slow(irqfd, &irq);
+ }
srcu_read_unlock(&pt->pt_irq_srcu, idx);
return 0;