Extend KFD event signaling to also notify render-node eventfd subscribers
via amdgpu_eventfd_notify(pasid, event_id, count).
Add a weak no-op amdgpu_eventfd_notify() so KFD keeps building even when
amdgpu does not provide the bridge implementation.
Use PASID already tracked by KFD (pdd->pasid, or derived from the process
pdds) to avoid drm_file-based PASID derivation.
Hook notification into:
- CPU-driven SIGNAL events (kfd_set_event)
- IRQ-driven SIGNAL events (kfd_signal_event_interrupt)
- HW exception, VM fault, reset, poison consumed, and process terminate
event paths.
This preserves existing KFD semantics while enabling render-node clients
to observe the same KFD events via standard eventfd mechanisms.
Cc: Harish Kasiviswanathan <[email protected]>
Cc: Felix Kuehling <[email protected]>
Cc: Alex Deucher <[email protected]>
Cc: Christian König <[email protected]>
Signed-off-by: Srinivasan Shanmugam <[email protected]>
---
drivers/gpu/drm/amd/amdkfd/kfd_events.c | 76 +++++++++++++++++++++++--
1 file changed, 70 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 13416bff7763..00416f6ec261 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -57,6 +57,35 @@ struct kfd_signal_page {
bool need_to_free_pages;
};
+/*
+ * Optional KGD hook for render-node eventfd signaling.
+ * Weak no-op so KFD builds even when amdgpu doesn't provide it.
+ */
+__weak void amdgpu_eventfd_notify(u32 pasid, u32 event_id, u64 count)
+{
+}
+
+static u32 kfd_pasid_from_pdd(struct kfd_process_device *pdd)
+{
+ return pdd ? pdd->pasid : 0;
+}
+
+static u32 kfd_pasid_from_process(struct kfd_process *p)
+{
+ u32 i;
+
+ if (!p || !p->n_pdds)
+ return 0;
+
+ for (i = 0; i < p->n_pdds; i++) {
+ u32 pasid = kfd_pasid_from_pdd(p->pdds[i]);
+
+ if (pasid)
+ return pasid;
+ }
+ return 0;
+}
+
static uint64_t *page_slots(struct kfd_signal_page *page)
{
return page->kernel_address;
@@ -654,6 +683,7 @@ int kfd_set_event(struct kfd_process *p, uint32_t event_id)
{
int ret = 0;
struct kfd_event *ev;
+ u32 pasid = kfd_pasid_from_process(p);
rcu_read_lock();
@@ -670,6 +700,14 @@ int kfd_set_event(struct kfd_process *p, uint32_t event_id)
ret = -EINVAL;
spin_unlock(&ev->lock);
+
+ /*
+ * CPU-driven SIGNAL event (KFD_IOC_SET_EVENT) should also wake
+ * render-node eventfd subscribers for the same (PASID,event_id).
+ */
+ if (!ret && pasid)
+ amdgpu_eventfd_notify(pasid, ev->event_id, 1);
+
unlock_rcu:
rcu_read_unlock();
return ret;
@@ -713,13 +751,15 @@ static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
}
static void set_event_from_interrupt(struct kfd_process *p,
- struct kfd_event *ev)
+ struct kfd_event *ev, u32 pasid)
{
if (ev && event_can_be_gpu_signaled(ev)) {
acknowledge_signal(p, ev);
spin_lock(&ev->lock);
set_event(ev);
spin_unlock(&ev->lock);
+ /* Bridge: wake render-node subscribers for same (PASID,event_id). */
+ amdgpu_eventfd_notify(pasid, ev->event_id, 1);
}
}
@@ -744,7 +784,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
ev = lookup_signaled_event_by_partial_id(p, partial_id,
valid_id_bits);
if (ev) {
- set_event_from_interrupt(p, ev);
+ set_event_from_interrupt(p, ev, pasid);
} else if (p->signal_page) {
/*
* Partial ID lookup failed. Assume that the event ID
@@ -767,7 +807,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
break;
if (READ_ONCE(slots[id]) !=
UNSIGNALED_EVENT_SLOT)
- set_event_from_interrupt(p, ev);
+ set_event_from_interrupt(p, ev, pasid);
}
} else {
/* With relatively many events, it's faster to
@@ -777,7 +817,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
for (id = 1; id < KFD_SIGNAL_EVENT_LIMIT; id++)
if (READ_ONCE(slots[id]) !=
UNSIGNALED_EVENT_SLOT) {
ev = lookup_event_by_id(p, id);
- set_event_from_interrupt(p, ev);
+ set_event_from_interrupt(p, ev, pasid);
}
}
}
@@ -1107,7 +1147,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
* Assumes that p is not going away.
*/
static void lookup_events_by_type_and_signal(struct kfd_process *p,
- int type, void *event_data)
+ int type, void *event_data, u32 pasid)
{
struct kfd_hsa_memory_exception_data *ev_data;
struct kfd_event *ev;
@@ -1130,6 +1170,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data)
ev->memory_exception_data = *ev_data;
spin_unlock(&ev->lock);
+ if (pasid)
+ amdgpu_eventfd_notify(pasid, ev->event_id, 1);
}
if (type == KFD_EVENT_TYPE_MEMORY) {
@@ -1168,7 +1210,7 @@ void kfd_signal_hw_exception_event(u32 pasid)
if (!p)
return; /* Presumably process exited. */
- lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
+ lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL, pasid);
kfd_unref_process(p);
}
@@ -1233,11 +1275,14 @@ void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
idr_for_each_entry_continue(&p->event_idr, ev, id)
if (ev->type == KFD_EVENT_TYPE_MEMORY) {
+ u32 pasid = kfd_pasid_from_pdd(pdd);
spin_lock(&ev->lock);
ev->memory_exception_data = data ? *data :
memory_exception_data;
set_event(ev);
spin_unlock(&ev->lock);
+ if (pasid)
+ amdgpu_eventfd_notify(pasid, ev->event_id, 1);
}
rcu_read_unlock();
@@ -1251,6 +1296,7 @@ void kfd_signal_reset_event(struct kfd_node *dev)
struct kfd_event *ev;
unsigned int temp;
uint32_t id, idx;
+ u32 pasid;
int reset_cause = atomic_read(&dev->sram_ecc_flag) ?
KFD_HW_EXCEPTION_ECC :
KFD_HW_EXCEPTION_GPU_HANG;
@@ -1274,6 +1320,8 @@ void kfd_signal_reset_event(struct kfd_node *dev)
continue;
}
+ pasid = kfd_pasid_from_pdd(pdd);
+
if (unlikely(!pdd)) {
WARN_ONCE(1, "Could not get device data from process pid:%d\n",
p->lead_thread->pid);
@@ -1312,6 +1360,9 @@ void kfd_signal_reset_event(struct kfd_node *dev)
ev->hw_exception_data.gpu_id = user_gpu_id;
set_event(ev);
spin_unlock(&ev->lock);
+
+ if (pasid)
+ amdgpu_eventfd_notify(pasid, ev->event_id, 1);
}
if (ev->type == KFD_EVENT_TYPE_MEMORY &&
reset_cause == KFD_HW_EXCEPTION_ECC) {
@@ -1320,6 +1371,9 @@ void kfd_signal_reset_event(struct kfd_node *dev)
ev->memory_exception_data.gpu_id = user_gpu_id;
set_event(ev);
spin_unlock(&ev->lock);
+
+ if (pasid)
+ amdgpu_eventfd_notify(pasid, ev->event_id, 1);
}
}
@@ -1367,6 +1421,9 @@ void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
ev->hw_exception_data = hw_exception_data;
set_event(ev);
spin_unlock(&ev->lock);
+
+ if (pasid)
+ amdgpu_eventfd_notify(pasid, ev->event_id, 1);
}
if (ev->type == KFD_EVENT_TYPE_MEMORY) {
@@ -1374,6 +1431,9 @@ void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
ev->memory_exception_data = memory_exception_data;
set_event(ev);
spin_unlock(&ev->lock);
+
+ if (pasid)
+ amdgpu_eventfd_notify(pasid, ev->event_id, 1);
}
}
@@ -1394,6 +1454,7 @@ void kfd_signal_process_terminate_event(struct kfd_process *p)
{
struct kfd_event *ev;
u32 id;
+ u32 pasid = kfd_pasid_from_process(p);
rcu_read_lock();
@@ -1404,6 +1465,9 @@ void kfd_signal_process_terminate_event(struct kfd_process *p)
spin_lock(&ev->lock);
set_event(ev);
spin_unlock(&ev->lock);
+
+ if (pasid)
+ amdgpu_eventfd_notify(pasid, ev->event_id, 1);
}
/* Send SIGBUS to p->lead_thread */
--
2.34.1