Allow the debugger to query a single queue, device and process exception
in a FIFO manner.
The KFD should also return the GPU or Queue id of the exception.
The debugger also has the option of clearing exceptions after
querying them.

Signed-off-by: Jonathan Kim <jonathan....@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  6 +++
 drivers/gpu/drm/amd/amdkfd/kfd_debug.c   | 64 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_debug.h   |  5 ++
 3 files changed, 75 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 200e11f02382..b918213a0087 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2946,6 +2946,12 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, 
struct kfd_process *p, v
                r = kfd_dbg_trap_set_flags(target, &args->set_flags.flags);
                break;
        case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT:
+               r = kfd_dbg_ev_query_debug_event(target,
+                               &args->query_debug_event.queue_id,
+                               &args->query_debug_event.gpu_id,
+                               args->query_debug_event.exception_mask,
+                               &args->query_debug_event.exception_mask);
+               break;
        case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO:
        case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT:
        case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 1f4d3fa0278e..6985a53b83e9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -33,6 +33,70 @@
 #define MAX_WATCH_ADDRESSES    4
 static DEFINE_SPINLOCK(watch_points_lock);
 
+int kfd_dbg_ev_query_debug_event(struct kfd_process *process, /* report one pending exception: queue, then device, then process */
+                     unsigned int *queue_id, /* out: id of the queue that is reported, 0 otherwise */
+                     unsigned int *gpu_id, /* out: device id for a queue or device event, 0 otherwise */
+                     uint64_t exception_clear_mask, /* in: status bits to clear on the source that is reported */
+                     uint64_t *event_status) /* out: full exception status of the reported source, 0 if none */
+{
+       struct process_queue_manager *pqm;
+       struct process_queue_node *pqn;
+       int i;
+
+       if (!(process && process->debug_trap_enabled)) /* NULL process or debug trap not enabled */
+               return -ENODATA;
+
+       mutex_lock(&process->event_mutex); /* held across the whole scan and the clearing below */
+       *event_status = 0; /* outputs start zeroed; only a matching source overwrites them */
+       *queue_id = 0;
+       *gpu_id = 0;
+
+       /* find and report queue events: the first queue with an enabled exception pending wins */
+       pqm = &process->pqm;
+       list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+               uint64_t tmp = process->exception_enable_mask;
+
+               if (!pqn->q) /* node has no queue attached, nothing to inspect */
+                       continue;
+
+               tmp &= pqn->q->properties.exception_status; /* pending bits that are also enabled */
+
+               if (!tmp)
+                       continue;
+
+               *event_status = pqn->q->properties.exception_status; /* reports the unmasked status, not tmp */
+               *queue_id = pqn->q->properties.queue_id;
+               *gpu_id = pqn->q->device->id;
+               pqn->q->properties.exception_status &= ~exception_clear_mask; /* clear only after the status was copied out */
+               goto out;
+       }
+
+       /* find and report device events: only reached when no queue matched, so *queue_id stays 0 */
+       for (i = 0; i < process->n_pdds; i++) {
+               struct kfd_process_device *pdd = process->pdds[i];
+               uint64_t tmp = process->exception_enable_mask
+                                               & pdd->exception_status;
+
+               if (!tmp)
+                       continue;
+
+               *event_status = pdd->exception_status; /* unmasked device status */
+               *gpu_id = pdd->dev->id;
+               pdd->exception_status &= ~exception_clear_mask;
+               goto out;
+       }
+
+       /* report process events: only reached when no queue or device matched, so both ids stay 0 */
+       if (process->exception_enable_mask & process->exception_status) {
+               *event_status = process->exception_status;
+               process->exception_status &= ~exception_clear_mask;
+       }
+
+out:
+       mutex_unlock(&process->event_mutex);
+       return *event_status ? 0 : -EAGAIN; /* 0 when something was reported, -EAGAIN when nothing enabled is pending */
+}
+
 void debug_event_write_work_handler(struct work_struct *work)
 {
        struct kfd_process *process;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index 12b80b6c96d0..c64ffd3efc46 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -27,6 +27,11 @@
 
 void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int 
unwind_count);
 int kfd_dbg_trap_activate(struct kfd_process *target);
+int kfd_dbg_ev_query_debug_event(struct kfd_process *process,
+                       unsigned int *queue_id,
+                       unsigned int *gpu_id,
+                       uint64_t exception_clear_mask,
+                       uint64_t *event_status);
 bool kfd_set_dbg_ev_from_interrupt(struct kfd_dev *dev,
                                   unsigned int pasid,
                                   uint32_t doorbell_id,
-- 
2.25.1

Reply via email to