On Fri, Dec 5, 2025 at 12:54 PM Xiaogang.Chen <[email protected]> wrote:
>
> From: Xiaogang Chen <[email protected]>
>
> This patch allows the kfd driver to function correctly when AMD gpu devices
> are unplugged/replugged at run time.
>
> When an AMD gpu device is unplugged, the kfd driver stops all queues and then
> gracefully terminates the existing kfd processes by sending SIGBUS to the user
> processes. After that, user space can still use the remaining AMD gpu devices.
> When all AMD gpu devices in the system have been removed, the kfd driver will
> not respond to new requests.
>
> Unplugged AMD gpu devices can be re-plugged. The kfd driver will use the
> added devices and function as usual.
>
> The purpose of this patch is to have the kfd driver behave as expected during
> and after AMD gpu device unplug/replug at run time.
>
> Signed-off-by: Xiaogang Chen<[email protected]>

I'm not a KFD expert, but it looks correct to me.
Acked-by: Alex Deucher <[email protected]>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  5 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 11 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  1 +
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 78 +++++++++++++++++++++-
>  drivers/gpu/drm/amd/amdkfd/kfd_events.c    | 29 ++++++++
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h      |  2 +
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c   | 12 +++-
>  drivers/gpu/drm/amd/amdkfd/kfd_topology.c  | 22 ++++++
>  8 files changed, 158 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index a2879d2b7c8e..622f613e7627 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -248,6 +248,11 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
>                 kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
>  }
>
> +void amdgpu_amdkfd_teardown_processes(struct amdgpu_device *adev)
> +{
> +       kgd2kfd_teardown_processes(adev);
> +}
> +
>  void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc)
>  {
>         if (adev->kfd.dev) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 8bdfcde2029b..f79e20cadd70 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -155,6 +155,7 @@ struct amdkfd_process_info {
>
>  int amdgpu_amdkfd_init(void);
>  void amdgpu_amdkfd_fini(void);
> +void amdgpu_amdkfd_teardown_processes(struct amdgpu_device *adev);
>
>  void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc);
>  int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc);
> @@ -434,6 +435,8 @@ int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd);
>  bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id);
>  bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct 
> amdgpu_iv_entry *entry,
>                                bool retry_fault);
> +void kgd2kfd_lock_kfd(void);
> +void kgd2kfd_teardown_processes(struct amdgpu_device *adev);
>
>  #else
>  static inline int kgd2kfd_init(void)
> @@ -546,5 +549,13 @@ static inline bool kgd2kfd_vmfault_fast_path(struct 
> amdgpu_device *adev, struct
>         return false;
>  }
>
> +void kgd2kfd_lock_kfd(void)
> +{
> +}
> +
> +void kgd2kfd_teardown_processes(struct amdgpu_device *adev)
> +{
> +}
> +
>  #endif
>  #endif /* AMDGPU_AMDKFD_H_INCLUDED */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index f75ede1b1c6b..dd8fc2d42b69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3663,6 +3663,7 @@ static int amdgpu_device_ip_fini_early(struct 
> amdgpu_device *adev)
>         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
>
>         amdgpu_amdkfd_suspend(adev, true);
> +       amdgpu_amdkfd_teardown_processes(adev);
>         amdgpu_userq_suspend(adev);
>
>         /* Workaround for ASICs need to disable SMC first */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index e3da2f149ae6..30d87e4daad2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -936,6 +936,9 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
>         }
>
>         kfree(kfd);
> +
> +       /* after remove a kfd device unlock kfd driver */
> +       kgd2kfd_unlock_kfd(NULL);
>  }
>
>  int kgd2kfd_pre_reset(struct kfd_dev *kfd,
> @@ -1519,10 +1522,14 @@ int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd)
>         return r;
>  }
>
> +/* unlock a kfd dev or kfd driver */
>  void kgd2kfd_unlock_kfd(struct kfd_dev *kfd)
>  {
>         mutex_lock(&kfd_processes_mutex);
> -       --kfd->kfd_dev_lock;
> +       if (kfd)
> +               --kfd->kfd_dev_lock;
> +       else
> +               --kfd_locked;
>         mutex_unlock(&kfd_processes_mutex);
>  }
>
> @@ -1686,6 +1693,75 @@ bool kgd2kfd_vmfault_fast_path(struct amdgpu_device 
> *adev, struct amdgpu_iv_entr
>         return false;
>  }
>
> +/* check if there is kfd process still uses adev */
> +static bool kgd2kfd_check_device_idle(struct amdgpu_device *adev) {
> +
> +       struct kfd_process *p;
> +       struct hlist_node *p_temp;
> +       unsigned int temp;
> +       struct kfd_node *dev;
> +
> +       mutex_lock(&kfd_processes_mutex);
> +
> +       if (hash_empty(kfd_processes_table)){
> +               mutex_unlock(&kfd_processes_mutex);
> +               return true;
> +       }
> +
> +       /* check if there is device still use adev */
> +       hash_for_each_safe(kfd_processes_table, temp, p_temp, p, 
> kfd_processes) {
> +               for (int i = 0; i < p->n_pdds; i++) {
> +                       dev = p->pdds[i]->dev;
> +                       if (dev->adev == adev){
> +                               mutex_unlock(&kfd_processes_mutex);
> +                               return false;
> +                       }
> +               }
> +       }
> +
> +       mutex_unlock(&kfd_processes_mutex);
> +
> +       return true;
> +}
> +
> +/** kgd2kfd_teardown_processes - gracefully tear down existing
> + *  kfd processes that use adev
> + *
> + * @adev: amdgpu_device where kfd processes run on and will be
> + *     teardown
> + *
> + */
> +void kgd2kfd_teardown_processes(struct amdgpu_device *adev) {
> +
> +       struct hlist_node *p_temp;
> +       struct kfd_process *p;
> +       struct kfd_node *dev;
> +       unsigned int temp;
> +
> +       mutex_lock(&kfd_processes_mutex);
> +
> +       if (hash_empty(kfd_processes_table)){
> +               mutex_unlock(&kfd_processes_mutex);
> +               return;
> +       }
> +
> +       hash_for_each_safe(kfd_processes_table, temp, p_temp, p, 
> kfd_processes) {
> +               for (int i = 0; i < p->n_pdds; i++) {
> +                       dev = p->pdds[i]->dev;
> +                       if (dev->adev == adev)
> +                               kfd_signal_process_terminate_event(p);
> +               }
> +       }
> +
> +       mutex_unlock(&kfd_processes_mutex);
> +
> +       /* wait all kfd processes use adev terminate */
> +       while (!kgd2kfd_check_device_idle(adev))
> +               cond_resched();
> +
> +       return;
> +}
> +
>  #if defined(CONFIG_DEBUG_FS)
>
>  /* This function will send a package to HIQ to hang the HWS
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index 5a190dd6be4e..ea913368e231 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -1380,3 +1380,32 @@ void kfd_signal_poison_consumed_event(struct kfd_node 
> *dev, u32 pasid)
>
>         kfd_unref_process(p);
>  }
> +
> +/* signal KFD_EVENT_TYPE_SIGNAL events from process p
> + * send signal SIGBUS to correspondent user space process
> + */
> +void kfd_signal_process_terminate_event(struct kfd_process *p)
> +{
> +       struct kfd_event *ev;
> +       uint32_t id;
> +
> +       rcu_read_lock();
> +
> +       /* iterate from id 1 for KFD_EVENT_TYPE_SIGNAL events */
> +       id = 1;
> +       idr_for_each_entry_continue(&p->event_idr, ev, id)
> +               if (ev->type == KFD_EVENT_TYPE_SIGNAL) {
> +                       spin_lock(&ev->lock);
> +                       set_event(ev);
> +                       spin_unlock(&ev->lock);
> +               }
> +
> +       /* Send SIGBUS to p->lead_thread */
> +       dev_notice(kfd_device,
> +                       "Sending SIGBUS to process %d",
> +                       p->lead_thread->pid);
> +
> +       send_sig(SIGBUS, p->lead_thread, 0);
> +
> +       rcu_read_unlock();
> +}
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 76842bb8e78b..d7b4aba0f488 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -1177,6 +1177,7 @@ static inline struct kfd_node 
> *kfd_node_by_irq_ids(struct amdgpu_device *adev,
>  }
>  int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev);
>  int kfd_numa_node_to_apic_id(int numa_node_id);
> +uint32_t kfd_gpu_node_num(void);
>
>  /* Interrupts */
>  #define        KFD_IRQ_FENCE_CLIENTID  0xff
> @@ -1529,6 +1530,7 @@ void kfd_signal_vm_fault_event(struct 
> kfd_process_device *pdd,
>  void kfd_signal_reset_event(struct kfd_node *dev);
>
>  void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid);
> +void kfd_signal_process_terminate_event(struct kfd_process *p);
>
>  static inline void kfd_flush_tlb(struct kfd_process_device *pdd,
>                                  enum TLB_FLUSH_TYPE type)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index f5d173f1ca3b..5a74469f5bef 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -854,6 +854,12 @@ struct kfd_process *kfd_create_process(struct 
> task_struct *thread)
>          */
>         mutex_lock(&kfd_processes_mutex);
>
> +       if (kfd_gpu_node_num() <= 0) {
> +               pr_warn("no gpu node! Cannot create KFD process");
> +               process = ERR_PTR(-EINVAL);
> +               goto out;
> +       }
> +
>         if (kfd_is_locked(NULL)) {
>                 pr_debug("KFD is locked! Cannot create process");
>                 process = ERR_PTR(-EINVAL);
> @@ -1176,7 +1182,6 @@ static void kfd_process_wq_release(struct work_struct 
> *work)
>         if (ef)
>                 dma_fence_signal(ef);
>
> -       kfd_process_remove_sysfs(p);
>         kfd_debugfs_remove_process(p);
>
>         kfd_process_kunmap_signal_bo(p);
> @@ -1192,6 +1197,11 @@ static void kfd_process_wq_release(struct work_struct 
> *work)
>
>         put_task_struct(p->lead_thread);
>
> +       /* the last step is removing process entries under /sys
> +        * to indicate the process has been terminated.
> +        */
> +       kfd_process_remove_sysfs(p);
> +
>         kfree(p);
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index 5c98746eb72d..062ad5d40d62 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -2349,6 +2349,28 @@ int kfd_numa_node_to_apic_id(int numa_node_id)
>         return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
>  }
>
> +/* kfd_gpu_node_num - Return kfd gpu node number at system */
> +uint32_t kfd_gpu_node_num(void) {
> +
> +       struct kfd_node *dev;
> +       uint8_t gpu_num  = 0;
> +       uint8_t id  = 0;
> +
> +       while (kfd_topology_enum_kfd_devices(id, &dev) == 0) {
> +               if (!dev || kfd_devcgroup_check_permission(dev)) {
> +                       /* Skip non GPU devices and devices to which the
> +                        * current process have no access to
> +                        */
> +                       id++;
> +                       continue;
> +               }
> +               id++;
> +               gpu_num++;
> +       }
> +
> +       return gpu_num;
> +}
> +
>  #if defined(CONFIG_DEBUG_FS)
>
>  int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
> --
> 2.34.1
>

Reply via email to