Reviewed-by: Jacek Lawrynowicz <jacek.lawrynow...@linux.intel.com>

On 1/7/2025 6:32 PM, Maciej Falkowski wrote:
> From: Karol Wachowski <karol.wachow...@intel.com>
> 
> With the hardware scheduler, it is not expected to receive JOB_DONE
> notifications from the NPU FW for jobs aborted due to the command queue
> destroy JSM command.
> 
> Remove jobs submitted to an unregistered command queue from submitted_jobs_xa
> to avoid triggering a TDR in such a case.
> 
> Add an explicit submitted_jobs_lock that protects access to the list of
> submitted jobs, which is now used to find jobs to abort.
> 
> Move the context abort procedure to a separate work queue so as not to slow
> down handling of IPCs or DCT requests in cases where a job abort takes longer,
> especially when destruction of the last job of a specific context results
> in context release.
> 
> Signed-off-by: Karol Wachowski <karol.wachow...@intel.com>
> Signed-off-by: Maciej Falkowski <maciej.falkow...@linux.intel.com>
> ---
>  drivers/accel/ivpu/ivpu_drv.c   |  32 ++------
>  drivers/accel/ivpu/ivpu_drv.h   |   2 +
>  drivers/accel/ivpu/ivpu_job.c   | 137 ++++++++++++++++++++++++--------
>  drivers/accel/ivpu/ivpu_job.h   |   4 +
>  drivers/accel/ivpu/ivpu_mmu.c   |   3 +-
>  drivers/accel/ivpu/ivpu_sysfs.c |   5 +-
>  6 files changed, 121 insertions(+), 62 deletions(-)
> 
> diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
> index f4171536640b..300eea8c305f 100644
> --- a/drivers/accel/ivpu/ivpu_drv.c
> +++ b/drivers/accel/ivpu/ivpu_drv.c
> @@ -36,8 +36,6 @@
>  #define DRIVER_VERSION_STR "1.0.0 " UTS_RELEASE
>  #endif
>  
> -static struct lock_class_key submitted_jobs_xa_lock_class_key;
> -
>  int ivpu_dbg_mask;
>  module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644);
>  MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros.");
> @@ -467,26 +465,6 @@ static const struct drm_driver driver = {
>       .major = 1,
>  };
>  
> -static void ivpu_context_abort_invalid(struct ivpu_device *vdev)
> -{
> -     struct ivpu_file_priv *file_priv;
> -     unsigned long ctx_id;
> -
> -     mutex_lock(&vdev->context_list_lock);
> -
> -     xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
> -             if (!file_priv->has_mmu_faults || file_priv->aborted)
> -                     continue;
> -
> -             mutex_lock(&file_priv->lock);
> -             ivpu_context_abort_locked(file_priv);
> -             file_priv->aborted = true;
> -             mutex_unlock(&file_priv->lock);
> -     }
> -
> -     mutex_unlock(&vdev->context_list_lock);
> -}
> -
>  static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg)
>  {
>       struct ivpu_device *vdev = arg;
> @@ -500,9 +478,6 @@ static irqreturn_t ivpu_irq_thread_handler(int irq, void 
> *arg)
>               case IVPU_HW_IRQ_SRC_IPC:
>                       ivpu_ipc_irq_thread_handler(vdev);
>                       break;
> -             case IVPU_HW_IRQ_SRC_MMU_EVTQ:
> -                     ivpu_context_abort_invalid(vdev);
> -                     break;
>               case IVPU_HW_IRQ_SRC_DCT:
>                       ivpu_pm_dct_irq_thread_handler(vdev);
>                       break;
> @@ -619,16 +594,21 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
>       xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
>       xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
>       xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);
> -     lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, 
> &submitted_jobs_xa_lock_class_key);
>       INIT_LIST_HEAD(&vdev->bo_list);
>  
>       vdev->db_limit.min = IVPU_MIN_DB;
>       vdev->db_limit.max = IVPU_MAX_DB;
>  
> +     INIT_WORK(&vdev->context_abort_work, ivpu_context_abort_thread_handler);
> +
>       ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock);
>       if (ret)
>               goto err_xa_destroy;
>  
> +     ret = drmm_mutex_init(&vdev->drm, &vdev->submitted_jobs_lock);
> +     if (ret)
> +             goto err_xa_destroy;
> +
>       ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock);
>       if (ret)
>               goto err_xa_destroy;
> diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
> index 3fdff3f6cffd..ebfcf3e42a3d 100644
> --- a/drivers/accel/ivpu/ivpu_drv.h
> +++ b/drivers/accel/ivpu/ivpu_drv.h
> @@ -137,6 +137,7 @@ struct ivpu_device {
>       struct mutex context_list_lock; /* Protects user context 
> addition/removal */
>       struct xarray context_xa;
>       struct xa_limit context_xa_limit;
> +     struct work_struct context_abort_work;
>  
>       struct xarray db_xa;
>       struct xa_limit db_limit;
> @@ -145,6 +146,7 @@ struct ivpu_device {
>       struct mutex bo_list_lock; /* Protects bo_list */
>       struct list_head bo_list;
>  
> +     struct mutex submitted_jobs_lock; /* Protects submitted_jobs */
>       struct xarray submitted_jobs_xa;
>       struct ivpu_ipc_consumer job_done_consumer;
>  
> diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
> index 43507d3aea51..7fed3c8406ee 100644
> --- a/drivers/accel/ivpu/ivpu_job.c
> +++ b/drivers/accel/ivpu/ivpu_job.c
> @@ -122,6 +122,7 @@ static struct ivpu_cmdq *ivpu_cmdq_create(struct 
> ivpu_file_priv *file_priv, u8 p
>  
>       cmdq->priority = priority;
>       cmdq->is_legacy = is_legacy;
> +     cmdq->is_valid = true;
>  
>       ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, 
> file_priv->cmdq_limit,
>                             &file_priv->cmdq_id_next, GFP_KERNEL);
> @@ -130,6 +131,7 @@ static struct ivpu_cmdq *ivpu_cmdq_create(struct 
> ivpu_file_priv *file_priv, u8 p
>               goto err_free_cmdq;
>       }
>  
> +     ivpu_dbg(vdev, JOB, "Command queue %d created, ctx %d\n", cmdq->id, 
> file_priv->ctx.id);
>       return cmdq;
>  
>  err_free_cmdq:
> @@ -247,7 +249,8 @@ static int ivpu_cmdq_unregister(struct ivpu_file_priv 
> *file_priv, struct ivpu_cm
>       if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
>               ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, 
> cmdq->id);
>               if (!ret)
> -                     ivpu_dbg(vdev, JOB, "Command queue %d destroyed\n", 
> cmdq->id);
> +                     ivpu_dbg(vdev, JOB, "Command queue %d destroyed, ctx 
> %d\n",
> +                              cmdq->id, file_priv->ctx.id);
>       }
>  
>       ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id);
> @@ -303,8 +306,8 @@ static struct ivpu_cmdq *ivpu_cmdq_acquire(struct 
> ivpu_file_priv *file_priv, u32
>       lockdep_assert_held(&file_priv->lock);
>  
>       cmdq = xa_load(&file_priv->cmdq_xa, cmdq_id);
> -     if (!cmdq) {
> -             ivpu_err(vdev, "Failed to find command queue with ID: %u\n", 
> cmdq_id);
> +     if (!cmdq || !cmdq->is_valid) {
> +             ivpu_warn_ratelimited(vdev, "Failed to find command queue with 
> ID: %u\n", cmdq_id);
>               return NULL;
>       }
>  
> @@ -356,25 +359,21 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device 
> *vdev)
>       mutex_unlock(&vdev->context_list_lock);
>  }
>  
> -static void ivpu_cmdq_unregister_all(struct ivpu_file_priv *file_priv)
> -{
> -     struct ivpu_cmdq *cmdq;
> -     unsigned long cmdq_id;
> -
> -     xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq)
> -             ivpu_cmdq_unregister(file_priv, cmdq);
> -}
> -
>  void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv)
>  {
>       struct ivpu_device *vdev = file_priv->vdev;
> +     struct ivpu_cmdq *cmdq;
> +     unsigned long cmdq_id;
>  
>       lockdep_assert_held(&file_priv->lock);
>  
> -     ivpu_cmdq_unregister_all(file_priv);
> +     xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq)
> +             ivpu_cmdq_unregister(file_priv, cmdq);
>  
>       if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_OS)
>               ivpu_jsm_context_release(vdev, file_priv->ctx.id);
> +
> +     file_priv->aborted = true;
>  }
>  
>  static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
> @@ -513,16 +512,14 @@ static struct ivpu_job 
> *ivpu_job_remove_from_submitted_jobs(struct ivpu_device *
>  {
>       struct ivpu_job *job;
>  
> -     xa_lock(&vdev->submitted_jobs_xa);
> -     job = __xa_erase(&vdev->submitted_jobs_xa, job_id);
> +     lockdep_assert_held(&vdev->submitted_jobs_lock);
>  
> +     job = xa_erase(&vdev->submitted_jobs_xa, job_id);
>       if (xa_empty(&vdev->submitted_jobs_xa) && job) {
>               vdev->busy_time = ktime_add(ktime_sub(ktime_get(), 
> vdev->busy_start_ts),
>                                           vdev->busy_time);
>       }
>  
> -     xa_unlock(&vdev->submitted_jobs_xa);
> -
>       return job;
>  }
>  
> @@ -530,6 +527,8 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device 
> *vdev, u32 job_id, u32
>  {
>       struct ivpu_job *job;
>  
> +     lockdep_assert_held(&vdev->submitted_jobs_lock);
> +
>       job = ivpu_job_remove_from_submitted_jobs(vdev, job_id);
>       if (!job)
>               return -ENOENT;
> @@ -548,6 +547,10 @@ static int ivpu_job_signal_and_destroy(struct 
> ivpu_device *vdev, u32 job_id, u32
>       ivpu_stop_job_timeout_detection(vdev);
>  
>       ivpu_rpm_put(vdev);
> +
> +     if (!xa_empty(&vdev->submitted_jobs_xa))
> +             ivpu_start_job_timeout_detection(vdev);
> +
>       return 0;
>  }
>  
> @@ -556,8 +559,26 @@ void ivpu_jobs_abort_all(struct ivpu_device *vdev)
>       struct ivpu_job *job;
>       unsigned long id;
>  
> +     mutex_lock(&vdev->submitted_jobs_lock);
> +
>       xa_for_each(&vdev->submitted_jobs_xa, id, job)
>               ivpu_job_signal_and_destroy(vdev, id, 
> DRM_IVPU_JOB_STATUS_ABORTED);
> +
> +     mutex_unlock(&vdev->submitted_jobs_lock);
> +}
> +
> +void ivpu_cmdq_abort_all_jobs(struct ivpu_device *vdev, u32 ctx_id, u32 
> cmdq_id)
> +{
> +     struct ivpu_job *job;
> +     unsigned long id;
> +
> +     mutex_lock(&vdev->submitted_jobs_lock);
> +
> +     xa_for_each(&vdev->submitted_jobs_xa, id, job)
> +             if (job->file_priv->ctx.id == ctx_id && job->cmdq_id == cmdq_id)
> +                     ivpu_job_signal_and_destroy(vdev, id, 
> DRM_IVPU_JOB_STATUS_ABORTED);
> +
> +     mutex_unlock(&vdev->submitted_jobs_lock);
>  }
>  
>  static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id)
> @@ -590,15 +611,18 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 
> priority, u32 cmdq_id)
>               goto err_unlock_file_priv;
>       }
>  
> -     xa_lock(&vdev->submitted_jobs_xa);
> +     job->cmdq_id = cmdq->id;
> +
> +     mutex_lock(&vdev->submitted_jobs_lock);
> +
>       is_first_job = xa_empty(&vdev->submitted_jobs_xa);
> -     ret = __xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, 
> file_priv->job_limit,
> -                             &file_priv->job_id_next, GFP_KERNEL);
> +     ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, 
> file_priv->job_limit,
> +                           &file_priv->job_id_next, GFP_KERNEL);
>       if (ret < 0) {
>               ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n",
>                        file_priv->ctx.id);
>               ret = -EBUSY;
> -             goto err_unlock_submitted_jobs_xa;
> +             goto err_unlock_submitted_jobs;
>       }
>  
>       ret = ivpu_cmdq_push_job(cmdq, job);
> @@ -621,19 +645,21 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 
> priority, u32 cmdq_id)
>                job->job_id, file_priv->ctx.id, job->engine_idx, 
> cmdq->priority,
>                job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);
>  
> -     xa_unlock(&vdev->submitted_jobs_xa);
> -
> +     mutex_unlock(&vdev->submitted_jobs_lock);
>       mutex_unlock(&file_priv->lock);
>  
> -     if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW))
> +     if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) {
> +             mutex_lock(&vdev->submitted_jobs_lock);
>               ivpu_job_signal_and_destroy(vdev, job->job_id, 
> VPU_JSM_STATUS_SUCCESS);
> +             mutex_unlock(&vdev->submitted_jobs_lock);
> +     }
>  
>       return 0;
>  
>  err_erase_xa:
> -     __xa_erase(&vdev->submitted_jobs_xa, job->job_id);
> -err_unlock_submitted_jobs_xa:
> -     xa_unlock(&vdev->submitted_jobs_xa);
> +     xa_erase(&vdev->submitted_jobs_xa, job->job_id);
> +err_unlock_submitted_jobs:
> +     mutex_unlock(&vdev->submitted_jobs_lock);
>  err_unlock_file_priv:
>       mutex_unlock(&file_priv->lock);
>       ivpu_rpm_put(vdev);
> @@ -843,19 +869,33 @@ int ivpu_cmdq_create_ioctl(struct drm_device *dev, void 
> *data, struct drm_file *
>  int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct 
> drm_file *file)
>  {
>       struct ivpu_file_priv *file_priv = file->driver_priv;
> +     struct ivpu_device *vdev = file_priv->vdev;
>       struct drm_ivpu_cmdq_destroy *args = data;
>       struct ivpu_cmdq *cmdq;
> +     u32 cmdq_id;
>       int ret = 0;
>  
>       mutex_lock(&file_priv->lock);
>  
>       cmdq = xa_load(&file_priv->cmdq_xa, args->cmdq_id);
> -     if (!cmdq || cmdq->is_legacy) {
> +     if (!cmdq || !cmdq->is_valid || cmdq->is_legacy) {
>               ret = -ENOENT;
>               goto unlock;
>       }
>  
> +     /*
> +      * There is no way to stop executing jobs per command queue
> +      * in OS scheduling mode, mark command queue as invalid instead
> +      * and it will be freed together with context release.
> +      */
> +     if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_OS) {
> +             cmdq->is_valid = false;
> +             goto unlock;
> +     }
> +
> +     cmdq_id = cmdq->id;
>       ivpu_cmdq_destroy(file_priv, cmdq);
> +     ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id);
>  unlock:
>       mutex_unlock(&file_priv->lock);
>       return ret;
> @@ -866,7 +906,6 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct 
> ivpu_ipc_hdr *ipc_hdr,
>                      struct vpu_jsm_msg *jsm_msg)
>  {
>       struct vpu_ipc_msg_payload_job_done *payload;
> -     int ret;
>  
>       if (!jsm_msg) {
>               ivpu_err(vdev, "IPC message has no JSM payload\n");
> @@ -879,9 +918,10 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct 
> ivpu_ipc_hdr *ipc_hdr,
>       }
>  
>       payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload;
> -     ret = ivpu_job_signal_and_destroy(vdev, payload->job_id, 
> payload->job_status);
> -     if (!ret && !xa_empty(&vdev->submitted_jobs_xa))
> -             ivpu_start_job_timeout_detection(vdev);
> +
> +     mutex_lock(&vdev->submitted_jobs_lock);
> +     ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
> +     mutex_unlock(&vdev->submitted_jobs_lock);
>  }
>  
>  void ivpu_job_done_consumer_init(struct ivpu_device *vdev)
> @@ -894,3 +934,36 @@ void ivpu_job_done_consumer_fini(struct ivpu_device 
> *vdev)
>  {
>       ivpu_ipc_consumer_del(vdev, &vdev->job_done_consumer);
>  }
> +
> +void ivpu_context_abort_thread_handler(struct work_struct *work)
> +{
> +     struct ivpu_device *vdev = container_of(work, struct ivpu_device, 
> context_abort_work);
> +     struct ivpu_file_priv *file_priv;
> +     unsigned long ctx_id;
> +     struct ivpu_job *job;
> +     unsigned long id;
> +
> +     mutex_lock(&vdev->context_list_lock);
> +     xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
> +             if (!file_priv->has_mmu_faults || file_priv->aborted)
> +                     continue;
> +
> +             mutex_lock(&file_priv->lock);
> +             ivpu_context_abort_locked(file_priv);
> +             mutex_unlock(&file_priv->lock);
> +     }
> +     mutex_unlock(&vdev->context_list_lock);
> +
> +     if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW)
> +             return;
> +     /*
> +      * In hardware scheduling mode NPU already has stopped processing jobs
> +      * and won't send us any further notifications, thus we have to free 
> job related resources
> +      * and notify userspace
> +      */
> +     mutex_lock(&vdev->submitted_jobs_lock);
> +     xa_for_each(&vdev->submitted_jobs_xa, id, job)
> +             if (job->file_priv->aborted)
> +                     ivpu_job_signal_and_destroy(vdev, job->job_id, 
> DRM_IVPU_JOB_STATUS_ABORTED);
> +     mutex_unlock(&vdev->submitted_jobs_lock);
> +}
> diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h
> index 4d31277bcc41..fef8aed1fc88 100644
> --- a/drivers/accel/ivpu/ivpu_job.h
> +++ b/drivers/accel/ivpu/ivpu_job.h
> @@ -31,6 +31,7 @@ struct ivpu_cmdq {
>       u32 id;
>       u32 db_id;
>       u8 priority;
> +     bool is_valid;
>       bool is_legacy;
>  };
>  
> @@ -51,6 +52,7 @@ struct ivpu_job {
>       struct ivpu_file_priv *file_priv;
>       struct dma_fence *done_fence;
>       u64 cmd_buf_vpu_addr;
> +     u32 cmdq_id;
>       u32 job_id;
>       u32 engine_idx;
>       size_t bo_count;
> @@ -66,9 +68,11 @@ void ivpu_context_abort_locked(struct ivpu_file_priv 
> *file_priv);
>  
>  void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv);
>  void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev);
> +void ivpu_cmdq_abort_all_jobs(struct ivpu_device *vdev, u32 ctx_id, u32 
> cmdq_id);
>  
>  void ivpu_job_done_consumer_init(struct ivpu_device *vdev);
>  void ivpu_job_done_consumer_fini(struct ivpu_device *vdev);
> +void ivpu_context_abort_thread_handler(struct work_struct *work);
>  
>  void ivpu_jobs_abort_all(struct ivpu_device *vdev);
>  
> diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
> index 26ef52fbb93e..21f820dd0c65 100644
> --- a/drivers/accel/ivpu/ivpu_mmu.c
> +++ b/drivers/accel/ivpu/ivpu_mmu.c
> @@ -890,8 +890,7 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
>               REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons);
>       }
>  
> -     if (!kfifo_put(&vdev->hw->irq.fifo, IVPU_HW_IRQ_SRC_MMU_EVTQ))
> -             ivpu_err_ratelimited(vdev, "IRQ FIFO full\n");
> +     queue_work(system_wq, &vdev->context_abort_work);
>  }
>  
>  void ivpu_mmu_evtq_dump(struct ivpu_device *vdev)
> diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c
> index 616477fc17fa..8a616791c32f 100644
> --- a/drivers/accel/ivpu/ivpu_sysfs.c
> +++ b/drivers/accel/ivpu/ivpu_sysfs.c
> @@ -30,11 +30,12 @@ npu_busy_time_us_show(struct device *dev, struct 
> device_attribute *attr, char *b
>       struct ivpu_device *vdev = to_ivpu_device(drm);
>       ktime_t total, now = 0;
>  
> -     xa_lock(&vdev->submitted_jobs_xa);
> +     mutex_lock(&vdev->submitted_jobs_lock);
> +
>       total = vdev->busy_time;
>       if (!xa_empty(&vdev->submitted_jobs_xa))
>               now = ktime_sub(ktime_get(), vdev->busy_start_ts);
> -     xa_unlock(&vdev->submitted_jobs_xa);
> +     mutex_unlock(&vdev->submitted_jobs_lock);
>  
>       return sysfs_emit(buf, "%lld\n", ktime_to_us(ktime_add(total, now)));
>  }

Reply via email to