Hi Andrey, Good catch. I guess since both Christian and I were working on it parallelly we didn't catch this problem.
If it is only causing a problem in push_job then we can handle it something like this: ---------------------------------------------------------------------------------------------------------------------------- diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 05dc6ecd4003..f344ce32f128 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -563,6 +563,11 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job, first = spsc_queue_count(&entity->job_queue) == 0; reschedule = idle && first && (entity->num_rq_list > 1); + if (first && list_empty(&entity->list)) { + DRM_ERROR("Trying to push to a killed entity\n"); + return; + } + if (reschedule) { rq = drm_sched_entity_get_free_sched(entity); spin_lock(&entity->rq_lock); @@ -580,11 +585,6 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job, if (first) { /* Add the entity to the run queue */ spin_lock(&entity->rq_lock); - if (!entity->rq) { - DRM_ERROR("Trying to push to a killed entity\n"); - spin_unlock(&entity->rq_lock); - return; - } drm_sched_rq_add_entity(entity->rq, entity); spin_unlock(&entity->rq_lock); drm_sched_wakeup(entity->rq->sched); ----------------------------------------------------------------------------------------------------------------------------- On Thu, Aug 2, 2018 at 3:55 AM Andrey Grodzovsky <andrey.grodzov...@amd.com> wrote: > Thinking again about this change and 53d5f1e drm/scheduler: move idle > entities to scheduler with less load v2 > > Looks to me like use case for which fc9a539 drm/scheduler: add NULL > pointer check for run queue (v2) was done > > will not work anymore. > > First of all in drm_sched_entity_push_job, 'if (!entity->rq)' will never > be true any more since we stopped entity->rq to NULL in > > drm_sched_entity_flush. > > Second of all, even after we removed the entity from rq in > drm_sched_entity_flush to terminate any subsequent submissions > > to the queue the other thread doing push job can just acquire again a > run queue > > from drm_sched_entity_get_free_sched and continue submission so you > can't substitute ' if (!entity->rq)' to 'if (list_empty(&entity->list))'. > > What about adding a 'stopped' flag to drm_sched_entity to be set in > drm_sched_entity_flush and in > > But it might be worth adding a stopped flag field if it is required at other places as well. Thanks, Nayan > drm_sched_entity_push_job check for 'if (!entity->stopped)' instead of > ' if (!entity->rq)' ? > > Andrey > > > On 07/30/2018 07:03 AM, Christian König wrote: > > We removed the redundancy of having an extra scheduler field, so we > > can't set the rq to NULL any more or otherwise won't know which > > scheduler to use for the cleanup. > > > > Just remove the entity from the scheduling list instead. > > > > Signed-off-by: Christian König <christian.koe...@amd.com> > > --- > > drivers/gpu/drm/scheduler/gpu_scheduler.c | 35 > +++++++------------------------ > > 1 file changed, 8 insertions(+), 27 deletions(-) > > > > diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c > b/drivers/gpu/drm/scheduler/gpu_scheduler.c > > index f563e4fbb4b6..1b733229201e 100644 > > --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c > > +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c > > @@ -198,21 +198,6 @@ int drm_sched_entity_init(struct drm_sched_entity > *entity, > > } > > EXPORT_SYMBOL(drm_sched_entity_init); > > > > -/** > > - * drm_sched_entity_is_initialized - Query if entity is initialized > > - * > > - * @sched: Pointer to scheduler instance > > - * @entity: The pointer to a valid scheduler entity > > - * > > - * return true if entity is initialized, false otherwise > > -*/ > > -static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler > *sched, > > - struct drm_sched_entity > *entity) > > -{ > > - return entity->rq != NULL && > > - entity->rq->sched == sched; > > -} > > - > > /** > > * drm_sched_entity_is_idle - Check if entity is idle > > * > > @@ -224,7 +209,8 @@ static bool drm_sched_entity_is_idle(struct > drm_sched_entity *entity) > > { > > rmb(); > > > > - if (!entity->rq || spsc_queue_peek(&entity->job_queue) == NULL) > > + if (list_empty(&entity->list) || > > + spsc_queue_peek(&entity->job_queue) == NULL) > > return true; > > > > return false; > > @@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct drm_sched_entity > *entity, long timeout) > > long ret = timeout; > > > > sched = entity->rq->sched; > > - if (!drm_sched_entity_is_initialized(sched, entity)) > > - return ret; > > /** > > * The client will not queue more IBs during this fini, consume > existing > > * queued IBs or discard them on SIGKILL > > @@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct drm_sched_entity > *entity, long timeout) > > last_user = cmpxchg(&entity->last_user, current->group_leader, > NULL); > > if ((!last_user || last_user == current->group_leader) && > > (current->flags & PF_EXITING) && (current->exit_code == > SIGKILL)) > > - drm_sched_entity_set_rq(entity, NULL); > > + drm_sched_rq_remove_entity(entity->rq, entity); > > > > return ret; > > } > > @@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct drm_sched_entity > *entity) > > struct drm_gpu_scheduler *sched; > > > > sched = entity->rq->sched; > > - drm_sched_entity_set_rq(entity, NULL); > > + drm_sched_rq_remove_entity(entity->rq, entity); > > > > /* Consumption of existing IBs wasn't completed. Forcefully > > * remove them here. > > @@ -416,15 +400,12 @@ void drm_sched_entity_set_rq(struct > drm_sched_entity *entity, > > if (entity->rq == rq) > > return; > > > > - spin_lock(&entity->rq_lock); > > - > > - if (entity->rq) > > - drm_sched_rq_remove_entity(entity->rq, entity); > > + BUG_ON(!rq); > > > > + spin_lock(&entity->rq_lock); > > + drm_sched_rq_remove_entity(entity->rq, entity); > > entity->rq = rq; > > - if (rq) > > - drm_sched_rq_add_entity(rq, entity); > > - > > + drm_sched_rq_add_entity(rq, entity); > > spin_unlock(&entity->rq_lock); > > } > > EXPORT_SYMBOL(drm_sched_entity_set_rq); > >
_______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel