On Fri, Apr 11, 2025 at 12:38 PM Khatri, Sunil <sukha...@amd.com> wrote: > > Are we replacing the KFD user queue with KGD userqueue names here? > Also this looks like KFD user queue and KGD userqueue are both treated > at par?
Yeah, I could split this into two patches, one to rename the variables because they are no longer KFD specific and then the change to add the new function calls for userqs. Alex > > Looks good in general if the above understanding is correct. Some one > with better understanding of isolation should review. > Acked-by: Sunil Khatri <sunil.kha...@amd.com> > > On 4/10/2025 11:41 PM, Alex Deucher wrote: > > Enforce isolation serializes access to the GFX IP. User > > queues are isolated in the MES scheduler, but we still > > need to serialize between kernel queues and user queues. > > For enforce isolation, group KGD user queues with KFD user > > queues. > > > > Signed-off-by: Alex Deucher <alexander.deuc...@amd.com> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- > > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 34 ++++++++++++---------- > > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 ++-- > > 3 files changed, 22 insertions(+), 20 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > > index 4e1c97a919cec..3c6679fce7c20 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > > @@ -4344,7 +4344,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, > > amdgpu_sync_create(&adev->isolation[i].active); > > amdgpu_sync_create(&adev->isolation[i].prev); > > } > > - mutex_init(&adev->gfx.kfd_sch_mutex); > > + mutex_init(&adev->gfx.userq_sch_mutex); > > mutex_init(&adev->gfx.workload_profile_mutex); > > mutex_init(&adev->vcn.workload_profile_mutex); > > mutex_init(&adev->userq_mutex); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > index a42ac1060fa92..e08323f601535 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > @@ -1928,39 +1928,41 @@ void amdgpu_gfx_cleaner_shader_init(struct > > amdgpu_device *adev, > > static 
void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx, > > bool enable) > > { > > - mutex_lock(&adev->gfx.kfd_sch_mutex); > > + mutex_lock(&adev->gfx.userq_sch_mutex); > > > > if (enable) { > > /* If the count is already 0, it means there's an imbalance > > bug somewhere. > > * Note that the bug may be in a different caller than the > > one which triggers the > > * WARN_ON_ONCE. > > */ > > - if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) { > > + if (WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx] == 0)) { > > dev_err(adev->dev, "Attempted to enable KFD scheduler > > when reference count is already zero\n"); > > goto unlock; > > } > > > > - adev->gfx.kfd_sch_req_count[idx]--; > > + adev->gfx.userq_sch_req_count[idx]--; > > > > - if (adev->gfx.kfd_sch_req_count[idx] == 0 && > > - adev->gfx.kfd_sch_inactive[idx]) { > > + if (adev->gfx.userq_sch_req_count[idx] == 0 && > > + adev->gfx.userq_sch_inactive[idx]) { > > > > schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, > > > > msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx])); > > } > > } else { > > - if (adev->gfx.kfd_sch_req_count[idx] == 0) { > > + if (adev->gfx.userq_sch_req_count[idx] == 0) { > > > > cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work); > > - if (!adev->gfx.kfd_sch_inactive[idx]) { > > - amdgpu_amdkfd_stop_sched(adev, idx); > > - adev->gfx.kfd_sch_inactive[idx] = true; > > + if (!adev->gfx.userq_sch_inactive[idx]) { > > + amdgpu_userq_stop_sched(adev, idx); > > + if (adev->kfd.init_complete) > > + amdgpu_amdkfd_stop_sched(adev, idx); > > + adev->gfx.userq_sch_inactive[idx] = true; > > } > > } > > > > - adev->gfx.kfd_sch_req_count[idx]++; > > + adev->gfx.userq_sch_req_count[idx]++; > > } > > > > unlock: > > - mutex_unlock(&adev->gfx.kfd_sch_mutex); > > + mutex_unlock(&adev->gfx.userq_sch_mutex); > > } > > > > /** > > @@ -2005,12 +2007,12 @@ void amdgpu_gfx_enforce_isolation_handler(struct > > work_struct *work) > > msecs_to_jiffies(1)); > > } else { 
> > /* Tell KFD to resume the runqueue */ > > - if (adev->kfd.init_complete) { > > - WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]); > > - WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]); > > + WARN_ON_ONCE(!adev->gfx.userq_sch_inactive[idx]); > > + WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx]); > > + amdgpu_userq_start_sched(adev, idx); > > + if (adev->kfd.init_complete) > > amdgpu_amdkfd_start_sched(adev, idx); > > - adev->gfx.kfd_sch_inactive[idx] = false; > > - } > > + adev->gfx.userq_sch_inactive[idx] = false; > > } > > mutex_unlock(&adev->enforce_isolation_mutex); > > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > > index caaddab31023f..70b64bb1847c9 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > > @@ -475,9 +475,9 @@ struct amdgpu_gfx { > > bool enable_cleaner_shader; > > struct amdgpu_isolation_work enforce_isolation[MAX_XCP]; > > /* Mutex for synchronizing KFD scheduler operations */ > > - struct mutex kfd_sch_mutex; > > - u64 kfd_sch_req_count[MAX_XCP]; > > - bool kfd_sch_inactive[MAX_XCP]; > > + struct mutex userq_sch_mutex; > > + u64 userq_sch_req_count[MAX_XCP]; > > + bool userq_sch_inactive[MAX_XCP]; > > unsigned long enforce_isolation_jiffies[MAX_XCP]; > > unsigned long enforce_isolation_time[MAX_XCP]; > >