On 3/14/2025 5:24 AM, SRINIVASAN SHANMUGAM wrote:
> On 3/7/2025 7:18 PM, Christian König wrote:
>> Instead of emitting the cleaner shader for every job which has the
>> enforce_isolation flag set only emit it for the first submission from
>> every client.
>>
>> v2: add missing NULL check
>> v3: fix another NULL pointer deref
>>
>> Signed-off-by: Christian König <christian.koe...@amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 27 ++++++++++++++++++++------
>>  1 file changed, 21 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index ef4fe2df8398..dc10bea836db 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -643,6 +643,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>                  bool need_pipe_sync)
>>  {
>>      struct amdgpu_device *adev = ring->adev;
>> +    struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
>>      unsigned vmhub = ring->vm_hub;
>>      struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>>      struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
>> @@ -650,8 +651,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>      bool gds_switch_needed = ring->funcs->emit_gds_switch &&
>>              job->gds_switch_needed;
>>      bool vm_flush_needed = job->vm_needs_flush;
>> -    struct dma_fence *fence = NULL;
>> +    bool cleaner_shader_needed = false;
>>      bool pasid_mapping_needed = false;
>> +    struct dma_fence *fence = NULL;
>>      unsigned int patch;
>>      int r;
>>  
>> @@ -674,8 +676,12 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>      pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
>>              ring->funcs->emit_wreg;
>>  
>> +    cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
>> +            ring->funcs->emit_cleaner_shader && job->base.s_fence &&
>> +            &job->base.s_fence->scheduled == isolation->spearhead;

*here*

>> +
>>      if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
>> -        !(job->enforce_isolation && !job->vmid))
>> +        !cleaner_shader_needed)
>>              return 0;
>>  
>>      amdgpu_ring_ib_begin(ring);
>> @@ -686,9 +692,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>      if (need_pipe_sync)
>>              amdgpu_ring_emit_pipeline_sync(ring);
>>  
>> -    if (adev->gfx.enable_cleaner_shader &&
>> -        ring->funcs->emit_cleaner_shader &&
>> -        job->enforce_isolation)
>> +    if (cleaner_shader_needed)
>
> Do we also need to check for ring->funcs->emit_cleaner_shader here?
>

I moved that check up to where cleaner_shader_needed is computed. See the
*here* marker above.

That makes it easier to decide whether we need a fence after the preamble
or not.
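
Roughly, the same flag now drives both the preamble and the fence decision
(a simplified sketch stitched together from the hunks above, using the same
names as in the patch):

    bool cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
            ring->funcs->emit_cleaner_shader && job->base.s_fence &&
            &job->base.s_fence->scheduled == isolation->spearhead;
    ...
    if (cleaner_shader_needed)
            ring->funcs->emit_cleaner_shader(ring);
    ...
    /* the cleaner shader is one more reason to emit a fence */
    if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed)
            r = amdgpu_fence_emit(ring, &fence, NULL, 0);

Both the s_fence NULL check and the emit_cleaner_shader callback check are
folded into the flag, so the later call sites don't need to repeat them.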

Regards,
Christian.

> if (cleaner_shader_needed && ring->funcs->emit_cleaner_shader)
>
>>              ring->funcs->emit_cleaner_shader(ring);
>>  
>>      if (vm_flush_needed) {
>> @@ -710,7 +714,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>                                          job->oa_size);
>>      }
>>  
>> -    if (vm_flush_needed || pasid_mapping_needed) {
>> +    if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
>>              r = amdgpu_fence_emit(ring, &fence, NULL, 0);
>>              if (r)
>>                      return r;
>> @@ -732,6 +736,17 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>              id->pasid_mapping = dma_fence_get(fence);
>>              mutex_unlock(&id_mgr->lock);
>>      }
>> +
>> +    /*
>> +     * Make sure that all other submissions wait for the cleaner shader to
>> +     * finish before we push them to the HW.
>> +     */
>> +    if (cleaner_shader_needed) {
>> +            mutex_lock(&adev->enforce_isolation_mutex);
>> +            dma_fence_put(isolation->spearhead);
>> +            isolation->spearhead = dma_fence_get(fence);
>> +            mutex_unlock(&adev->enforce_isolation_mutex);
>> +    }
>>      dma_fence_put(fence);
>>  
>>      amdgpu_ring_patch_cond_exec(ring, patch);
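
For context: the consumer side of the spearhead fence is not in this hunk.
A hypothetical sketch of the contract the comment describes, assuming the
dependency is added through the scheduler; drm_sched_job_add_dependency()
is the stock helper, but the exact hook used by the series may differ:

    /* hypothetical: in the submission path of every other client */
    mutex_lock(&adev->enforce_isolation_mutex);
    if (isolation->spearhead)
            /* add_dependency consumes a reference, hence the extra get */
            r = drm_sched_job_add_dependency(&job->base,
                            dma_fence_get(isolation->spearhead));
    mutex_unlock(&adev->enforce_isolation_mutex);

That way every job submitted after the spearhead only runs once the cleaner
shader has signaled.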
