On 10/22/2024 12:20 PM, Felix Kuehling wrote:
>
> On 2024-10-14 23:51, Zhu Lingshan wrote:
>> This commit specifies data type struct amdkfd_process_info
>> rather than general void* in ralted functions.
> Several interfaces in amdgpu_amdkfd.h use void * as opaque pointers, e.g.
> process_info, mem_obj, drm_priv. The reasons are partly historical because
> KFD used to be in its own kernel module. That said, there is nothing
> fundamentally wrong with opaque pointers when the upper layers doesn't need
> to see the contents in the objects returned by the lower layer.
void * is workable but imperfect, IMHO it representing a compromise that could
ideally be improve especially when we know exactly the data structure type.
This change provides better readability, type safety, compiling checking, and
avoid the castings
>
> It makes me wonder, though, why you're singling out just process_info? Are
> you proposing to change more interfaces to eliminate opaque pointers?
That is because I just happen to read process_info related code, I can surely
improve others if any individuals of them also represents a certain data type.
Thanks
Lingshan
>
> Regards,
> Felix
>
>> kfd_process->kgd_process_info is initialized
>> in init_kfd_vm() by such code:
>>
>> static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
>> struct dma_fence **ef) {
>> struct amdkfd_process_info *info = NULL;
>>
>> if (!*process_info) {
>> info = kzalloc(sizeof(*info), GFP_KERNEL);
>>
>> *process_info = info;
>> }
>>
>> That means kfd_process->kgd_process_info is type
>> struct amdkfd_process_info, therefore we should avoid using void*
>>
>> Using a specified data type other than void* can help improve
>> readability. There are other benifits like: type safety,
>> avoid casting and better compling chekings.
>>
>> Signed-off-by: Zhu Lingshan <lingshan....@amd.com>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 10 +++---
>> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 31 ++++++++-----------
>> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +-
>> 3 files changed, 19 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index f9d119448442..c1346b8c9ab7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -239,8 +239,8 @@ void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device
>> *adev, void **mem_obj);
>> int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
>> void **mem_obj);
>> void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj);
>> -int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem
>> **mem);
>> -int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
>> +int amdgpu_amdkfd_add_gws_to_process(struct amdkfd_process_info *pinfo,
>> void *gws, struct kgd_mem **mem);
>> +int amdgpu_amdkfd_remove_gws_from_process(struct amdkfd_process_info
>> *pinfo, void *mem);
>> uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
>> enum kgd_engine_type type);
>> void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
>> @@ -299,7 +299,7 @@ int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct
>> amdgpu_device *adev,
>> struct amdgpu_vm *avm, u32 pasid);
>> int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
>> struct amdgpu_vm *avm,
>> - void **process_info,
>> + struct amdkfd_process_info
>> **process_info,
>> struct dma_fence **ef);
>> void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
>> void *drm_priv);
>> @@ -348,8 +348,8 @@ void
>> amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *ad
>>
>> bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev);
>> bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem);
>> -void amdgpu_amdkfd_block_mmu_notifications(void *p);
>> -int amdgpu_amdkfd_criu_resume(void *p);
>> +void amdgpu_amdkfd_block_mmu_notifications(struct amdkfd_process_info
>> *pinfo);
>> +int amdgpu_amdkfd_criu_resume(struct amdkfd_process_info *pinfo);
>> int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
>> uint64_t size, u32 alloc_flag, int8_t xcp_id);
>> void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index ce5ca304dba9..2a1ee17e44a1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -1376,7 +1376,7 @@ static int process_update_pds(struct
>> amdkfd_process_info *process_info,
>> return 0;
>> }
>>
>> -static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
>> +static int init_kfd_vm(struct amdgpu_vm *vm, struct amdkfd_process_info
>> **process_info,
>> struct dma_fence **ef)
>> {
>> struct amdkfd_process_info *info = NULL;
>> @@ -1552,7 +1552,7 @@ int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct
>> amdgpu_device *adev,
>>
>> int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
>> struct amdgpu_vm *avm,
>> - void **process_info,
>> + struct amdkfd_process_info
>> **process_info,
>> struct dma_fence **ef)
>> {
>> int ret;
>> @@ -1639,19 +1639,16 @@ uint64_t
>> amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
>> return avm->pd_phys_addr;
>> }
>>
>> -void amdgpu_amdkfd_block_mmu_notifications(void *p)
>> +void amdgpu_amdkfd_block_mmu_notifications(struct amdkfd_process_info
>> *pinfo)
>> {
>> - struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
>> -
>> mutex_lock(&pinfo->lock);
>> WRITE_ONCE(pinfo->block_mmu_notifications, true);
>> mutex_unlock(&pinfo->lock);
>> }
>>
>> -int amdgpu_amdkfd_criu_resume(void *p)
>> +int amdgpu_amdkfd_criu_resume(struct amdkfd_process_info *pinfo)
>> {
>> int ret = 0;
>> - struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
>>
>> mutex_lock(&pinfo->lock);
>> pr_debug("scheduling work\n");
>> @@ -3093,13 +3090,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void
>> *info, struct dma_fence __rcu *
>> return ret;
>> }
>>
>> -int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem
>> **mem)
>> +int amdgpu_amdkfd_add_gws_to_process(struct amdkfd_process_info *pinfo,
>> void *gws, struct kgd_mem **mem)
>> {
>> - struct amdkfd_process_info *process_info = (struct amdkfd_process_info
>> *)info;
>> struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
>> int ret;
>>
>> - if (!info || !gws)
>> + if (!pinfo || !gws)
>> return -EINVAL;
>>
>> *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
>> @@ -3110,8 +3106,8 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void
>> *gws, struct kgd_mem **mem
>> INIT_LIST_HEAD(&(*mem)->attachments);
>> (*mem)->bo = amdgpu_bo_ref(gws_bo);
>> (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
>> - (*mem)->process_info = process_info;
>> - add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
>> + (*mem)->process_info = pinfo;
>> + add_kgd_mem_to_kfd_bo_list(*mem, pinfo, false);
>> amdgpu_sync_create(&(*mem)->sync);
>>
>>
>> @@ -3136,7 +3132,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void
>> *gws, struct kgd_mem **mem
>> if (ret)
>> goto reserve_shared_fail;
>> dma_resv_add_fence(gws_bo->tbo.base.resv,
>> - &process_info->eviction_fence->base,
>> + &pinfo->eviction_fence->base,
>> DMA_RESV_USAGE_BOOKKEEP);
>> amdgpu_bo_unreserve(gws_bo);
>> mutex_unlock(&(*mem)->process_info->lock);
>> @@ -3149,7 +3145,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void
>> *gws, struct kgd_mem **mem
>> bo_reservation_failure:
>> mutex_unlock(&(*mem)->process_info->lock);
>> amdgpu_sync_free(&(*mem)->sync);
>> - remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
>> + remove_kgd_mem_from_kfd_bo_list(*mem, pinfo);
>> amdgpu_bo_unref(&gws_bo);
>> mutex_destroy(&(*mem)->lock);
>> kfree(*mem);
>> @@ -3157,17 +3153,16 @@ int amdgpu_amdkfd_add_gws_to_process(void *info,
>> void *gws, struct kgd_mem **mem
>> return ret;
>> }
>>
>> -int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
>> +int amdgpu_amdkfd_remove_gws_from_process(struct amdkfd_process_info
>> *pinfo, void *mem)
>> {
>> int ret;
>> - struct amdkfd_process_info *process_info = (struct amdkfd_process_info
>> *)info;
>> struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
>> struct amdgpu_bo *gws_bo = kgd_mem->bo;
>>
>> /* Remove BO from process's validate list so restore worker won't touch
>> * it anymore
>> */
>> - remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
>> + remove_kgd_mem_from_kfd_bo_list(kgd_mem, pinfo);
>>
>> ret = amdgpu_bo_reserve(gws_bo, false);
>> if (unlikely(ret)) {
>> @@ -3176,7 +3171,7 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info,
>> void *mem)
>> return ret;
>> }
>> amdgpu_amdkfd_remove_eviction_fence(gws_bo,
>> - process_info->eviction_fence);
>> + pinfo->eviction_fence);
>> amdgpu_bo_unreserve(gws_bo);
>> amdgpu_sync_free(&kgd_mem->sync);
>> amdgpu_bo_unref(&gws_bo);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index d6530febabad..b0426a1235b8 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -934,7 +934,7 @@ struct kfd_process {
>> bool signal_event_limit_reached;
>>
>> /* Information used for memory eviction */
>> - void *kgd_process_info;
>> + struct amdkfd_process_info *kgd_process_info;> /* Eviction
>> fence that is attached to all the BOs of this process. The
>> * fence will be triggered during eviction and new one will be created
>> * during restore