On 10/22/2024 4:01 PM, Zhu Lingshan wrote: > > On 10/22/2024 12:20 PM, Felix Kuehling wrote: >> On 2024-10-14 23:51, Zhu Lingshan wrote: >>> This commit specifies data type struct amdkfd_process_info >>> rather than general void* in ralted functions. >> Several interfaces in amdgpu_amdkfd.h use void * as opaque pointers, e.g. >> process_info, mem_obj, drm_priv. The reasons are partly historical because >> KFD used to be in its own kernel module. That said, there is nothing >> fundamentally wrong with opaque pointers when the upper layers doesn't need >> to see the contents in the objects returned by the lower layer. > void * is workable but imperfect, IMHO it representing a compromise that > could ideally be improve especially when we know exactly the data structure > type. > This change provides better readability, type safety, compiling checking, and > avoid the castings shall I make any improvements on this patch? >> It makes me wonder, though, why you're singling out just process_info? Are >> you proposing to change more interfaces to eliminate opaque pointers? > That is because I just happen to read process_info related code, I can surely > improve others if any individuals of them also represents a certain data type. do you want me to change all void * opaque pointer in a series or just this one for now?
Thanks Lingshan > > Thanks > Lingshan >> Regards, >> Felix >> >>> kfd_process->kgd_process_info is initialized >>> in init_kfd_vm() by such code: >>> >>> static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, >>> struct dma_fence **ef) { >>> struct amdkfd_process_info *info = NULL; >>> >>> if (!*process_info) { >>> info = kzalloc(sizeof(*info), GFP_KERNEL); >>> >>> *process_info = info; >>> } >>> >>> That means kfd_process->kgd_process_info is type >>> struct amdkfd_process_info, therefore we should avoid using void* >>> >>> Using a specified data type other than void* can help improve >>> readability. There are other benifits like: type safety, >>> avoid casting and better compling chekings. >>> >>> Signed-off-by: Zhu Lingshan <lingshan....@amd.com>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 10 +++--- >>> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 31 ++++++++----------- >>> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- >>> 3 files changed, 19 insertions(+), 24 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >>> index f9d119448442..c1346b8c9ab7 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >>> @@ -239,8 +239,8 @@ void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device >>> *adev, void **mem_obj); >>> int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, >>> void **mem_obj); >>> void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj); >>> -int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem >>> **mem); >>> -int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem); >>> +int amdgpu_amdkfd_add_gws_to_process(struct amdkfd_process_info *pinfo, >>> void *gws, struct kgd_mem **mem); >>> +int amdgpu_amdkfd_remove_gws_from_process(struct amdkfd_process_info >>> *pinfo, void *mem); >>> uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev, >>> enum kgd_engine_type type); >>> void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, >>> @@ -299,7 +299,7 @@ int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct >>> amdgpu_device *adev, >>> struct amdgpu_vm *avm, u32 pasid); >>> int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, >>> struct amdgpu_vm *avm, >>> - void **process_info, >>> + struct amdkfd_process_info >>> **process_info, >>> struct dma_fence **ef); >>> void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, >>> void *drm_priv); >>> @@ -348,8 +348,8 @@ void >>> amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *ad >>> >>> bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev); >>> bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem); >>> -void amdgpu_amdkfd_block_mmu_notifications(void *p); >>> -int amdgpu_amdkfd_criu_resume(void *p); >>> +void amdgpu_amdkfd_block_mmu_notifications(struct amdkfd_process_info >>> *pinfo); >>> +int amdgpu_amdkfd_criu_resume(struct amdkfd_process_info *pinfo); >>> int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, >>> uint64_t size, u32 alloc_flag, int8_t xcp_id); >>> void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >>> index ce5ca304dba9..2a1ee17e44a1 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >>> @@ -1376,7 +1376,7 @@ static int process_update_pds(struct >>> amdkfd_process_info *process_info, >>> return 0; >>> } >>> >>> -static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, >>> +static int init_kfd_vm(struct amdgpu_vm *vm, struct amdkfd_process_info >>> **process_info, >>> struct dma_fence **ef) >>> { >>> struct amdkfd_process_info *info = NULL; >>> @@ -1552,7 +1552,7 @@ int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct >>> amdgpu_device *adev, >>> >>> int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, >>> struct amdgpu_vm *avm, >>> - void **process_info, >>> + struct amdkfd_process_info >>> **process_info, >>> struct dma_fence **ef) >>> { >>> int ret; >>> @@ -1639,19 +1639,16 @@ uint64_t >>> amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv) >>> return avm->pd_phys_addr; >>> } >>> >>> -void amdgpu_amdkfd_block_mmu_notifications(void *p) >>> +void amdgpu_amdkfd_block_mmu_notifications(struct amdkfd_process_info >>> *pinfo) >>> { >>> - struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p; >>> - >>> mutex_lock(&pinfo->lock); >>> WRITE_ONCE(pinfo->block_mmu_notifications, true); >>> mutex_unlock(&pinfo->lock); >>> } >>> >>> -int amdgpu_amdkfd_criu_resume(void *p) >>> +int amdgpu_amdkfd_criu_resume(struct amdkfd_process_info *pinfo) >>> { >>> int ret = 0; >>> - struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p; >>> >>> mutex_lock(&pinfo->lock); >>> pr_debug("scheduling work\n"); >>> @@ -3093,13 +3090,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void >>> *info, struct dma_fence __rcu * >>> return ret; >>> } >>> >>> -int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem >>> **mem) >>> +int amdgpu_amdkfd_add_gws_to_process(struct amdkfd_process_info *pinfo, >>> void *gws, struct kgd_mem **mem) >>> { >>> - struct amdkfd_process_info *process_info = (struct amdkfd_process_info >>> *)info; >>> struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws; >>> int ret; >>> >>> - if (!info || !gws) >>> + if (!pinfo || !gws) >>> return -EINVAL; >>> >>> *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); >>> @@ -3110,8 +3106,8 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void >>> *gws, struct kgd_mem **mem >>> INIT_LIST_HEAD(&(*mem)->attachments); >>> (*mem)->bo = amdgpu_bo_ref(gws_bo); >>> (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS; >>> - (*mem)->process_info = process_info; >>> - add_kgd_mem_to_kfd_bo_list(*mem, process_info, false); >>> + (*mem)->process_info = pinfo; >>> + add_kgd_mem_to_kfd_bo_list(*mem, pinfo, false); >>> amdgpu_sync_create(&(*mem)->sync); >>> >>> >>> @@ -3136,7 +3132,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void >>> *gws, struct kgd_mem **mem >>> if (ret) >>> goto reserve_shared_fail; >>> dma_resv_add_fence(gws_bo->tbo.base.resv, >>> - &process_info->eviction_fence->base, >>> + &pinfo->eviction_fence->base, >>> DMA_RESV_USAGE_BOOKKEEP); >>> amdgpu_bo_unreserve(gws_bo); >>> mutex_unlock(&(*mem)->process_info->lock); >>> @@ -3149,7 +3145,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void >>> *gws, struct kgd_mem **mem >>> bo_reservation_failure: >>> mutex_unlock(&(*mem)->process_info->lock); >>> amdgpu_sync_free(&(*mem)->sync); >>> - remove_kgd_mem_from_kfd_bo_list(*mem, process_info); >>> + remove_kgd_mem_from_kfd_bo_list(*mem, pinfo); >>> amdgpu_bo_unref(&gws_bo); >>> mutex_destroy(&(*mem)->lock); >>> kfree(*mem); >>> @@ -3157,17 +3153,16 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, >>> void *gws, struct kgd_mem **mem >>> return ret; >>> } >>> >>> -int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem) >>> +int amdgpu_amdkfd_remove_gws_from_process(struct amdkfd_process_info >>> *pinfo, void *mem) >>> { >>> int ret; >>> - struct amdkfd_process_info *process_info = (struct amdkfd_process_info >>> *)info; >>> struct kgd_mem *kgd_mem = (struct kgd_mem *)mem; >>> struct amdgpu_bo *gws_bo = kgd_mem->bo; >>> >>> /* Remove BO from process's validate list so restore worker won't touch >>> * it anymore >>> */ >>> - remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info); >>> + remove_kgd_mem_from_kfd_bo_list(kgd_mem, pinfo); >>> >>> ret = amdgpu_bo_reserve(gws_bo, false); >>> if (unlikely(ret)) { >>> @@ -3176,7 +3171,7 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, >>> void *mem) >>> return ret; >>> } >>> amdgpu_amdkfd_remove_eviction_fence(gws_bo, >>> - process_info->eviction_fence); >>> + pinfo->eviction_fence); >>> amdgpu_bo_unreserve(gws_bo); >>> amdgpu_sync_free(&kgd_mem->sync); >>> amdgpu_bo_unref(&gws_bo); >>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >>> index d6530febabad..b0426a1235b8 100644 >>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >>> @@ -934,7 +934,7 @@ struct kfd_process { >>> bool signal_event_limit_reached; >>> >>> /* Information used for memory eviction */ >>> - void *kgd_process_info; >>> + struct amdkfd_process_info *kgd_process_info;> /* Eviction >>> fence that is attached to all the BOs of this process. The >>> * fence will be triggered during eviction and new one will be created >>> * during restore