On 10/22/2024 4:01 PM, Zhu Lingshan wrote:
>
> On 10/22/2024 12:20 PM, Felix Kuehling wrote:
>> On 2024-10-14 23:51, Zhu Lingshan wrote:
>>> This commit specifies data type struct amdkfd_process_info
>>> rather than general void* in ralted functions.
>> Several interfaces in amdgpu_amdkfd.h use void * as opaque pointers, e.g. 
>> process_info, mem_obj, drm_priv. The reasons are partly historical because 
>> KFD used to be in its own kernel module. That said, there is nothing 
>> fundamentally wrong with opaque pointers when the upper layers doesn't need 
>> to see the contents in the objects returned by the lower layer.
> void * is workable but imperfect, IMHO it representing a compromise that 
> could ideally be improve especially when we know exactly the data structure 
> type.
> This change provides better readability, type safety, compiling checking, and 
> avoid the castings
shall I make any improvements on this patch?
>> It makes me wonder, though, why you're singling out just process_info? Are 
>> you proposing to change more interfaces to eliminate opaque pointers?
> That is because I just happen to read process_info related code, I can surely 
> improve others if any individuals of them also represents a certain data type.
do you want me to change all void * opaque pointer in a series or just this one 
for now?

Thanks
Lingshan
>
> Thanks
> Lingshan
>> Regards,
>>   Felix
>>
>>> kfd_process->kgd_process_info is initialized
>>> in init_kfd_vm() by such code:
>>>
>>> static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
>>>                        struct dma_fence **ef) {
>>>         struct amdkfd_process_info *info = NULL;
>>>
>>>         if (!*process_info) {
>>>                 info = kzalloc(sizeof(*info), GFP_KERNEL);
>>>
>>>         *process_info = info;
>>> }
>>>
>>> That means kfd_process->kgd_process_info is type
>>> struct amdkfd_process_info, therefore we should avoid using void*
>>>
>>> Using a specified data type other than void* can help improve
>>> readability. There are other benifits like: type safety,
>>> avoid casting and better compling chekings.
>>>
>>> Signed-off-by: Zhu Lingshan <lingshan....@amd.com>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    | 10 +++---
>>>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 31 ++++++++-----------
>>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  2 +-
>>>  3 files changed, 19 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> index f9d119448442..c1346b8c9ab7 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> @@ -239,8 +239,8 @@ void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device 
>>> *adev, void **mem_obj);
>>>  int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
>>>                             void **mem_obj);
>>>  void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj);
>>> -int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem 
>>> **mem);
>>> -int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
>>> +int amdgpu_amdkfd_add_gws_to_process(struct amdkfd_process_info *pinfo, 
>>> void *gws, struct kgd_mem **mem);
>>> +int amdgpu_amdkfd_remove_gws_from_process(struct amdkfd_process_info 
>>> *pinfo, void *mem);
>>>  uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
>>>                                   enum kgd_engine_type type);
>>>  void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
>>> @@ -299,7 +299,7 @@ int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct 
>>> amdgpu_device *adev,
>>>                                  struct amdgpu_vm *avm, u32 pasid);
>>>  int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
>>>                                     struct amdgpu_vm *avm,
>>> -                                   void **process_info,
>>> +                                   struct amdkfd_process_info 
>>> **process_info,
>>>                                     struct dma_fence **ef);
>>>  void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
>>>                                     void *drm_priv);
>>> @@ -348,8 +348,8 @@ void 
>>> amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *ad
>>>  
>>>  bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev);
>>>  bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem);
>>> -void amdgpu_amdkfd_block_mmu_notifications(void *p);
>>> -int amdgpu_amdkfd_criu_resume(void *p);
>>> +void amdgpu_amdkfd_block_mmu_notifications(struct amdkfd_process_info 
>>> *pinfo);
>>> +int amdgpu_amdkfd_criu_resume(struct amdkfd_process_info *pinfo);
>>>  int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
>>>             uint64_t size, u32 alloc_flag, int8_t xcp_id);
>>>  void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> index ce5ca304dba9..2a1ee17e44a1 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> @@ -1376,7 +1376,7 @@ static int process_update_pds(struct 
>>> amdkfd_process_info *process_info,
>>>     return 0;
>>>  }
>>>  
>>> -static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
>>> +static int init_kfd_vm(struct amdgpu_vm *vm, struct amdkfd_process_info 
>>> **process_info,
>>>                    struct dma_fence **ef)
>>>  {
>>>     struct amdkfd_process_info *info = NULL;
>>> @@ -1552,7 +1552,7 @@ int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct 
>>> amdgpu_device *adev,
>>>  
>>>  int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
>>>                                        struct amdgpu_vm *avm,
>>> -                                      void **process_info,
>>> +                                      struct amdkfd_process_info 
>>> **process_info,
>>>                                        struct dma_fence **ef)
>>>  {
>>>     int ret;
>>> @@ -1639,19 +1639,16 @@ uint64_t 
>>> amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
>>>     return avm->pd_phys_addr;
>>>  }
>>>  
>>> -void amdgpu_amdkfd_block_mmu_notifications(void *p)
>>> +void amdgpu_amdkfd_block_mmu_notifications(struct amdkfd_process_info 
>>> *pinfo)
>>>  {
>>> -   struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
>>> -
>>>     mutex_lock(&pinfo->lock);
>>>     WRITE_ONCE(pinfo->block_mmu_notifications, true);
>>>     mutex_unlock(&pinfo->lock);
>>>  }
>>>  
>>> -int amdgpu_amdkfd_criu_resume(void *p)
>>> +int amdgpu_amdkfd_criu_resume(struct amdkfd_process_info *pinfo)
>>>  {
>>>     int ret = 0;
>>> -   struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
>>>  
>>>     mutex_lock(&pinfo->lock);
>>>     pr_debug("scheduling work\n");
>>> @@ -3093,13 +3090,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void 
>>> *info, struct dma_fence __rcu *
>>>     return ret;
>>>  }
>>>  
>>> -int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem 
>>> **mem)
>>> +int amdgpu_amdkfd_add_gws_to_process(struct amdkfd_process_info *pinfo, 
>>> void *gws, struct kgd_mem **mem)
>>>  {
>>> -   struct amdkfd_process_info *process_info = (struct amdkfd_process_info 
>>> *)info;
>>>     struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
>>>     int ret;
>>>  
>>> -   if (!info || !gws)
>>> +   if (!pinfo || !gws)
>>>             return -EINVAL;
>>>  
>>>     *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
>>> @@ -3110,8 +3106,8 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void 
>>> *gws, struct kgd_mem **mem
>>>     INIT_LIST_HEAD(&(*mem)->attachments);
>>>     (*mem)->bo = amdgpu_bo_ref(gws_bo);
>>>     (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
>>> -   (*mem)->process_info = process_info;
>>> -   add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
>>> +   (*mem)->process_info = pinfo;
>>> +   add_kgd_mem_to_kfd_bo_list(*mem, pinfo, false);
>>>     amdgpu_sync_create(&(*mem)->sync);
>>>  
>>>  
>>> @@ -3136,7 +3132,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void 
>>> *gws, struct kgd_mem **mem
>>>     if (ret)
>>>             goto reserve_shared_fail;
>>>     dma_resv_add_fence(gws_bo->tbo.base.resv,
>>> -                      &process_info->eviction_fence->base,
>>> +                      &pinfo->eviction_fence->base,
>>>                        DMA_RESV_USAGE_BOOKKEEP);
>>>     amdgpu_bo_unreserve(gws_bo);
>>>     mutex_unlock(&(*mem)->process_info->lock);
>>> @@ -3149,7 +3145,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void 
>>> *gws, struct kgd_mem **mem
>>>  bo_reservation_failure:
>>>     mutex_unlock(&(*mem)->process_info->lock);
>>>     amdgpu_sync_free(&(*mem)->sync);
>>> -   remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
>>> +   remove_kgd_mem_from_kfd_bo_list(*mem, pinfo);
>>>     amdgpu_bo_unref(&gws_bo);
>>>     mutex_destroy(&(*mem)->lock);
>>>     kfree(*mem);
>>> @@ -3157,17 +3153,16 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, 
>>> void *gws, struct kgd_mem **mem
>>>     return ret;
>>>  }
>>>  
>>> -int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
>>> +int amdgpu_amdkfd_remove_gws_from_process(struct amdkfd_process_info 
>>> *pinfo, void *mem)
>>>  {
>>>     int ret;
>>> -   struct amdkfd_process_info *process_info = (struct amdkfd_process_info 
>>> *)info;
>>>     struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
>>>     struct amdgpu_bo *gws_bo = kgd_mem->bo;
>>>  
>>>     /* Remove BO from process's validate list so restore worker won't touch
>>>      * it anymore
>>>      */
>>> -   remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
>>> +   remove_kgd_mem_from_kfd_bo_list(kgd_mem, pinfo);
>>>  
>>>     ret = amdgpu_bo_reserve(gws_bo, false);
>>>     if (unlikely(ret)) {
>>> @@ -3176,7 +3171,7 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, 
>>> void *mem)
>>>             return ret;
>>>     }
>>>     amdgpu_amdkfd_remove_eviction_fence(gws_bo,
>>> -                   process_info->eviction_fence);
>>> +                   pinfo->eviction_fence);
>>>     amdgpu_bo_unreserve(gws_bo);
>>>     amdgpu_sync_free(&kgd_mem->sync);
>>>     amdgpu_bo_unref(&gws_bo);
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> index d6530febabad..b0426a1235b8 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> @@ -934,7 +934,7 @@ struct kfd_process {
>>>     bool signal_event_limit_reached;
>>>  
>>>     /* Information used for memory eviction */
>>> -   void *kgd_process_info;
>>> +   struct amdkfd_process_info *kgd_process_info;>          /* Eviction 
>>> fence that is attached to all the BOs of this process. The
>>>      * fence will be triggered during eviction and new one will be created
>>>      * during restore

Reply via email to