Hi Felix,

Thanks for review. I will fix these problems in v2.

Regards,
Ma Jun
On 10/25/2022 5:03 AM, Felix Kuehling wrote:
> On 2022-10-24 07:26, Ma Jun wrote:
>> Init and save the base cu processor id for later use
>>
>> Signed-off-by: Ma Jun <jun....@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c   | 24 +--------------------
>>   drivers/gpu/drm/amd/amdkfd/kfd_device.c | 28 +++++++++++++++++++++++++
>>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |  3 +++
>>   3 files changed, 32 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> index d25ac9cbe5b2..4857ec5b9f46 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> @@ -30,26 +30,6 @@
>>   #include "amdgpu.h"
>>   #include "amdgpu_amdkfd.h"
>>   
>> -/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
>> - * GPU processor ID are expressed with Bit[31]=1.
>> - * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
>> - * used in the CRAT.
>> - */
>> -static uint32_t gpu_processor_id_low = 0x80001000;
>> -
>> -/* Return the next available gpu_processor_id and increment it for next GPU
>> - *  @total_cu_count - Total CUs present in the GPU including ones
>> - *                    masked off
>> - */
>> -static inline unsigned int get_and_inc_gpu_processor_id(
>> -                            unsigned int total_cu_count)
>> -{
>> -    int current_id = gpu_processor_id_low;
>> -
>> -    gpu_processor_id_low += total_cu_count;
>> -    return current_id;
>> -}
>> -
>>   /* Static table to describe GPU Cache information */
>>   struct kfd_gpu_cache_info {
>>      uint32_t        cache_size;
>> @@ -2223,7 +2203,6 @@ static int kfd_create_vcrat_image_gpu(void 
>> *pcrat_image,
>>      struct crat_subtype_computeunit *cu;
>>      struct kfd_cu_info cu_info;
>>      int avail_size = *size;
>> -    uint32_t total_num_of_cu;
>>      int num_of_cache_entries = 0;
>>      int cache_mem_filled = 0;
>>      uint32_t nid = 0;
>> @@ -2275,8 +2254,7 @@ static int kfd_create_vcrat_image_gpu(void 
>> *pcrat_image,
>>      cu->wave_front_size = cu_info.wave_front_size;
>>      cu->array_count = cu_info.num_shader_arrays_per_engine *
>>              cu_info.num_shader_engines;
>> -    total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
>> -    cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
>> +    cu->processor_id_low = kdev->processor_id_low;
> 
> I don't understand why you can't leave the call to get_and_inc here. You 
> could set kdev->processor_id_low here. Or set it when parsing the CRAT 
> table later. Setting it in kdev before creating the topology seems 
> backwards.
> 
> 
>>      cu->num_cu_per_array = cu_info.num_cu_per_sh;
>>      cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
>>      cu->num_banks = cu_info.num_shader_engines;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> index ae1f0be3f3a1..099df4598a42 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> @@ -38,6 +38,32 @@
>>   
>>   #define MQD_SIZE_ALIGNED 768
>>   
>> +/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
>> + * GPU processor ID are expressed with Bit[31]=1.
>> + * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
>> + * used in the CRAT.
>> + */
>> +static uint32_t gpu_processor_id_low = 0x80001000;
>> +
>> +/* Return the next available gpu_processor_id and increment it for next GPU
>> + *  @total_cu_count - Total CUs present in the GPU including ones
>> + *                    masked off
>> + */
>> +static inline unsigned int get_and_inc_gpu_processor_id(struct kfd_dev *kfd)
>> +{
>> +    struct amdgpu_device *adev = kfd->adev;
>> +    unsigned int array_count = 0;
>> +    unsigned int total_cu_count = 0;
>> +
>> +    kfd->processor_id_low = gpu_processor_id_low;
>> +
>> +    array_count = adev->gfx.config.max_sh_per_se * 
>> adev->gfx.config.max_shader_engines;
>> +    total_cu_count = array_count * adev->gfx.config.max_cu_per_sh;
>> +
>> +    gpu_processor_id_low += total_cu_count;
> 
> Can this function fun on two threads concurrently? If yes, you may need 
> a lock here.
> > 
>> +    return 0;
> 
> If this function cannot fail, you should make it return void.
> 
> 
>> +}
>> +
>>   /*
>>    * kfd_locked is used to lock the kfd driver during suspend or reset
>>    * once locked, kfd driver will stop any further GPU execution.
>> @@ -656,6 +682,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>>   
>>      amdgpu_amdkfd_get_local_mem_info(kfd->adev, &kfd->local_mem_info);
>>   
>> +    get_and_inc_gpu_processor_id(kfd);
> 
> You're ignoring the return value. Since the function cannot fail, make 
> it void. The name "get_..." implies that it should return something, 
> though, so maybe change the name. E.g. assign_gpu_processor_id.
> 
> Regards,
>    Felix
> 
> 
>> +
>>      if (kfd_topology_add_device(kfd)) {
>>              dev_err(kfd_device, "Error adding device to topology\n");
>>              goto kfd_topology_add_device_error;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index 182eb67edbc5..4c06b233472f 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -370,6 +370,9 @@ struct kfd_dev {
>>   
>>      /* Track per device allocated watch points. */
>>      uint32_t alloc_watch_ids;
>> +
>> +    /* cu processor id base */
>> +    unsigned int    processor_id_low;
>>   };
>>   
>>   struct kfd_ipc_obj;

Reply via email to