Hi Felix, Thanks for review. I will fix these problems in v2.
Regards, Ma Jun On 10/25/2022 5:03 AM, Felix Kuehling wrote: > On 2022-10-24 07:26, Ma Jun wrote: >> Init and save the base cu processor id for later use >> >> Signed-off-by: Ma Jun <jun....@amd.com> >> --- >> drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 24 +-------------------- >> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 28 +++++++++++++++++++++++++ >> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 +++ >> 3 files changed, 32 insertions(+), 23 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c >> b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c >> index d25ac9cbe5b2..4857ec5b9f46 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c >> @@ -30,26 +30,6 @@ >> #include "amdgpu.h" >> #include "amdgpu_amdkfd.h" >> >> -/* GPU Processor ID base for dGPUs for which VCRAT needs to be created. >> - * GPU processor ID are expressed with Bit[31]=1. >> - * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs >> - * used in the CRAT. >> - */ >> -static uint32_t gpu_processor_id_low = 0x80001000; >> - >> -/* Return the next available gpu_processor_id and increment it for next GPU >> - * @total_cu_count - Total CUs present in the GPU including ones >> - * masked off >> - */ >> -static inline unsigned int get_and_inc_gpu_processor_id( >> - unsigned int total_cu_count) >> -{ >> - int current_id = gpu_processor_id_low; >> - >> - gpu_processor_id_low += total_cu_count; >> - return current_id; >> -} >> - >> /* Static table to describe GPU Cache information */ >> struct kfd_gpu_cache_info { >> uint32_t cache_size; >> @@ -2223,7 +2203,6 @@ static int kfd_create_vcrat_image_gpu(void >> *pcrat_image, >> struct crat_subtype_computeunit *cu; >> struct kfd_cu_info cu_info; >> int avail_size = *size; >> - uint32_t total_num_of_cu; >> int num_of_cache_entries = 0; >> int cache_mem_filled = 0; >> uint32_t nid = 0; >> @@ -2275,8 +2254,7 @@ static int kfd_create_vcrat_image_gpu(void >> *pcrat_image, >> cu->wave_front_size = cu_info.wave_front_size; >> cu->array_count = cu_info.num_shader_arrays_per_engine * >> cu_info.num_shader_engines; >> - total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh); >> - cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu); >> + cu->processor_id_low = kdev->processor_id_low; > > I don't understand why you can't leave the call to get_and_inc here. You > could set kdev->processor_id_low here. Or set it when parsing the CRAT > table later. Setting it in kdev before creating the topology seems > backwards. > > >> cu->num_cu_per_array = cu_info.num_cu_per_sh; >> cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu; >> cu->num_banks = cu_info.num_shader_engines; >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c >> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c >> index ae1f0be3f3a1..099df4598a42 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c >> @@ -38,6 +38,32 @@ >> >> #define MQD_SIZE_ALIGNED 768 >> >> +/* GPU Processor ID base for dGPUs for which VCRAT needs to be created. >> + * GPU processor ID are expressed with Bit[31]=1. >> + * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs >> + * used in the CRAT. >> + */ >> +static uint32_t gpu_processor_id_low = 0x80001000; >> + >> +/* Return the next available gpu_processor_id and increment it for next GPU >> + * @total_cu_count - Total CUs present in the GPU including ones >> + * masked off >> + */ >> +static inline unsigned int get_and_inc_gpu_processor_id(struct kfd_dev *kfd) >> +{ >> + struct amdgpu_device *adev = kfd->adev; >> + unsigned int array_count = 0; >> + unsigned int total_cu_count = 0; >> + >> + kfd->processor_id_low = gpu_processor_id_low; >> + >> + array_count = adev->gfx.config.max_sh_per_se * >> adev->gfx.config.max_shader_engines; >> + total_cu_count = array_count * adev->gfx.config.max_cu_per_sh; >> + >> + gpu_processor_id_low += total_cu_count; > > Can this function fun on two threads concurrently? If yes, you may need > a lock here. > > >> + return 0; > > If this function cannot fail, you should make it return void. > > >> +} >> + >> /* >> * kfd_locked is used to lock the kfd driver during suspend or reset >> * once locked, kfd driver will stop any further GPU execution. >> @@ -656,6 +682,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, >> >> amdgpu_amdkfd_get_local_mem_info(kfd->adev, &kfd->local_mem_info); >> >> + get_and_inc_gpu_processor_id(kfd); > > You're ignoring the return value. Since the function cannot fail, make > it void. The name "get_..." implies that it should return something, > though, so maybe change the name. E.g. assign_gpu_processor_id. > > Regards, > Felix > > >> + >> if (kfd_topology_add_device(kfd)) { >> dev_err(kfd_device, "Error adding device to topology\n"); >> goto kfd_topology_add_device_error; >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> index 182eb67edbc5..4c06b233472f 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> @@ -370,6 +370,9 @@ struct kfd_dev { >> >> /* Track per device allocated watch points. */ >> uint32_t alloc_watch_ids; >> + >> + /* cu processor id base */ >> + unsigned int processor_id_low; >> }; >> >> struct kfd_ipc_obj;