On Wed, Nov 6, 2024 at 1:49 AM Victor Zhao <victor.z...@amd.com> wrote:
>
> From: Gang Ba <g...@amd.com>
>
> amdgpu_amdkfd_alloc_gtt_mem currently always allocates USWC memory.
> USWC memory uses write-combining for CPU access, which makes CPU
> reads slow. Add a new parameter to amdgpu_amdkfd_alloc_gtt_mem so
> that callers can request normal GTT memory instead. All existing
> callers keep requesting USWC.
>
> Signed-off-by: Gang Ba <g...@amd.com>
> Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>
> Signed-off-by: Victor Zhao <victor.z...@amd.com>

Reviewed-by: Alex Deucher <alexander.deuc...@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c             | 7 +++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h             | 2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c                | 2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c        | 2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c               | 2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 +-
>  7 files changed, 11 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 713be49540d4..acb762855c24 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -294,7 +294,7 @@ void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
>
>  int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
>                                 void **mem_obj, uint64_t *gpu_addr,
> -                               void **cpu_ptr, bool cp_mqd_gfx9)
> +                               void **cpu_ptr, bool cp_mqd_gfx9, bool is_uswc_mode)
>  {
>         struct amdgpu_bo *bo = NULL;
>         struct amdgpu_bo_param bp;
> @@ -305,7 +305,10 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
>         bp.size = size;
>         bp.byte_align = PAGE_SIZE;
>         bp.domain = AMDGPU_GEM_DOMAIN_GTT;
> -       bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
> +       if (is_uswc_mode)
> +               bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
> +       else
> +               bp.flags = 0;
>         bp.type = ttm_bo_type_kernel;
>         bp.resv = NULL;
>         bp.bo_ptr_size = sizeof(struct amdgpu_bo);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 4b80ad860639..745121e0dd8e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -234,7 +234,7 @@ int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
>  /* Shared API */
>  int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
>                                 void **mem_obj, uint64_t *gpu_addr,
> -                               void **cpu_ptr, bool mqd_gfx9);
> +                               void **cpu_ptr, bool mqd_gfx9, bool is_uswc_mode);
>  void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj);
>  int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
>                                 void **mem_obj);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 956198da7859..1f1d79ac5e6c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -779,7 +779,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>         if (amdgpu_amdkfd_alloc_gtt_mem(
>                         kfd->adev, size, &kfd->gtt_mem,
>                         &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
> -                       false)) {
> +                       false, true)) {
>                 dev_err(kfd_device, "Could not allocate %d bytes\n", size);
>                 goto alloc_gtt_mem_failure;
>         }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 38c19dc8311d..9672542578d4 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -2783,7 +2783,7 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
>
>         retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
>                 &(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
> -               (void *)&(mem_obj->cpu_ptr), false);
> +               (void *)&(mem_obj->cpu_ptr), false, true);
>
>         return retval;
>  }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> index 84e8ea3a8a0c..c9882f1d4419 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> @@ -140,7 +140,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
>                         NUM_XCC(node->xcc_mask),
>                         &(mqd_mem_obj->gtt_mem),
>                         &(mqd_mem_obj->gpu_addr),
> -                       (void *)&(mqd_mem_obj->cpu_ptr), true);
> +                       (void *)&(mqd_mem_obj->cpu_ptr), true, true);
>
>                 if (retval) {
>                         kfree(mqd_mem_obj);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 6bab6fc6a35d..e1d8fd11d2b1 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -1639,7 +1639,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
>                                                 &pdd->proc_ctx_bo,
>                                                 &pdd->proc_ctx_gpu_addr,
>                                                 &pdd->proc_ctx_cpu_ptr,
> -                                               false);
> +                                               false, true);
>                 if (retval) {
>                         dev_err(dev->adev->dev,
>                                 "failed to allocate process context bo\n");
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> index c76db22a1000..3a3be0e19fb9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> @@ -260,7 +260,7 @@ static int init_user_queue(struct process_queue_manager *pqm,
>                                                 &(*q)->gang_ctx_bo,
>                                                 &(*q)->gang_ctx_gpu_addr,
>                                                 &(*q)->gang_ctx_cpu_ptr,
> -                                               false);
> +                                               false, true);
>                 if (retval) {
>                         pr_err("failed to allocate gang context bo\n");
>                         goto cleanup;
> --
> 2.34.1
>

Reply via email to