[AMD Official Use Only - AMD Internal Distribution Only]

Yes, I agree. I’ve just been notified that this memory configuration is a 
mistake rather than a valid use case, so the fix is low priority for now.

-----Original Message-----
From: Limonciello, Mario <[email protected]>
Sent: Friday, March 20, 2026 11:14 AM
To: Zhang, Yifan <[email protected]>; [email protected]
Cc: Deucher, Alexander <[email protected]>; Koenig, Christian 
<[email protected]>; Limonciello, Mario <[email protected]>; 
Yuan, Perry <[email protected]>
Subject: Re: [PATCH v2] drm/amdkfd: check system memory when set apu_prefer_gtt



On 3/19/2026 2:32 AM, Yifan Zhang wrote:
> The current apu_prefer_gtt setting only checks gtt_size, which the user
> can set to a value larger than system memory (via the ttm module
> parameter pages_limit). E.g. carveout VRAM 32GB, gtt_size 50GB (via
> the ttm module parameter pages_limit), system memory 31GB. In that case,
> apu_prefer_gtt will be set incorrectly. Take system memory into
> account when setting apu_prefer_gtt.
>

Wouldn't it be cleaner to do this in TTM? I.e., check whether the user-supplied
pages_limit value is invalid, and if so warn and clamp it, something like:

if (user > possible) {
     pr_warn("Requested invalid %d pages, limiting to %d pages", user, 
possible);
     user = possible;
}

Then we can always trust what we get from TTM.

> Signed-off-by: Yifan Zhang <[email protected]>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c       | 2 --
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h       | 4 ++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 ++++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c          | 7 ++++++-
>   4 files changed, 12 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 3bfd79c89df3..a6ee9d9bfafb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -170,8 +170,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>       int i;
>       int last_valid_bit;
>
> -     amdgpu_amdkfd_gpuvm_init_mem_limits();
> -
>       if (adev->kfd.dev) {
>               struct kgd2kfd_shared_resources gpu_resources = {
>                       .compute_vmid_bitmap =
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index cdbab7f8cee8..13cada7da4a9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -369,7 +369,7 @@ u64 amdgpu_amdkfd_xcp_memory_size(struct
> amdgpu_device *adev, int xcp_id);
>
>
>   #if IS_ENABLED(CONFIG_HSA_AMD)
> -void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
> +uint64_t amdgpu_amdkfd_gpuvm_init_mem_limits(void);
>   void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
>                               struct amdgpu_vm *vm);
>
> @@ -382,7 +382,7 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
>   void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
>   #else
>   static inline
> -void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
> +uint64_t amdgpu_amdkfd_gpuvm_init_mem_limits(void)
>   {
>   }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 8a869fe41acd..4fba7d2f34a9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -109,13 +109,13 @@ static bool reuse_dmamap(struct amdgpu_device *adev, 
> struct amdgpu_device *bo_ad
>    *  System (TTM + userptr) memory - 15/16th System RAM
>    *  TTM memory - 3/8th System RAM
>    */
> -void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
> +uint64_t amdgpu_amdkfd_gpuvm_init_mem_limits(void)
>   {
>       struct sysinfo si;
>       uint64_t mem;
>
>       if (kfd_mem_limit.max_system_mem_limit)
> -             return;
> +             return kfd_mem_limit.max_system_mem_limit;
>
>       si_meminfo(&si);
>       mem = si.totalram - si.totalhigh;
> @@ -132,6 +132,8 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
>       pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
>               (kfd_mem_limit.max_system_mem_limit >> 20),
>               (kfd_mem_limit.max_ttm_mem_limit >> 20));
> +
> +     return kfd_mem_limit.max_system_mem_limit;
>   }
>
>   void amdgpu_amdkfd_reserve_system_mem(uint64_t size) diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 714fd8d12ca5..df98ece071e1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -2071,6 +2071,7 @@ static void amdgpu_ttm_buffer_entity_fini(struct 
> amdgpu_gtt_mgr *mgr,
>   int amdgpu_ttm_init(struct amdgpu_device *adev)
>   {
>       uint64_t gtt_size;
> +     uint64_t max_system_mem_limit;
>       int r;
>
>       dma_set_max_seg_size(adev->dev, UINT_MAX); @@ -2210,8 +2211,12 @@
> int amdgpu_ttm_init(struct amdgpu_device *adev)
>       dev_info(adev->dev, " %uM of GTT memory ready.\n",
>                (unsigned int)(gtt_size / (1024 * 1024)));
>
> +
> +     max_system_mem_limit = amdgpu_amdkfd_gpuvm_init_mem_limits();
> +
>       if (adev->flags & AMD_IS_APU) {
> -             if (adev->gmc.real_vram_size < gtt_size)
> +             if (adev->gmc.real_vram_size < gtt_size &&
> +                     adev->gmc.real_vram_size < max_system_mem_limit)
>                       adev->apu_prefer_gtt = true;
>       }
>

Reply via email to