AMDGPU_GEM_CREATE_GFX12_DCC is set on 90% of all memory allocations, and almost all of them are not displayable. Shouldn't we use a different way to indicate that we need a non-power-of-two alignment, such as looking at the alignment field directly?
Marek On Tue, Jul 16, 2024, 11:45 Arunpravin Paneer Selvam < arunpravin.paneersel...@amd.com> wrote: > Add address alignment support to the DCC VRAM buffers. > > v2: > - adjust size based on the max_texture_channel_caches values > only for GFX12 DCC buffers. > - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only > for DCC buffers. > - roundup non power of two DCC buffer adjusted size to nearest > power of two number as the buddy allocator does not support non > power of two alignments. This applies only to the contiguous > DCC buffers. > > v3:(Alex) > - rewrite the max texture channel caches comparison code in an > algorithmic way to determine the alignment size. > > v4:(Alex) > - Move the logic from amdgpu_vram_mgr_dcc_alignment() to gmc_v12_0.c > and add a new gmc func callback for dcc alignment. If the callback > is non-NULL, call it to get the alignment, otherwise, use the default. > > v5:(Alex) > - Set the Alignment to a default value if the callback doesn't exist. > - Add the callback to amdgpu_gmc_funcs. > > v6: > - Fix checkpatch error reported by Intel CI. > > Signed-off-by: Arunpravin Paneer Selvam <arunpravin.paneersel...@amd.com> > Acked-by: Alex Deucher <alexander.deuc...@amd.com> > Acked-by: Christian König <christian.koe...@amd.com> > Reviewed-by: Frank Min <frank....@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 36 ++++++++++++++++++-- > drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 15 ++++++++ > 3 files changed, 55 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > index febca3130497..654d0548a3f8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs { > uint64_t addr, uint64_t *flags); > /* get the amount of memory used by the vbios for pre-OS console */ > unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); > + /* get the DCC buffer alignment */ > + u64 (*get_dcc_alignment)(struct amdgpu_device *adev); > > enum amdgpu_memory_partition (*query_mem_partition_mode)( > struct amdgpu_device *adev); > @@ -363,6 +365,10 @@ struct amdgpu_gmc { > (adev)->gmc.gmc_funcs->override_vm_pte_flags \ > ((adev), (vm), (addr), (pte_flags)) > #define amdgpu_gmc_get_vbios_fb_size(adev) > (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) > +#define amdgpu_gmc_get_dcc_alignment(_adev) ({ \ > + typeof(_adev) (adev) = (_adev); \ > + ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev))); \ > +}) > > /** > * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through > the BAR > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > index f91cc149d06c..aa9dca12371c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > @@ -512,6 +512,16 @@ static int amdgpu_vram_mgr_new(struct > ttm_resource_manager *man, > vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; > > remaining_size = (u64)vres->base.size; > + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && > + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) { > + u64 adjust_size; > + > + if (adev->gmc.gmc_funcs->get_dcc_alignment) { > + adjust_size = amdgpu_gmc_get_dcc_alignment(adev); > + remaining_size = roundup_pow_of_two(remaining_size > + adjust_size); > + vres->flags |= DRM_BUDDY_TRIM_DISABLE; > + } > + } > > mutex_lock(&mgr->lock); > while (remaining_size) { > @@ -521,8 +531,12 @@ static int amdgpu_vram_mgr_new(struct > ttm_resource_manager *man, > min_block_size = mgr->default_page_size; > > size = remaining_size; > - if ((size >= (u64)pages_per_block << PAGE_SHIFT) && > - !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) > + > + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && > + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) > + min_block_size = size; > + else if ((size >= (u64)pages_per_block << PAGE_SHIFT) && > + !(size & (((u64)pages_per_block << PAGE_SHIFT) - > 1))) > min_block_size = (u64)pages_per_block << > PAGE_SHIFT; > > BUG_ON(min_block_size < mm->chunk_size); > @@ -553,6 +567,24 @@ static int amdgpu_vram_mgr_new(struct > ttm_resource_manager *man, > } > mutex_unlock(&mgr->lock); > > + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && > + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) { > + struct drm_buddy_block *dcc_block; > + u64 dcc_start, alignment; > + > + dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks); > + dcc_start = amdgpu_vram_mgr_block_start(dcc_block); > + > + if (adev->gmc.gmc_funcs->get_dcc_alignment) { > + alignment = amdgpu_gmc_get_dcc_alignment(adev); > + /* Adjust the start address for DCC buffers only */ > + dcc_start = roundup(dcc_start, alignment); > + drm_buddy_block_trim(mm, &dcc_start, > + (u64)vres->base.size, > + &vres->blocks); > + } > + } > + > vres->base.start = 0; > size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), > vres->base.size); > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > index fd3ac483760e..4259edcdec8a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > @@ -542,6 +542,20 @@ static unsigned gmc_v12_0_get_vbios_fb_size(struct > amdgpu_device *adev) > return 0; > } > > +static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev) > +{ > + u64 max_tex_channel_caches, alignment; > + > + max_tex_channel_caches = > adev->gfx.config.max_texture_channel_caches; > + if (is_power_of_2(max_tex_channel_caches)) > + alignment = (max_tex_channel_caches / SZ_4) * > max_tex_channel_caches; > + else > + alignment = roundup_pow_of_two(max_tex_channel_caches) * > + max_tex_channel_caches; > + > + return (u64)alignment * SZ_1K; > +} > + > static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { > .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb, > .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, > @@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs > gmc_v12_0_gmc_funcs = { > .get_vm_pde = gmc_v12_0_get_vm_pde, > .get_vm_pte = gmc_v12_0_get_vm_pte, > .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, > + .get_dcc_alignment = gmc_v12_0_get_dcc_alignment, > }; > > static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev) > -- > 2.25.1 > >