Hi Christian,
Thank you for the review and the feedback. I will update the patch accordingly. Please see my two inline comments below.

Regards,
Sam

> From: Christian König <ckoenig.leichtzumer...@gmail.com>
> Date: Wednesday, April 16, 2025 at 21:52
> To: Zhang, GuoQing (Sam) <guoqing.zh...@amd.com>,
> amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Zhao, Victor <victor.z...@amd.com>, Chang, HaiJun <haijun.ch...@amd.com>,
> Deng, Emily <emily.d...@amd.com>
> Subject: Re: [PATCH 4/6] drm/amdgpu: enable pdb0 for hibernation on SRIOV
>
> On 14.04.25 at 12:46, Samuel Zhang wrote:
> > When switching to new GPU index after hibernation and then resume,
> > VRAM offset of each VRAM BO will be changed, and the cached gpu
> > addresses needed to updated.
> >
> > This is to enable pdb0 and switch to use pdb0-based virtual gpu
> > address by default in amdgpu_bo_create_reserved(). since the virtual
> > addresses do not change, this can avoid the need to update all
> > cached gpu addresses all over the codebase.
> >
> > Signed-off-by: Emily Deng <emily.d...@amd.com>
> > Signed-off-by: Samuel Zhang <guoqing.zh...@amd.com>
> > Change-Id: I2b20b9b94f1e41820a013ce5d05bb3fa96859b21
> > ---
> > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 43 +++++++++++++++-------
> > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 +
> > drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 8 +++-
> > drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 2 +-
> > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 29 +++++++++------
> > drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 30 ++++++++++++---
> > 6 files changed, 82 insertions(+), 31 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> > index 5b60d714e089..e706afcb7e95 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> > @@ -248,18 +248,25 @@ void amdgpu_gmc_vram_location(struct amdgpu_device
> > *adev, struct amdgpu_gmc *mc,
> > void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct
> > amdgpu_gmc *mc)
> > {
> > u64 hive_vram_start = 0;
> > - u64 hive_vram_end = mc->xgmi.node_segment_size *
> > mc->xgmi.num_physical_nodes - 1;
> > - mc->vram_start = mc->xgmi.node_segment_size *
> > mc->xgmi.physical_node_id;
> > - mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
> > + u64 hive_vram_end = mc->xgmi.node_segment_size *
> > mc->xgmi.num_physical_nodes;
> > +
> > + hive_vram_end = ALIGN(hive_vram_end,
> > (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21) - 1;
> > +
> > + if (!mc->vram_start) {
> > + mc->vram_start = mc->xgmi.node_segment_size *
> > mc->xgmi.physical_node_id;
> > + mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size -
> > 1;
> > + dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM
> > used)\n",
> > + mc->mc_vram_size >> 20, mc->vram_start,
> > + mc->vram_end, mc->real_vram_size >> 20);
> > + }
> > +
> > mc->gart_start = hive_vram_end + 1;
> > mc->gart_end = mc->gart_start + mc->gart_size - 1;
> > mc->fb_start = hive_vram_start;
> > mc->fb_end = hive_vram_end;
> > - dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM
> > used)\n",
> > - mc->mc_vram_size >> 20, mc->vram_start,
> > - mc->vram_end, mc->real_vram_size >> 20);
> > - dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
> > - mc->gart_size >> 20, mc->gart_start, mc->gart_end);
> > +
> > + dev_info(adev->dev, "FB 0x%016llX - 0x%016llX, GART: %lluM 0x%016llX
> > - 0x%016llX\n",
> > + mc->fb_start, mc->fb_end, mc->gart_size >> 20,
> > mc->gart_start, mc->gart_end);
> > }
> >
> > /**
> > @@ -677,8 +684,9 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device
> > *adev, uint32_t vmid,
> > &job);
> > if (r)
> > goto error_alloc;
> > -
> > - job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
> > + job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
> > + adev->gmc.pdb0_bo :
> > + adev->gart.bo);
> > job->vm_needs_flush = true;
> > job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
> > amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> > @@ -1041,8 +1049,9 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
> > */
> > u64 vram_size = adev->gmc.xgmi.node_segment_size *
> > adev->gmc.xgmi.num_physical_nodes;
> > u64 pde0_page_size =
> > (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21;
> > - u64 vram_addr = adev->vm_manager.vram_base_offset -
> > adev->gmc.xgmi.physical_node_id *
> > adev->gmc.xgmi.node_segment_size;
> > + u64 vram_addr_first = adev->vm_manager.vram_base_offset -
> > adev->gmc.xgmi.physical_node_id *
> > adev->gmc.xgmi.node_segment_size;
> > + u64 vram_addr = adev->vm_manager.vram_base_offset;
> > u64 vram_end = vram_addr + vram_size;
> > u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
> > int idx;
> > @@ -1056,11 +1065,19 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device
> > *adev)
> > flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size +
> > 9*1));
> > flags |= AMDGPU_PDE_PTE_FLAG(adev);
> >
> > + if (adev->gmc.xgmi.connected_to_cpu) {
> > + vram_addr = vram_addr_first;
> > + vram_end = vram_addr + vram_size;
> > + }
> > +
> > /* The first n PDE0 entries are used as PTE,
> > * pointing to vram
> > */
> > - for (i = 0; vram_addr < vram_end; i++, vram_addr += pde0_page_size)
> > - amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i,
> > vram_addr, flags);
> > + for (i = 0; vram_addr < vram_end; i++, vram_addr += pde0_page_size) {
> > + amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i,
> > + (vram_addr >= vram_addr_first + vram_size) ?
> > (vram_addr - vram_size) : vram_addr,
> > + flags);
> > + }
> >
> > /* The n+1'th PDE0 entry points to a huge
> > * PTB who has more than 512 entries each
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> > index bd7fc123b8f9..758b47240c6f 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> > @@ -307,6 +307,7 @@ struct amdgpu_gmc {
> > struct amdgpu_bo *pdb0_bo;
> > /* CPU kmapped address of pdb0*/
> > void *ptr_pdb0;
> > + bool enable_pdb0;
> >
> > /* MALL size */
> > u64 mall_size;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > index d90e9daf5a50..83a3444c69d9 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > @@ -287,8 +287,14 @@ int amdgpu_bo_create_reserved(struct amdgpu_device
> > *adev,
> > goto error_unpin;
> > }
> >
> > - if (gpu_addr)
> > + if (gpu_addr) {
> > *gpu_addr = amdgpu_bo_gpu_offset(*bo_ptr);
> > + if (!adev->gmc.xgmi.connected_to_cpu &&
> > adev->gmc.enable_pdb0) {
> > + if ((*bo_ptr)->tbo.resource->mem_type == TTM_PL_VRAM)
> > {
> > + *gpu_addr -= amdgpu_ttm_domain_start(adev,
> > TTM_PL_VRAM);
> > + }
> > + }
> > + }
>
> Please NAK to that approach here. The GPU offset should still point into the
> mapped VRAM.

This change switches the default GPU address returned here from an FB-aperture-based address to a pdb0-based address in this one centralized place, so that I don't need to change every call site of amdgpu_bo_create_reserved(). Could you suggest a better approach if this one is not acceptable?
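To spell out what that centralization amounts to: the adjustment is equivalent to the small helper below. This is an illustrative sketch only, not something the patch adds; the helper name is made up, and the fields and calls just mirror the hunk above (assuming the usual amdgpu.h / amdgpu_object.h / amdgpu_ttm.h context).

static u64 amdgpu_bo_gpu_offset_pdb0(struct amdgpu_device *adev,
                                     struct amdgpu_bo *bo)
{
        /* Default: the address inside the FB aperture. */
        u64 addr = amdgpu_bo_gpu_offset(bo);

        /* With pdb0 enabled on SRIOV (and no CPU XGMI link), a VRAM BO is
         * converted to a pdb0-based address by subtracting the start of the
         * TTM VRAM domain, exactly as the hunk above does inside
         * amdgpu_bo_create_reserved().
         */
        if (adev->gmc.enable_pdb0 && !adev->gmc.xgmi.connected_to_cpu &&
            bo->tbo.resource->mem_type == TTM_PL_VRAM)
                addr -= amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);

        return addr;
}

The intent is that the addresses cached by callers stay valid after resuming on a different GPU index, because the pdb0 mapping keeps the virtual layout stable even though the BO's physical VRAM offset changes.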
> >
> >
> > if (cpu_addr) {
> > r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
> > b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
> > index cb25f7f0dfc1..5ebb92ac9fd7 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
> > @@ -180,7 +180,7 @@ gfxhub_v1_2_xcc_init_system_aperture_regs(struct
> > amdgpu_device *adev,
> > /* In the case squeezing vram into GART aperture, we don't
> > use
> > * FB aperture and AGP aperture. Disable them.
> > */
> > - if (adev->gmc.pdb0_bo) {
> > + if (adev->gmc.pdb0_bo && !amdgpu_sriov_vf(adev)) {
> > WREG32_SOC15(GC, GET_INST(GC, i),
> > regMC_VM_FB_LOCATION_TOP, 0);
> > WREG32_SOC15(GC, GET_INST(GC, i),
> > regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF);
> > WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_TOP,
> > 0);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > index 7c7a9fe6be6d..73ac05b9a1bf 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > @@ -1677,6 +1677,10 @@ static int gmc_v9_0_early_init(struct
> > amdgpu_ip_block *ip_block)
> > adev->gmc.private_aperture_start + (4ULL << 30) - 1;
> > adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
> >
> > + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
> > + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
> > + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
> > + adev->gmc.enable_pdb0 = amdgpu_sriov_vf(adev);
> > return 0;
> > }
> >
> > @@ -1719,6 +1723,14 @@ static void gmc_v9_0_vram_gtt_location(struct
> > amdgpu_device *adev,
> > {
> > u64 base = adev->mmhub.funcs->get_fb_location(adev);
> >
> > + if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.enable_pdb0) {
> > + adev->gmc.vmid0_page_table_depth = 1;
> > + adev->gmc.vmid0_page_table_block_size = 12;
> > + } else {
> > + adev->gmc.vmid0_page_table_depth = 0;
> > + adev->gmc.vmid0_page_table_block_size = 0;
> > + }
> > +
>
> What is the justification to moving that stuff around?

vmid0_page_table_block_size is used by the new code in amdgpu_gmc_sysvm_location(), so it has to be set before that function runs. See the call sequence below:

gmc_v9_0_sw_init
  - gmc_v9_0_mc_init
    - gmc_v9_0_vram_gtt_location
      - vmid0_page_table_block_size = 12   **new location**
      - amdgpu_gmc_sysvm_location          (uses **vmid0_page_table_block_size**)
  - gmc_v9_0_gart_init
    - assign vmid0_page_table_block_size   **old location**
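To make the ordering dependency concrete, this is the arithmetic amdgpu_gmc_sysvm_location() now performs once the block size is already set (a sketch with made-up VRAM sizes; kernel context is assumed for the ALIGN() macro):

u64 node_segment_size = 36ULL << 30;                /* hypothetical 36 GiB per node */
u64 hive_vram_end     = node_segment_size * 3;      /* 3 nodes -> 108 GiB */
u64 block_size        = 12;                         /* vmid0_page_table_block_size */
u64 pde0_page_size    = (1ULL << block_size) << 21; /* 1ULL << 33 == 8 GiB per PDE0 entry */

/* Round the hive VRAM end up to a PDE0 boundary before placing GART behind
 * it: 108 GiB rounds up to 112 GiB, then -1 for the inclusive end address.
 * If block_size were still 0 at this point (the old init location in
 * gmc_v9_0_gart_init), the alignment granularity would only be 2 MiB.
 */
hive_vram_end = ALIGN(hive_vram_end, pde0_page_size) - 1;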
>
> > amdgpu_gmc_set_agp_default(adev, mc);
> >
> > /* add the xgmi offset of the physical node */
> > @@ -1727,7 +1739,10 @@ static void gmc_v9_0_vram_gtt_location(struct
> > amdgpu_device *adev,
> > amdgpu_gmc_sysvm_location(adev, mc);
> > } else {
> > amdgpu_gmc_vram_location(adev, mc, base);
> > - amdgpu_gmc_gart_location(adev, mc,
> > AMDGPU_GART_PLACEMENT_BEST_FIT);
> > + if (!adev->gmc.enable_pdb0)
> > + amdgpu_gmc_gart_location(adev, mc,
> > AMDGPU_GART_PLACEMENT_BEST_FIT);
> > + else
> > + amdgpu_gmc_sysvm_location(adev, mc);
> > if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
> > amdgpu_gmc_agp_location(adev, mc);
> > }
> > @@ -1838,14 +1853,6 @@ static int gmc_v9_0_gart_init(struct amdgpu_device
> > *adev)
> > return 0;
> > }
> >
> > - if (adev->gmc.xgmi.connected_to_cpu) {
> > - adev->gmc.vmid0_page_table_depth = 1;
> > - adev->gmc.vmid0_page_table_block_size = 12;
> > - } else {
> > - adev->gmc.vmid0_page_table_depth = 0;
> > - adev->gmc.vmid0_page_table_block_size = 0;
> > - }
> > -
> > /* Initialize common gart structure */
> > r = amdgpu_gart_init(adev);
> > if (r)
> > @@ -1864,7 +1871,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device
> > *adev)
> > if (r)
> > return r;
> >
> > - if (adev->gmc.xgmi.connected_to_cpu)
> > + if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.enable_pdb0)
>
> Drop the connected_to_cpu check here.
>
> > r = amdgpu_gmc_pdb0_alloc(adev);
> > }
> >
> > @@ -2361,7 +2368,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device
> > *adev)
> > {
> > int r;
> >
> > - if (adev->gmc.xgmi.connected_to_cpu)
> > + if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.enable_pdb0)
>
> And here.
>
> > amdgpu_gmc_init_pdb0(adev);
> >
> > if (adev->gart.bo == NULL) {
> > diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
> > b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
> > index fe0710b55c3a..13b229d07ac4 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
> > @@ -74,27 +74,47 @@ static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct
> > amdgpu_device *adev, int hu
> > static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev,
> > int hubid)
> > {
> > - uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
> > + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
> > adev->gmc.pdb0_bo : adev->gart.bo);
>
> That can be written as adev->gmc.pdb0_bo ?: adev->gart.bo
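Will do. For reference, with the GNU C conditional operator with omitted middle operand (a ?: b yields a when a is non-NULL, otherwise b) the line becomes:

/* Same behaviour as the longer ternary above, just shorter. */
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?: adev->gart.bo);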
> >
> > mmhub_v9_4_setup_hubid_vm_pt_regs(adev, hubid, 0, pt_base);
> >
> > - WREG32_SOC15_OFFSET(MMHUB, 0,
> > + if (adev->gmc.pdb0_bo) {
> > + WREG32_SOC15_OFFSET(MMHUB, 0,
> > +
> > mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
> > + hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
> > + (u32)(adev->gmc.fb_start >> 12));
> > + WREG32_SOC15_OFFSET(MMHUB, 0,
> > +
> > mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
> > + hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
> > + (u32)(adev->gmc.fb_start >> 44));
> > +
> > + WREG32_SOC15_OFFSET(MMHUB, 0,
> > +
> > mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
> > + hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
> > + (u32)(adev->gmc.gart_end >> 12));
> > + WREG32_SOC15_OFFSET(MMHUB, 0,
> > +
> > mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
> > + hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
> > + (u32)(adev->gmc.gart_end >> 44));
> > + } else {
> > ++ WREG32_SOC15_OFFSET(MMHUB, 0,
> > mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
> > hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
> > (u32)(adev->gmc.gart_start >> 12));
> > - WREG32_SOC15_OFFSET(MMHUB, 0,
> > + WREG32_SOC15_OFFSET(MMHUB, 0,
> > mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
> > hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
> > (u32)(adev->gmc.gart_start >> 44));
>
> When you indent the WREG32_SOC15_OFFSET() you need to indent the following
> lines as well.
>
> >
> > - WREG32_SOC15_OFFSET(MMHUB, 0,
> > + WREG32_SOC15_OFFSET(MMHUB, 0,
> > mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
> > hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
> > (u32)(adev->gmc.gart_end >> 12));
> > - WREG32_SOC15_OFFSET(MMHUB, 0,
> > + WREG32_SOC15_OFFSET(MMHUB, 0,
> > mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
> > hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
> > (u32)(adev->gmc.gart_end >> 44));
> > + }
>
> The programming of the end addr is still the same, you don't need to change
> anything here.
>
> Regards,
> Christian.
>
> > }
> >
> > static void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev,
> > uint32_t vmid,
>