On 5/28/25 09:46, Lazar, Lijo wrote: > On 5/22/2025 4:10 PM, Samuel Zhang wrote: >> When switching to new GPU index after hibernation and then resume, >> VRAM offset of each VRAM BO will be changed, and the cached gpu >> addresses needed to updated. >> >> This is to enable pdb0 and switch to use pdb0-based virtual gpu >> address by default in amdgpu_bo_create_reserved(). since the virtual >> addresses do not change, this can avoid the need to update all >> cached gpu addresses all over the codebase. >> >> Signed-off-by: Emily Deng <emily.d...@amd.com> >> Signed-off-by: Samuel Zhang <guoqing.zh...@amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 28 ++++++++++++++++++------ >> drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 8 ++++--- >> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 13 +++++++---- >> drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 6 +++-- >> 4 files changed, 39 insertions(+), 16 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c >> index d1fa5e8e3937..74b565283aa9 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c >> @@ -38,6 +38,8 @@ >> #include <drm/drm_drv.h> >> #include <drm/ttm/ttm_tt.h> >> >> +static const u64 four_gb = 0x100000000ULL; >> + >> /** >> * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0 >> * >> @@ -251,10 +253,20 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device >> *adev, struct amdgpu_gmc *mc >> u64 hive_vram_end = mc->xgmi.node_segment_size * >> mc->xgmi.num_physical_nodes - 1; >> mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id; >> mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1; >> - mc->gart_start = hive_vram_end + 1; >> + /* node_segment_size may not 4GB aligned on SRIOV, align up is needed. >> */ >> + mc->gart_start = ALIGN(hive_vram_end + 1, four_gb); >> mc->gart_end = mc->gart_start + mc->gart_size - 1; >> - mc->fb_start = hive_vram_start; >> - mc->fb_end = hive_vram_end; >> + if (amdgpu_virt_xgmi_migrate_enabled(adev)) { >> + /* set mc->vram_start to 0 to switch the returned GPU address of >> + * amdgpu_bo_create_reserved() from FB aperture to GART >> aperture. >> + */ >> + mc->vram_start = 0; >> + mc->vram_end = mc->vram_start + mc->mc_vram_size - 1; >> + mc->visible_vram_size = min(mc->visible_vram_size, >> mc->real_vram_size); >> + } else { >> + mc->fb_start = hive_vram_start; >> + mc->fb_end = hive_vram_end; >> + } >> dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n", >> mc->mc_vram_size >> 20, mc->vram_start, >> mc->vram_end, mc->real_vram_size >> 20); >> @@ -276,7 +288,6 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device >> *adev, struct amdgpu_gmc *mc >> void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc >> *mc, >> enum amdgpu_gart_placement gart_placement) >> { >> - const uint64_t four_gb = 0x100000000ULL; >> u64 size_af, size_bf; >> /*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/ >> u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1); >> @@ -1053,9 +1064,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev) >> */ >> u64 vram_size = adev->gmc.xgmi.node_segment_size * >> adev->gmc.xgmi.num_physical_nodes; >> u64 pde0_page_size = (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21; >> - u64 vram_addr = adev->vm_manager.vram_base_offset - >> - adev->gmc.xgmi.physical_node_id * >> adev->gmc.xgmi.node_segment_size; >> - u64 vram_end = vram_addr + vram_size; >> + u64 vram_addr, vram_end; >> u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo); >> int idx; >> >> @@ -1068,6 +1077,11 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev) >> flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1)); >> flags |= AMDGPU_PDE_PTE_FLAG(adev); >> >> + vram_addr = adev->vm_manager.vram_base_offset; >> + if (!amdgpu_virt_xgmi_migrate_enabled(adev)) >> + vram_addr -= adev->gmc.xgmi.physical_node_id * >> adev->gmc.xgmi.node_segment_size; >> + vram_end = vram_addr + vram_size; >> + >> /* The first n PDE0 entries are used as PTE, >> * pointing to vram >> */ >> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c >> b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c >> index cb25f7f0dfc1..32a3987bef80 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c >> @@ -76,6 +76,8 @@ static void gfxhub_v1_2_xcc_init_gart_aperture_regs(struct >> amdgpu_device *adev, >> { >> uint64_t pt_base; >> int i; >> + uint64_t gart_start = amdgpu_virt_xgmi_migrate_enabled(adev) ? >> + adev->gmc.vram_start : adev->gmc.fb_start; >> >> if (adev->gmc.pdb0_bo) >> pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo); >> @@ -91,10 +93,10 @@ static void >> gfxhub_v1_2_xcc_init_gart_aperture_regs(struct amdgpu_device *adev, >> if (adev->gmc.pdb0_bo) { >> WREG32_SOC15(GC, GET_INST(GC, i), >> regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, >> - (u32)(adev->gmc.fb_start >> 12)); >> + (u32)(gart_start >> 12)); >> WREG32_SOC15(GC, GET_INST(GC, i), >> regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, >> - (u32)(adev->gmc.fb_start >> 44)); >> + (u32)(gart_start >> 44)); >> >> WREG32_SOC15(GC, GET_INST(GC, i), >> regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, >> @@ -180,7 +182,7 @@ gfxhub_v1_2_xcc_init_system_aperture_regs(struct >> amdgpu_device *adev, >> /* In the case squeezing vram into GART aperture, we don't use >> * FB aperture and AGP aperture. Disable them. >> */ >> - if (adev->gmc.pdb0_bo) { >> + if (adev->gmc.pdb0_bo && adev->gmc.xgmi.connected_to_cpu) { >> WREG32_SOC15(GC, GET_INST(GC, i), >> regMC_VM_FB_LOCATION_TOP, 0); >> WREG32_SOC15(GC, GET_INST(GC, i), >> regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF); >> WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, 0); >> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> index 59385da80185..3003aa5a53e2 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> @@ -413,6 +413,11 @@ static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = >> { >> (0x001d43e0 + 0x00001800), >> }; >> >> +static inline bool amdgpu_gmc_is_pdb0_enabled(struct amdgpu_device *adev) >> +{ >> + return adev->gmc.xgmi.connected_to_cpu || >> amdgpu_virt_xgmi_migrate_enabled(adev); >> +} >> + > > What I meant is - since this is generic logic you could keep it as > amdgpu_gmc_* function and maintain as inline in amdgpu_gmc.h
Yeah agrre. One more little style nit pick below. > > Thanks, > Lijo > >> static inline bool gmc_v9_0_is_multi_chiplet(struct amdgpu_device *adev) >> { >> return !!adev->aid_mask; >> @@ -1726,7 +1731,7 @@ static void gmc_v9_0_vram_gtt_location(struct >> amdgpu_device *adev, >> >> /* add the xgmi offset of the physical node */ >> base += adev->gmc.xgmi.physical_node_id * >> adev->gmc.xgmi.node_segment_size; >> - if (adev->gmc.xgmi.connected_to_cpu) { >> + if (amdgpu_gmc_is_pdb0_enabled(adev)) { >> amdgpu_gmc_sysvm_location(adev, mc); >> } else { >> amdgpu_gmc_vram_location(adev, mc, base); >> @@ -1841,7 +1846,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device >> *adev) >> return 0; >> } >> >> - if (adev->gmc.xgmi.connected_to_cpu) { >> + if (amdgpu_gmc_is_pdb0_enabled(adev)) { >> adev->gmc.vmid0_page_table_depth = 1; >> adev->gmc.vmid0_page_table_block_size = 12; >> } else { >> @@ -1867,7 +1872,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device >> *adev) >> if (r) >> return r; >> >> - if (adev->gmc.xgmi.connected_to_cpu) >> + if (amdgpu_gmc_is_pdb0_enabled(adev)) >> r = amdgpu_gmc_pdb0_alloc(adev); >> } >> >> @@ -2372,7 +2377,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device >> *adev) >> { >> int r; >> >> - if (adev->gmc.xgmi.connected_to_cpu) >> + if (amdgpu_gmc_is_pdb0_enabled(adev)) >> amdgpu_gmc_init_pdb0(adev); >> >> if (adev->gart.bo == NULL) { >> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c >> b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c >> index 84cde1239ee4..2e3d0db9bc24 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c >> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c >> @@ -78,6 +78,8 @@ static void mmhub_v1_8_init_gart_aperture_regs(struct >> amdgpu_device *adev) >> uint64_t pt_base; >> u32 inst_mask; >> int i; >> + uint64_t gart_start = amdgpu_virt_xgmi_migrate_enabled(adev) ? >> + adev->gmc.vram_start : adev->gmc.fb_start; Please keep long lines early in the decleration and especially thing like "i", "r" or "ret" last. Apart from that looks good to me. Regards, Christian. >> >> if (adev->gmc.pdb0_bo) >> pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo); >> @@ -94,10 +96,10 @@ static void mmhub_v1_8_init_gart_aperture_regs(struct >> amdgpu_device *adev) >> if (adev->gmc.pdb0_bo) { >> WREG32_SOC15(MMHUB, i, >> regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, >> - (u32)(adev->gmc.fb_start >> 12)); >> + (u32)(gart_start >> 12)); >> WREG32_SOC15(MMHUB, i, >> regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, >> - (u32)(adev->gmc.fb_start >> 44)); >> + (u32)(gart_start >> 44)); >> >> WREG32_SOC15(MMHUB, i, >> regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, >