On 4/30/25 12:16, Samuel Zhang wrote:
> When switching to a new GPU index after hibernation and resume, the
> VRAM offset of each VRAM BO will be changed, and the cached GPU
> addresses need to be updated.
>
> This is to enable pdb0 and switch amdgpu_bo_create_reserved() to
> pdb0-based virtual GPU addresses by default. Since the virtual
> addresses do not change, this avoids the need to update all cached
> GPU addresses all over the codebase.
>
> Signed-off-by: Emily Deng <emily.d...@amd.com>
> Signed-off-by: Samuel Zhang <guoqing.zh...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  | 42 ++++++++++++++++++------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |  1 +
>  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c |  2 +-
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 16 ++++++---
>  4 files changed, 46 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index ef6eaddc2ccb..3b3f9843ef7a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -38,6 +38,8 @@
>  #include <drm/drm_drv.h>
>  #include <drm/ttm/ttm_tt.h>
>
> +static const u64 four_gb = 0x100000000ULL;
> +
>  /**
>   * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0
>   *
> @@ -250,15 +252,26 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc
>  {
>         u64 hive_vram_start = 0;
>         u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1;
> -       mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
> -       mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
> +
> +       if (adev->gmc.xgmi.connected_to_cpu) {
> +               mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
> +               mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
> +               dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
> +                        mc->mc_vram_size >> 20, mc->vram_start,
> +                        mc->vram_end, mc->real_vram_size >> 20);
> +       } else {
> +               /* Reset vram_start to 0 to switch the returned GPU address of
> +                * amdgpu_bo_create_reserved() from the FB aperture to the
> +                * GART aperture.
> +                */
> +               mc->vram_start = 0;

You need to set up mc->vram_end here as well.
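A minimal sketch of what that could look like (an assumption about the
intended range, not a fix stated in the thread; it guesses that the node's
usable VRAM still spans node_segment_size bytes):

                mc->vram_start = 0;
                /* Hypothetical: vram_end must still be initialized even
                 * though vram_start is forced to 0; assuming the node's
                 * VRAM size is mc->xgmi.node_segment_size. */
                mc->vram_end = mc->xgmi.node_segment_size - 1;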
> +               hive_vram_end = ALIGN(hive_vram_end + 1, four_gb) - 1;

Don't touch hive_vram_end here. When this isn't 4GiB aligned we have a
problem with node_segment_size.

> +       }
> +
>         mc->gart_start = hive_vram_end + 1;
>         mc->gart_end = mc->gart_start + mc->gart_size - 1;
>         mc->fb_start = hive_vram_start;
>         mc->fb_end = hive_vram_end;
> -       dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
> -                mc->mc_vram_size >> 20, mc->vram_start,
> -                mc->vram_end, mc->real_vram_size >> 20);

Please keep that print here.

> +
>         dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
>                  mc->gart_size >> 20, mc->gart_start, mc->gart_end);
>  }
> @@ -277,7 +290,6 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc
>  void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
>                               enum amdgpu_gart_placement gart_placement)
>  {
> -       const uint64_t four_gb = 0x100000000ULL;
>         u64 size_af, size_bf;
>         /*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/
>         u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
> @@ -678,8 +690,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>                                      &job);
>         if (r)
>                 goto error_alloc;
> -
> -       job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
> +       job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?: adev->gart.bo);

Move that change into a separate patch.

>         job->vm_needs_flush = true;
>         job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
>         amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> @@ -1057,6 +1068,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
>         u64 vram_addr = adev->vm_manager.vram_base_offset -
>                 adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
>         u64 vram_end = vram_addr + vram_size;
> +       u64 vram_last = vram_end, vram_pa;
>         u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
>         int idx;
>
> @@ -1069,11 +1081,21 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
>         flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1));
>         flags |= AMDGPU_PDE_PTE_FLAG(adev);
>
> +       if (!adev->gmc.xgmi.connected_to_cpu) {
> +               /* Always start from the current device so that the GART
> +                * addresses stay consistent across hibernate/resume with
> +                * different GPUs.
> +                */
> +               vram_addr = adev->vm_manager.vram_base_offset;
> +               vram_end = vram_addr + vram_size;
> +       }
> +
>         /* The first n PDE0 entries are used as PTE,
>          * pointing to vram
>          */
> -       for (i = 0; vram_addr < vram_end; i++, vram_addr += pde0_page_size)
> -               amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, vram_addr, flags);
> +       for (i = 0; vram_addr < vram_end; i++, vram_addr += pde0_page_size) {
> +               vram_pa = (vram_addr >= vram_last) ? (vram_addr - vram_size) : vram_addr;
> +               amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, vram_pa, flags);
> +       }

Hui? That doesn't seem to make any sense.
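For readers trying to follow the wrap-around, here is a small standalone
sketch of what the quoted loop appears to compute (an outside reading with
made-up sizes; nothing here is stated in the thread): on a hypothetical
two-node hive, the local node starts the PDB0 walk at its own base and wraps
addresses that run past the hive end back around to the other node's pages.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Hypothetical two-node hive, sizes in pages (illustration only). */
        const uint64_t vram_size = 8;   /* whole hive */
        const uint64_t vram_last = 8;   /* hive end as computed from node 0 */
        uint64_t vram_addr = 4;         /* node 1's base offset */
        const uint64_t vram_end = vram_addr + vram_size;
        uint64_t i, vram_pa;

        for (i = 0; vram_addr < vram_end; i++, vram_addr++) {
                /* Same wrap as the quoted patch: addresses past the hive end
                 * are mapped back to the start of the hive's VRAM. */
                vram_pa = (vram_addr >= vram_last) ? (vram_addr - vram_size)
                                                   : vram_addr;
                printf("pte %llu -> pa %llu\n", (unsigned long long)i,
                       (unsigned long long)vram_pa);
        }
        return 0;
}

This prints PTEs 0-3 mapping to pages 4-7 (the local node) and PTEs 4-7
mapping to pages 0-3, i.e. the local node's pages always sit at the start of
the walk, which seems to be how the patch keeps GART addresses stable across
a resume on a different node index.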
>
>         /* The n+1'th PDE0 entry points to a huge
>          * PTB who has more than 512 entries each
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index 291d96168a57..778c7506bb2d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -308,6 +308,7 @@ struct amdgpu_gmc {
>         struct amdgpu_bo *pdb0_bo;
>         /* CPU kmapped address of pdb0*/
>         void *ptr_pdb0;
> +       bool enable_pdb0;
>
>         /* MALL size */
>         u64 mall_size;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
> index cb25f7f0dfc1..5ebb92ac9fd7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
> @@ -180,7 +180,7 @@ gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev,
>                 /* In the case squeezing vram into GART aperture, we don't use
>                  * FB aperture and AGP aperture. Disable them.
>                  */
> -               if (adev->gmc.pdb0_bo) {
> +               if (adev->gmc.pdb0_bo && !amdgpu_sriov_vf(adev)) {
>                         WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP, 0);
>                         WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF);
>                         WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, 0);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 3c950c75dea1..42c38848763b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -1682,6 +1682,11 @@ static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block)
>                 adev->gmc.private_aperture_start + (4ULL << 30) - 1;
>         adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
>
> +       adev->gmc.enable_pdb0 = adev->gmc.xgmi.connected_to_cpu;
> +       if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
> +           amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
> +           amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
> +               adev->gmc.enable_pdb0 = amdgpu_sriov_vf(adev);
>         return 0;
>  }
>
> @@ -1730,7 +1735,10 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
>                 amdgpu_gmc_sysvm_location(adev, mc);
>         } else {
>                 amdgpu_gmc_vram_location(adev, mc, base);
> -               amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
> +               if (adev->gmc.enable_pdb0)
> +                       amdgpu_gmc_sysvm_location(adev, mc);
> +               else
> +                       amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);

That is nonsense. You need to adjust the if above and not here. The
amdgpu_gmc_vram_location() function should never be called when PDB0 is in
use. (A sketch of that restructuring follows after the quoted diff.)

Regards,
Christian.

>                 if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
>                         amdgpu_gmc_agp_location(adev, mc);
>         }
> @@ -1841,7 +1849,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
>                 return 0;
>         }
>
> -       if (adev->gmc.xgmi.connected_to_cpu) {
> +       if (adev->gmc.enable_pdb0) {
>                 adev->gmc.vmid0_page_table_depth = 1;
>                 adev->gmc.vmid0_page_table_block_size = 12;
>         } else {
> @@ -1867,7 +1875,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
>         if (r)
>                 return r;
>
> -       if (adev->gmc.xgmi.connected_to_cpu)
> +       if (adev->gmc.enable_pdb0)
>                 r = amdgpu_gmc_pdb0_alloc(adev);
>  }
>
> @@ -2372,7 +2380,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
>  {
>         int r;
>
> -       if (adev->gmc.xgmi.connected_to_cpu)
> +       if (adev->gmc.enable_pdb0)
>                 amdgpu_gmc_init_pdb0(adev);
>
>         if (adev->gart.bo == NULL) {
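A minimal sketch of the restructuring Christian asks for (one reading of his
comment, untested, and assuming the quoted gmc_v9_0_vram_gtt_location()
context): fold the new flag into the existing branch so that
amdgpu_gmc_vram_location() is never reached when PDB0 is in use. Since
gmc_v9_0_early_init() already sets enable_pdb0 for the connected_to_cpu case,
checking the flag alone may suffice.

        /* Sketch only: adjust the outer if instead of nesting a second
         * sysvm call inside the else branch. */
        if (adev->gmc.enable_pdb0) {
                /* PDB0 in use: system VM layout; amdgpu_gmc_vram_location()
                 * must not be called on this path. */
                amdgpu_gmc_sysvm_location(adev, mc);
        } else {
                amdgpu_gmc_vram_location(adev, mc, base);
                amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
                if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
                        amdgpu_gmc_agp_location(adev, mc);
        }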