[AMD Official Use Only - AMD Internal Distribution Only] > > @@ -1210,6 +1212,9 @@ static void psp_prep_ta_load_cmd_buf(struct > > psp_gfx_cmd_resp *cmd, > > cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc); > > cmd->cmd.cmd_load_ta.app_len = > > context->bin_desc.size_bytes; > > > > + if (context->mem_context.shared_bo) > > + context->mem_context.shared_mc_addr = > > amdgpu_bo_fb_aper_addr(context->mem_context.shared_bo); > > + > > Rather put this into the psp_prep_ta_load_cmd_buf() function and remove the > shared_mc_addr member. > > Please double check other similar cases as well.
Removing of these members are easy to change and test, since they are used in one c file. I can make one refactoring patch for each removal. - ta_mem_context.shared_mc_addr - amdgpu_firmware.fw_buf_mc - psp_context.tmr_mc_addr - psp_context.cmd_buf_mc_addr - dummy_read_1_table->mc_address Removing of these members will involve changes in multiple PSP and SMU version files. For versions that MI308 don't use, I don't have the environment to test the changes. Should I remove them as well? @Koenig, Christian<mailto:christian.koe...@amd.com> - psp->fw_pri_mc_addr - psp->fence_buf_mc_addr - psp->km_ring.ring_mem_mc_addr - driver_table->mc_address - pm_status_table->mc_address Thanks Sam From: Koenig, Christian <christian.koe...@amd.com> Date: Wednesday, April 30, 2025 at 20:51 To: Zhang, GuoQing (Sam) <guoqing.zh...@amd.com>, amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org> Cc: Zhao, Victor <victor.z...@amd.com>, Chang, HaiJun <haijun.ch...@amd.com>, Deucher, Alexander <alexander.deuc...@amd.com>, Jiang Liu <ge...@linux.alibaba.com> Subject: Re: [PATCH v2 2/3] drm/amdgpu: update GPU addresses for SMU and PSP On 4/30/25 12:16, Samuel Zhang wrote: > add amdgpu_bo_fb_aper_addr() and update the cached GPU addresses to use > the FB aperture address for SMU and PSP. > > 2 reasons for this change: > 1. when pdb0 is enabled, gpu addr from amdgpu_bo_create_kernel() is GART > aperture address, it is not compatible with SMU and PSP, it need to updated > to use FB aperture address. > 2. Since FB aperture address will change after switching to new GPU > index after hibernation, it need to be updated after resume. > > Signed-off-by: Jiang Liu <ge...@linux.alibaba.com> > Signed-off-by: Samuel Zhang <guoqing.zh...@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 18 ++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 22 ++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 +++ > drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 17 +++++++++++++++++ > 7 files changed, 63 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > index a2abddf3c110..ef6eaddc2ccb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > @@ -209,6 +209,7 @@ void amdgpu_gmc_vram_location(struct amdgpu_device *adev, > struct amdgpu_gmc *mc, > uint64_t vis_limit = (uint64_t)amdgpu_vis_vram_limit << 20; > uint64_t limit = (uint64_t)amdgpu_vram_limit << 20; > > + mc->vram_offset = base; > mc->vram_start = base; > mc->vram_end = mc->vram_start + mc->mc_vram_size - 1; > if (limit < mc->real_vram_size) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > index bd7fc123b8f9..291d96168a57 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > @@ -251,6 +251,7 @@ struct amdgpu_gmc { > */ > u64 vram_start; > u64 vram_end; > + u64 vram_offset; Please don't add any new fields here. We should already have all the necessary information in that structure. > /* FB region , it's same as local vram region in single GPU, in XGMI > * configuration, this region covers all GPUs in the same hive , > * each GPU in the hive has the same view of this FB region . > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > index 4e794d546b61..ca29270f66d1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > @@ -1513,6 +1513,24 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) > return amdgpu_bo_gpu_offset_no_check(bo); > } > > +/** > + * amdgpu_bo_fb_aper_addr - return FB aperture GPU offset of the VRAM bo > + * @bo: amdgpu VRAM buffer object for which we query the offset > + * > + * Returns: > + * current FB aperture GPU offset of the object. > + */ > +u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo) > +{ > + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > + uint64_t offset; > + > + WARN_ON_ONCE(bo->tbo.resource->mem_type != TTM_PL_VRAM); > + > + offset = (bo->tbo.resource->start << PAGE_SHIFT) + > adev->gmc.vram_offset; Rather use fb_base + XGMI hive index here. > + return amdgpu_gmc_sign_extend(offset); > +} > + > /** > * amdgpu_bo_gpu_offset_no_check - return GPU offset of bo > * @bo: amdgpu object for which we query the offset > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > index dcce362bfad3..c8a63e38f5d9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > @@ -320,6 +320,7 @@ int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, > struct dma_resv *resv, > bool intr); > int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr); > u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); > +u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo); > u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo); > uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo); > uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > index e1e658a97b36..bdab40b42983 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > @@ -871,6 +871,8 @@ static int psp_tmr_init(struct psp_context *psp) > &psp->tmr_bo, &psp->tmr_mc_addr, > pptr); > } > + if (psp->tmr_bo) > + psp->tmr_mc_addr = amdgpu_bo_fb_aper_addr(psp->tmr_bo); > > return ret; > } > @@ -1210,6 +1212,9 @@ static void psp_prep_ta_load_cmd_buf(struct > psp_gfx_cmd_resp *cmd, > cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc); > cmd->cmd.cmd_load_ta.app_len = context->bin_desc.size_bytes; > > + if (context->mem_context.shared_bo) > + context->mem_context.shared_mc_addr = > amdgpu_bo_fb_aper_addr(context->mem_context.shared_bo); > + Rather put this into the psp_prep_ta_load_cmd_buf() function and remove the shared_mc_addr member. Please double check other similar cases as well. Apart from that this looks rather good to me, Christian. > cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = > lower_32_bits(context->mem_context.shared_mc_addr); > cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = > @@ -2336,11 +2341,26 @@ bool amdgpu_psp_tos_reload_needed(struct > amdgpu_device *adev) > return false; > } > > +static void psp_update_gpu_addresses(struct amdgpu_device *adev) > +{ > + struct psp_context *psp = &adev->psp; > + > + if (psp->cmd_buf_bo && psp->cmd_buf_mem) { > + psp->fw_pri_mc_addr = amdgpu_bo_fb_aper_addr(psp->fw_pri_bo); > + psp->fence_buf_mc_addr = > amdgpu_bo_fb_aper_addr(psp->fence_buf_bo); > + psp->cmd_buf_mc_addr = amdgpu_bo_fb_aper_addr(psp->cmd_buf_bo); > + } > + if (adev->firmware.rbuf && psp->km_ring.ring_mem) > + psp->km_ring.ring_mem_mc_addr = > amdgpu_bo_fb_aper_addr(adev->firmware.rbuf); > +} > + > static int psp_hw_start(struct psp_context *psp) > { > struct amdgpu_device *adev = psp->adev; > int ret; > > + psp_update_gpu_addresses(adev); > + > if (!amdgpu_sriov_vf(adev)) { > if ((is_psp_fw_valid(psp->kdb)) && > (psp->funcs->bootloader_load_kdb != NULL)) { > @@ -3976,6 +3996,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device > *dev, > memcpy_toio(fw_pri_cpu_addr, usbc_pd_fw->data, usbc_pd_fw->size); > > mutex_lock(&adev->psp.mutex); > + fw_pri_mc_addr = amdgpu_bo_fb_aper_addr(fw_buf_bo); > ret = psp_load_usbc_pd_fw(&adev->psp, fw_pri_mc_addr); > mutex_unlock(&adev->psp.mutex); > > @@ -4085,6 +4106,7 @@ static ssize_t amdgpu_psp_vbflash_read(struct file > *filp, struct kobject *kobj, > memcpy_toio(fw_pri_cpu_addr, adev->psp.vbflash_tmp_buf, > adev->psp.vbflash_image_size); > > mutex_lock(&adev->psp.mutex); > + fw_pri_mc_addr = amdgpu_bo_fb_aper_addr(fw_buf_bo); > ret = psp_update_spirom(&adev->psp, fw_pri_mc_addr); > mutex_unlock(&adev->psp.mutex); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c > index 3d9e9fdc10b4..f3b56c219e7e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c > @@ -1152,6 +1152,9 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) > adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM; > } > > + if (adev->firmware.fw_buf) > + adev->firmware.fw_buf_mc = > amdgpu_bo_fb_aper_addr(adev->firmware.fw_buf); > + > for (i = 0; i < adev->firmware.max_ucodes; i++) { > ucode = &adev->firmware.ucode[i]; > if (ucode->fw) { > diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > index 315b0856bf02..dfdda98cf0c5 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > @@ -1000,6 +1000,21 @@ static int smu_fini_fb_allocations(struct smu_context > *smu) > return 0; > } > > +static void smu_update_gpu_addresses(struct smu_context *smu) > +{ > + struct smu_table_context *smu_table = &smu->smu_table; > + struct smu_table *pm_status_table = smu_table->tables + > SMU_TABLE_PMSTATUSLOG; > + struct smu_table *driver_table = &(smu_table->driver_table); > + struct smu_table *dummy_read_1_table = &smu_table->dummy_read_1_table; > + > + if (pm_status_table->bo) > + pm_status_table->mc_address = > amdgpu_bo_fb_aper_addr(pm_status_table->bo); > + if (driver_table->bo) > + driver_table->mc_address = > amdgpu_bo_fb_aper_addr(driver_table->bo); > + if (dummy_read_1_table->bo) > + dummy_read_1_table->mc_address = > amdgpu_bo_fb_aper_addr(dummy_read_1_table->bo); > +} > + > /** > * smu_alloc_memory_pool - allocate memory pool in the system memory > * > @@ -1789,6 +1804,8 @@ static int smu_start_smc_engine(struct smu_context *smu) > struct amdgpu_device *adev = smu->adev; > int ret = 0; > > + smu_update_gpu_addresses(smu); > + > smu->smc_fw_state = SMU_FW_INIT; > > if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {