On 5/6/25 10:49, Zhang, GuoQing (Sam) wrote: > [AMD Official Use Only - AMD Internal Distribution Only] > > >> > @@ -1210,6 +1212,9 @@ static void psp_prep_ta_load_cmd_buf(struct >> > psp_gfx_cmd_resp *cmd, > >> > cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc); > >> > cmd->cmd.cmd_load_ta.app_len = >> >context->bin_desc.size_bytes; > >> > > >> > + if (context->mem_context.shared_bo) > >> > + context->mem_context.shared_mc_addr = >> > amdgpu_bo_fb_aper_addr(context->mem_context.shared_bo); > >> > + > >> > >> Rather put this into the psp_prep_ta_load_cmd_buf() function and remove the >> shared_mc_addr member. > >> > >> Please double check other similar cases as well. > > > > Removing of these members are easy to change and test, since they are used in > one c file. I can make one refactoring patch for each removal. > > - ta_mem_context.shared_mc_addr > > - amdgpu_firmware.fw_buf_mc > > - psp_context.tmr_mc_addr > > - psp_context.cmd_buf_mc_addr > > - dummy_read_1_table->mc_address > > > > Removing of these members will involve changes in multiple PSP and SMU > version files. For versions that MI308 don't use, I don't have the > environment to test the changes. Should I remove them as well? @Koenig, > Christian <mailto:christian.koe...@amd.com>
Keep them as they are for now. Such cleanups can come later. Regards, Christian. > > - psp->fw_pri_mc_addr > > - psp->fence_buf_mc_addr > > - psp->km_ring.ring_mem_mc_addr > > - driver_table->mc_address > > - pm_status_table->mc_address > > > > Thanks > > Sam > > > > *From: *Koenig, Christian <christian.koe...@amd.com> > *Date: *Wednesday, April 30, 2025 at 20:51 > *To: *Zhang, GuoQing (Sam) <guoqing.zh...@amd.com>, > amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org> > *Cc: *Zhao, Victor <victor.z...@amd.com>, Chang, HaiJun > <haijun.ch...@amd.com>, Deucher, Alexander <alexander.deuc...@amd.com>, Jiang > Liu <ge...@linux.alibaba.com> > *Subject: *Re: [PATCH v2 2/3] drm/amdgpu: update GPU addresses for SMU and PSP > > On 4/30/25 12:16, Samuel Zhang wrote: >> add amdgpu_bo_fb_aper_addr() and update the cached GPU addresses to use >> the FB aperture address for SMU and PSP. >> >> 2 reasons for this change: >> 1. when pdb0 is enabled, gpu addr from amdgpu_bo_create_kernel() is GART >> aperture address, it is not compatible with SMU and PSP, it need to updated >> to use FB aperture address. >> 2. Since FB aperture address will change after switching to new GPU >> index after hibernation, it need to be updated after resume. >> >> Signed-off-by: Jiang Liu <ge...@linux.alibaba.com> >> Signed-off-by: Samuel Zhang <guoqing.zh...@amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 1 + >> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 + >> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 18 ++++++++++++++++++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 + >> drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 22 ++++++++++++++++++++++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 +++ >> drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 17 +++++++++++++++++ >> 7 files changed, 63 insertions(+) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c >> index a2abddf3c110..ef6eaddc2ccb 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c >> @@ -209,6 +209,7 @@ void amdgpu_gmc_vram_location(struct amdgpu_device >> *adev, struct amdgpu_gmc *mc, >> uint64_t vis_limit = (uint64_t)amdgpu_vis_vram_limit << 20; >> uint64_t limit = (uint64_t)amdgpu_vram_limit << 20; >> >> + mc->vram_offset = base; >> mc->vram_start = base; >> mc->vram_end = mc->vram_start + mc->mc_vram_size - 1; >> if (limit < mc->real_vram_size) >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> index bd7fc123b8f9..291d96168a57 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> @@ -251,6 +251,7 @@ struct amdgpu_gmc { >> */ >> u64 vram_start; >> u64 vram_end; >> + u64 vram_offset; > > > Please don't add any new fields here. We should already have all the > necessary information in that structure. > > >> /* FB region , it's same as local vram region in single GPU, in XGMI >> * configuration, this region covers all GPUs in the same hive , >> * each GPU in the hive has the same view of this FB region . >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >> index 4e794d546b61..ca29270f66d1 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >> @@ -1513,6 +1513,24 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) >> return amdgpu_bo_gpu_offset_no_check(bo); >> } >> >> +/** >> + * amdgpu_bo_fb_aper_addr - return FB aperture GPU offset of the VRAM bo >> + * @bo: amdgpu VRAM buffer object for which we query the offset >> + * >> + * Returns: >> + * current FB aperture GPU offset of the object. >> + */ >> +u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo) >> +{ >> + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); >> + uint64_t offset; >> + >> + WARN_ON_ONCE(bo->tbo.resource->mem_type != TTM_PL_VRAM); >> + >> + offset = (bo->tbo.resource->start << PAGE_SHIFT) + >> adev->gmc.vram_offset; > > Rather use fb_base + XGMI hive index here. > >> + return amdgpu_gmc_sign_extend(offset); >> +} >> + >> /** >> * amdgpu_bo_gpu_offset_no_check - return GPU offset of bo >> * @bo: amdgpu object for which we query the offset >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >> index dcce362bfad3..c8a63e38f5d9 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >> @@ -320,6 +320,7 @@ int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, >> struct dma_resv *resv, >> bool intr); >> int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr); >> u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); >> +u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo); >> u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo); >> uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo); >> uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c >> index e1e658a97b36..bdab40b42983 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c >> @@ -871,6 +871,8 @@ static int psp_tmr_init(struct psp_context *psp) >> &psp->tmr_bo, &psp->tmr_mc_addr, >> pptr); >> } >> + if (psp->tmr_bo) >> + psp->tmr_mc_addr = amdgpu_bo_fb_aper_addr(psp->tmr_bo); >> >> return ret; >> } >> @@ -1210,6 +1212,9 @@ static void psp_prep_ta_load_cmd_buf(struct >> psp_gfx_cmd_resp *cmd, >> cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc); >> cmd->cmd.cmd_load_ta.app_len = >>context->bin_desc.size_bytes; >> >> + if (context->mem_context.shared_bo) >> + context->mem_context.shared_mc_addr = >> amdgpu_bo_fb_aper_addr(context->mem_context.shared_bo); >> + > > Rather put this into the psp_prep_ta_load_cmd_buf() function and remove the > shared_mc_addr member. > > Please double check other similar cases as well. > > Apart from that this looks rather good to me, > Christian. > > >> cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = >> lower_32_bits(context->mem_context.shared_mc_addr); >> cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = >> @@ -2336,11 +2341,26 @@ bool amdgpu_psp_tos_reload_needed(struct >> amdgpu_device *adev) >> return false; >> } >> >> +static void psp_update_gpu_addresses(struct amdgpu_device *adev) >> +{ >> + struct psp_context *psp = &adev->psp; >> + >> + if (psp->cmd_buf_bo && psp->cmd_buf_mem) { >> + psp->fw_pri_mc_addr = amdgpu_bo_fb_aper_addr(psp->fw_pri_bo); >> + psp->fence_buf_mc_addr = >> amdgpu_bo_fb_aper_addr(psp->fence_buf_bo); >> + psp->cmd_buf_mc_addr = amdgpu_bo_fb_aper_addr(psp->cmd_buf_bo); >> + } >> + if (adev->firmware.rbuf && psp->km_ring.ring_mem) >> + psp->km_ring.ring_mem_mc_addr = >> amdgpu_bo_fb_aper_addr(adev->firmware.rbuf); >> +} >> + >> static int psp_hw_start(struct psp_context *psp) >> { >> struct amdgpu_device *adev = psp->adev; >> int ret; >> >> + psp_update_gpu_addresses(adev); >> + >> if (!amdgpu_sriov_vf(adev)) { >> if ((is_psp_fw_valid(psp->kdb)) && >> (psp->funcs->bootloader_load_kdb != NULL)) { >> @@ -3976,6 +3996,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct >> device *dev, >> memcpy_toio(fw_pri_cpu_addr, usbc_pd_fw->data, usbc_pd_fw->size); >> >> mutex_lock(&adev->psp.mutex); >> + fw_pri_mc_addr = amdgpu_bo_fb_aper_addr(fw_buf_bo); >> ret = psp_load_usbc_pd_fw(&adev->psp, fw_pri_mc_addr); >> mutex_unlock(&adev->psp.mutex); >> >> @@ -4085,6 +4106,7 @@ static ssize_t amdgpu_psp_vbflash_read(struct file >> *filp, struct kobject *kobj, >> memcpy_toio(fw_pri_cpu_addr, adev->psp.vbflash_tmp_buf, >>adev->psp.vbflash_image_size); >> >> mutex_lock(&adev->psp.mutex); >> + fw_pri_mc_addr = amdgpu_bo_fb_aper_addr(fw_buf_bo); >> ret = psp_update_spirom(&adev->psp, fw_pri_mc_addr); >> mutex_unlock(&adev->psp.mutex); >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c >> index 3d9e9fdc10b4..f3b56c219e7e 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c >> @@ -1152,6 +1152,9 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) >> adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM; >> } >> >> + if (adev->firmware.fw_buf) >> + adev->firmware.fw_buf_mc = >> amdgpu_bo_fb_aper_addr(adev->firmware.fw_buf); >> + >> for (i = 0; i < adev->firmware.max_ucodes; i++) { >> ucode = &adev->firmware.ucode[i]; >> if (ucode->fw) { >> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c >> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c >> index 315b0856bf02..dfdda98cf0c5 100644 >> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c >> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c >> @@ -1000,6 +1000,21 @@ static int smu_fini_fb_allocations(struct smu_context >> *smu) >> return 0; >> } >> >> +static void smu_update_gpu_addresses(struct smu_context *smu) >> +{ >> + struct smu_table_context *smu_table = &smu->smu_table; >> + struct smu_table *pm_status_table = smu_table->tables + >> SMU_TABLE_PMSTATUSLOG; >> + struct smu_table *driver_table = &(smu_table->driver_table); >> + struct smu_table *dummy_read_1_table = &smu_table->dummy_read_1_table; >> + >> + if (pm_status_table->bo) >> + pm_status_table->mc_address = >> amdgpu_bo_fb_aper_addr(pm_status_table->bo); >> + if (driver_table->bo) >> + driver_table->mc_address = >> amdgpu_bo_fb_aper_addr(driver_table->bo); >> + if (dummy_read_1_table->bo) >> + dummy_read_1_table->mc_address = >> amdgpu_bo_fb_aper_addr(dummy_read_1_table->bo); >> +} >> + >> /** >> * smu_alloc_memory_pool - allocate memory pool in the system memory >> * >> @@ -1789,6 +1804,8 @@ static int smu_start_smc_engine(struct smu_context >> *smu) >> struct amdgpu_device *adev = smu->adev; >> int ret = 0; >> >> + smu_update_gpu_addresses(smu); >> + >> smu->smc_fw_state = SMU_FW_INIT; >> >> if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { >