On 5/28/25 09:46, Lazar, Lijo wrote:
> On 5/22/2025 4:10 PM, Samuel Zhang wrote:
>> When switching to new GPU index after hibernation and then resume,
>> VRAM offset of each VRAM BO will be changed, and the cached gpu
>> addresses needed to updated.
>>
>> This is to enable pdb0 and switch to use pdb0-based virtual gpu
>> address by default in amdgpu_bo_create_reserved(). since the virtual
>> addresses do not change, this can avoid the need to update all
>> cached gpu addresses all over the codebase.
>>
>> Signed-off-by: Emily Deng <emily.d...@amd.com>
>> Signed-off-by: Samuel Zhang <guoqing.zh...@amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  | 28 ++++++++++++++++++------
>>  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c |  8 ++++---
>>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 13 +++++++----
>>  drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c  |  6 +++--
>>  4 files changed, 39 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>> index d1fa5e8e3937..74b565283aa9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>> @@ -38,6 +38,8 @@
>>  #include <drm/drm_drv.h>
>>  #include <drm/ttm/ttm_tt.h>
>>  
>> +static const u64 four_gb = 0x100000000ULL;
>> +
>>  /**
>>   * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0
>>   *
>> @@ -251,10 +253,20 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device 
>> *adev, struct amdgpu_gmc *mc
>>      u64 hive_vram_end = mc->xgmi.node_segment_size * 
>> mc->xgmi.num_physical_nodes - 1;
>>      mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
>>      mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
>> -    mc->gart_start = hive_vram_end + 1;
>> +    /* node_segment_size may not 4GB aligned on SRIOV, align up is needed. 
>> */
>> +    mc->gart_start = ALIGN(hive_vram_end + 1, four_gb);
>>      mc->gart_end = mc->gart_start + mc->gart_size - 1;
>> -    mc->fb_start = hive_vram_start;
>> -    mc->fb_end = hive_vram_end;
>> +    if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
>> +            /* set mc->vram_start to 0 to switch the returned GPU address of
>> +             * amdgpu_bo_create_reserved() from FB aperture to GART 
>> aperture.
>> +             */
>> +            mc->vram_start = 0;
>> +            mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
>> +            mc->visible_vram_size = min(mc->visible_vram_size, 
>> mc->real_vram_size);
>> +    } else {
>> +            mc->fb_start = hive_vram_start;
>> +            mc->fb_end = hive_vram_end;
>> +    }
>>      dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
>>                      mc->mc_vram_size >> 20, mc->vram_start,
>>                      mc->vram_end, mc->real_vram_size >> 20);
>> @@ -276,7 +288,6 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device 
>> *adev, struct amdgpu_gmc *mc
>>  void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc 
>> *mc,
>>                            enum amdgpu_gart_placement gart_placement)
>>  {
>> -    const uint64_t four_gb = 0x100000000ULL;
>>      u64 size_af, size_bf;
>>      /*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/
>>      u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
>> @@ -1053,9 +1064,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
>>       */
>>      u64 vram_size = adev->gmc.xgmi.node_segment_size * 
>> adev->gmc.xgmi.num_physical_nodes;
>>      u64 pde0_page_size = (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21;
>> -    u64 vram_addr = adev->vm_manager.vram_base_offset -
>> -            adev->gmc.xgmi.physical_node_id * 
>> adev->gmc.xgmi.node_segment_size;
>> -    u64 vram_end = vram_addr + vram_size;
>> +    u64 vram_addr, vram_end;
>>      u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
>>      int idx;
>>  
>> @@ -1068,6 +1077,11 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
>>      flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1));
>>      flags |= AMDGPU_PDE_PTE_FLAG(adev);
>>  
>> +    vram_addr = adev->vm_manager.vram_base_offset;
>> +    if (!amdgpu_virt_xgmi_migrate_enabled(adev))
>> +            vram_addr -= adev->gmc.xgmi.physical_node_id * 
>> adev->gmc.xgmi.node_segment_size;
>> +    vram_end = vram_addr + vram_size;
>> +
>>      /* The first n PDE0 entries are used as PTE,
>>       * pointing to vram
>>       */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c 
>> b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
>> index cb25f7f0dfc1..32a3987bef80 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
>> @@ -76,6 +76,8 @@ static void gfxhub_v1_2_xcc_init_gart_aperture_regs(struct 
>> amdgpu_device *adev,
>>  {
>>      uint64_t pt_base;
>>      int i;
>> +    uint64_t gart_start = amdgpu_virt_xgmi_migrate_enabled(adev) ?
>> +                    adev->gmc.vram_start : adev->gmc.fb_start;
>>  
>>      if (adev->gmc.pdb0_bo)
>>              pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo);
>> @@ -91,10 +93,10 @@ static void 
>> gfxhub_v1_2_xcc_init_gart_aperture_regs(struct amdgpu_device *adev,
>>              if (adev->gmc.pdb0_bo) {
>>                      WREG32_SOC15(GC, GET_INST(GC, i),
>>                                   regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
>> -                                 (u32)(adev->gmc.fb_start >> 12));
>> +                                 (u32)(gart_start >> 12));
>>                      WREG32_SOC15(GC, GET_INST(GC, i),
>>                                   regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
>> -                                 (u32)(adev->gmc.fb_start >> 44));
>> +                                 (u32)(gart_start >> 44));
>>  
>>                      WREG32_SOC15(GC, GET_INST(GC, i),
>>                                   regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
>> @@ -180,7 +182,7 @@ gfxhub_v1_2_xcc_init_system_aperture_regs(struct 
>> amdgpu_device *adev,
>>              /* In the case squeezing vram into GART aperture, we don't use
>>               * FB aperture and AGP aperture. Disable them.
>>               */
>> -            if (adev->gmc.pdb0_bo) {
>> +            if (adev->gmc.pdb0_bo && adev->gmc.xgmi.connected_to_cpu) {
>>                      WREG32_SOC15(GC, GET_INST(GC, i), 
>> regMC_VM_FB_LOCATION_TOP, 0);
>>                      WREG32_SOC15(GC, GET_INST(GC, i), 
>> regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF);
>>                      WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, 0);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index 59385da80185..3003aa5a53e2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -413,6 +413,11 @@ static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = 
>> {
>>      (0x001d43e0 + 0x00001800),
>>  };
>>  
>> +static inline bool amdgpu_gmc_is_pdb0_enabled(struct amdgpu_device *adev)
>> +{
>> +    return adev->gmc.xgmi.connected_to_cpu || 
>> amdgpu_virt_xgmi_migrate_enabled(adev);
>> +}
>> +
> 
> What I meant is - since this is generic logic you could keep it as
> amdgpu_gmc_* function and maintain as inline in amdgpu_gmc.h

Yeah agrre.

One more little style nit pick below.

> 
> Thanks,
> Lijo
> 
>>  static inline bool gmc_v9_0_is_multi_chiplet(struct amdgpu_device *adev)
>>  {
>>      return !!adev->aid_mask;
>> @@ -1726,7 +1731,7 @@ static void gmc_v9_0_vram_gtt_location(struct 
>> amdgpu_device *adev,
>>  
>>      /* add the xgmi offset of the physical node */
>>      base += adev->gmc.xgmi.physical_node_id * 
>> adev->gmc.xgmi.node_segment_size;
>> -    if (adev->gmc.xgmi.connected_to_cpu) {
>> +    if (amdgpu_gmc_is_pdb0_enabled(adev)) {
>>              amdgpu_gmc_sysvm_location(adev, mc);
>>      } else {
>>              amdgpu_gmc_vram_location(adev, mc, base);
>> @@ -1841,7 +1846,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device 
>> *adev)
>>              return 0;
>>      }
>>  
>> -    if (adev->gmc.xgmi.connected_to_cpu) {
>> +    if (amdgpu_gmc_is_pdb0_enabled(adev)) {
>>              adev->gmc.vmid0_page_table_depth = 1;
>>              adev->gmc.vmid0_page_table_block_size = 12;
>>      } else {
>> @@ -1867,7 +1872,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device 
>> *adev)
>>              if (r)
>>                      return r;
>>  
>> -            if (adev->gmc.xgmi.connected_to_cpu)
>> +            if (amdgpu_gmc_is_pdb0_enabled(adev))
>>                      r = amdgpu_gmc_pdb0_alloc(adev);
>>      }
>>  
>> @@ -2372,7 +2377,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device 
>> *adev)
>>  {
>>      int r;
>>  
>> -    if (adev->gmc.xgmi.connected_to_cpu)
>> +    if (amdgpu_gmc_is_pdb0_enabled(adev))
>>              amdgpu_gmc_init_pdb0(adev);
>>  
>>      if (adev->gart.bo == NULL) {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c 
>> b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
>> index 84cde1239ee4..2e3d0db9bc24 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
>> @@ -78,6 +78,8 @@ static void mmhub_v1_8_init_gart_aperture_regs(struct 
>> amdgpu_device *adev)
>>      uint64_t pt_base;
>>      u32 inst_mask;
>>      int i;
>> +    uint64_t gart_start = amdgpu_virt_xgmi_migrate_enabled(adev) ?
>> +                    adev->gmc.vram_start : adev->gmc.fb_start;

Please keep long lines early in the decleration and especially thing like "i", 
"r" or "ret" last.

Apart from that looks good to me.

Regards,
Christian.

>>  
>>      if (adev->gmc.pdb0_bo)
>>              pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo);
>> @@ -94,10 +96,10 @@ static void mmhub_v1_8_init_gart_aperture_regs(struct 
>> amdgpu_device *adev)
>>              if (adev->gmc.pdb0_bo) {
>>                      WREG32_SOC15(MMHUB, i,
>>                                   regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
>> -                                 (u32)(adev->gmc.fb_start >> 12));
>> +                                 (u32)(gart_start >> 12));
>>                      WREG32_SOC15(MMHUB, i,
>>                                   regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
>> -                                 (u32)(adev->gmc.fb_start >> 44));
>> +                                 (u32)(gart_start >> 44));
>>  
>>                      WREG32_SOC15(MMHUB, i,
>>                                   regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
> 

Reply via email to