Re: [PATCH 1/5 V2] drm/amdgpu: Add sysfs interface for gc reset mask
On 10/23/2024 8:13 AM, jesse.zh...@amd.com wrote:
> Add two sysfs interfaces for gfx and compute:
> gfx_reset_mask
> compute_reset_mask
>
> These interfaces are read-only and show the resets supported by the IP.
> For example, full adapter reset (mode1/mode2/BACO/etc),
> soft reset, queue reset, and pipe reset.
>
> V2: the sysfs node returns a text string instead of some flags (Christian)
>
> Signed-off-by: Jesse Zhang
> Suggested-by: Alex Deucher
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 122
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 +
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 ++
> drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 5 +
> drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 5 +
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 5 +
> drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 5 +
> 7 files changed, 150 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index e96984c53e72..10d55755ee88 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -1588,6 +1588,94 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
> 	return count;
> }
>
> +static ssize_t amdgpu_gfx_get_gfx_reset_mask(struct device *dev,
> +					     struct device_attribute *attr,
> +					     char *buf)
> +{
> +	struct drm_device *ddev = dev_get_drvdata(dev);
> +	struct amdgpu_device *adev = drm_to_adev(ddev);
> +	ssize_t size = 0;
> +	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
> +
> +	if (!adev || !ring)
> +		return -ENODEV;
> +
> +	if (amdgpu_device_should_recover_gpu(adev))
> +		size += sysfs_emit_at(buf, size, "full ");
> +
> +	if (amdgpu_gpu_recovery && unlikely(!adev->debug_disable_soft_recovery)
> +	    && !amdgpu_sriov_vf(adev) && ring->funcs->soft_recovery)
> +		size += sysfs_emit_at(buf, size, "soft ");
> +

If amdgpu_gpu_recovery is disabled, then that check may be made before
creating the sysfs file itself. It doesn't have to be here.

> +	if (amdgpu_gpu_recovery && ring->funcs->reset) {
> +		switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
> +		case IP_VERSION(9, 2, 2): //raven2
> +		case IP_VERSION(9, 3, 0): //renoir
> +		case IP_VERSION(9, 4, 0): //vega20
> +		case IP_VERSION(10, 1, 0): //navi10
> +		case IP_VERSION(10, 1, 1): //navi12
> +		case IP_VERSION(10, 1, 2): //navi13
> +			/* Skip flag setting because some cases
> +			 * are not supported by current firmware.
> +			 */
> +			break;
> +
> +		default:
> +			size += sysfs_emit_at(buf, size, "queue ");
> +			break;
> +		}
> +	}

This kind of version check is not good. Instead initialize
supported_reset_types in IP version files. As in the compute example
below, sometimes it requires FW support/other checks also, not just the
existence of callback implementation. This function may just iterate
over the type mask to print the text version.

Thanks,
Lijo

> +
> +	size += sysfs_emit_at(buf, size, "\n");
> +	return size;
> +}
> +
> +static ssize_t amdgpu_gfx_get_compute_reset_mask(struct device *dev,
> +						 struct device_attribute *attr,
> +						 char *buf)
> +{
> +	struct drm_device *ddev = dev_get_drvdata(dev);
> +	struct amdgpu_device *adev = drm_to_adev(ddev);
> +	ssize_t size = 0;
> +	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
> +
> +	if (!adev || !ring)
> +		return -ENODEV;
> +
> +	if (amdgpu_device_should_recover_gpu(adev))
> +		size += sysfs_emit_at(buf, size, "full ");
> +
> +	if (amdgpu_gpu_recovery && unlikely(!adev->debug_disable_soft_recovery)
> +	    && !amdgpu_sriov_vf(adev) && ring->funcs->soft_recovery)
> +		size += sysfs_emit_at(buf, size, "soft ");
> +
> +	if (amdgpu_gpu_recovery && ring->funcs->reset) {
> +		switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
> +		case IP_VERSION(9, 2, 2): //raven2
> +		case IP_VERSION(9, 3, 0): //renoir
> +		case IP_VERSION(9, 4, 0): //vega20
> +		case IP_VERSION(10, 1, 0): //navi10
> +		case IP_VERSION(10, 1, 1): //navi12
> +		case IP_VERSION(10, 1, 2): //navi13
> +			/* Skip flag setting because some test cases
> +			 * are not supported by current firmware.
> +			 */
> +			break;
> +
> +		default:
> +			size += sysfs_emit_at(buf, size,
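Lijo's suggestion, sketched in C with illustrative names (this is not
the patch's actual API): each IP file would fill a supported-reset mask
once at init time, after performing any firmware checks, and the sysfs
show routine would only walk the mask.

	#include <linux/sysfs.h>

	/* Hypothetical flag names for the sketch. */
	#define AMDGPU_RESET_TYPE_FULL		(1 << 0)
	#define AMDGPU_RESET_TYPE_SOFT_RESET	(1 << 1)
	#define AMDGPU_RESET_TYPE_PER_QUEUE	(1 << 2)
	#define AMDGPU_RESET_TYPE_PER_PIPE	(1 << 3)

	static ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
	{
		ssize_t size = 0;

		/* Just translate the precomputed mask to text. */
		if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
			size += sysfs_emit_at(buf, size, "soft ");
		if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
			size += sysfs_emit_at(buf, size, "queue ");
		if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
			size += sysfs_emit_at(buf, size, "pipe ");
		if (supported_reset & AMDGPU_RESET_TYPE_FULL)
			size += sysfs_emit_at(buf, size, "full ");
		size += sysfs_emit_at(buf, size, "\n");
		return size;
	}

A gfx IP file would then set something like
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE only on
the IP/firmware combinations that actually support queue reset, keeping
the per-version knowledge out of the common show() path.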
RE: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete
Ping on this series.

Thanks,
Prike

> -----Original Message-----
> From: Liang, Prike
> Sent: Monday, October 14, 2024 3:49 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Lazar, Lijo ; Liang, Prike
> Subject: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete
>
> To check the status of S3 suspend completion, use the PM core
> pm_suspend_global_flags bit(1) to detect S3 abort events. Therefore, clean up
> the AMDGPU driver's private flag suspend_complete.
>
> Signed-off-by: Prike Liang
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 --
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 --
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++--
> drivers/gpu/drm/amd/amdgpu/soc15.c | 7 ++-
> drivers/gpu/drm/amd/amdgpu/soc21.c | 2 +-
> 5 files changed, 5 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 48c9b9b06905..9b35763ae0a7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -,8 +,6 @@ struct amdgpu_device {
> 	bool	in_s3;
> 	bool	in_s4;
> 	bool	in_s0ix;
> -	/* indicate amdgpu suspension status */
> -	bool	suspend_complete;
>
> 	enum pp_mp1_state mp1_state;
> 	struct amdgpu_doorbell_index doorbell_index;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 680e44fdee6e..78972151b970 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2501,7 +2501,6 @@ static int amdgpu_pmops_suspend(struct device *dev)
> 	struct drm_device *drm_dev = dev_get_drvdata(dev);
> 	struct amdgpu_device *adev = drm_to_adev(drm_dev);
>
> -	adev->suspend_complete = false;
> 	if (amdgpu_acpi_is_s0ix_active(adev))
> 		adev->in_s0ix = true;
> 	else if (amdgpu_acpi_is_s3_active(adev))
> @@ -2516,7 +2515,6 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev)
> 	struct drm_device *drm_dev = dev_get_drvdata(dev);
> 	struct amdgpu_device *adev = drm_to_adev(drm_dev);
>
> -	adev->suspend_complete = true;
> 	if (amdgpu_acpi_should_gpu_reset(adev))
> 		return amdgpu_asic_reset(adev);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index be320d753507..ba8e66744376 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3276,8 +3276,8 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
> 	 * confirmed that the APU gfx10/gfx11 needn't such update.
> 	 */
> 	if (adev->flags & AMD_IS_APU &&
> -	    adev->in_s3 && !adev->suspend_complete) {
> -		DRM_INFO(" Will skip the CSB packet resubmit\n");
> +	    adev->in_s3 && !pm_resume_via_firmware()) {
> +		DRM_INFO("Will skip the CSB packet resubmit\n");
> 		return 0;
> 	}
> 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 12ff6cf568dc..d9d11131a744 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -584,13 +584,10 @@ static bool soc15_need_reset_on_resume(struct amdgpu_device *adev)
> 	 * performing pm core test.
> 	 */
> 	if (adev->flags & AMD_IS_APU && adev->in_s3 &&
> -	    !pm_resume_via_firmware()) {
> -		adev->suspend_complete = false;
> +	    !pm_resume_via_firmware())
> 		return true;
> -	} else {
> -		adev->suspend_complete = true;
> +	else
> 		return false;
> -	}
> }
>
> static int soc15_asic_reset(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
> index c4b950e75133..7a47a21ef00f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc21.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
> @@ -904,7 +904,7 @@ static bool soc21_need_reset_on_resume(struct amdgpu_device *adev)
> 	 * 2) S3 suspend got aborted and TOS is active.
> 	 */
> 	if (!(adev->flags & AMD_IS_APU) && adev->in_s3 &&
> -	    !adev->suspend_complete) {
> +	    !pm_resume_via_firmware()) {
> 		sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
> 		msleep(100);
> 		sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
> --
> 2.34.1
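For reference, the PM core helper the series switches to is a one-line
test of pm_suspend_global_flags bit(1); paraphrased from
include/linux/suspend.h (check the tree for the authoritative
definition):

	/* Paraphrased from include/linux/suspend.h: bit(1) of
	 * pm_suspend_global_flags records that platform firmware was
	 * involved in the resume, which is the signal the patch keys
	 * off instead of the driver-private suspend_complete flag.
	 */
	#define PM_SUSPEND_FLAG_FW_SUSPEND	BIT(0)
	#define PM_SUSPEND_FLAG_FW_RESUME	BIT(1)

	static inline bool pm_resume_via_firmware(void)
	{
		return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_FW_RESUME);
	}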
Re: [PATCH v5 4/4] drm/amdgpu: track bo memory stats at runtime
Am 23.10.24 um 09:38 schrieb Tvrtko Ursulin: On 22/10/2024 17:24, Christian König wrote: Am 22.10.24 um 17:17 schrieb Li, Yunxiang (Teddy): [Public] +static uint32_t fold_memtype(uint32_t memtype) { In general please add prefixes to even static functions, e.g. amdgpu_vm_ or amdgpu_bo_. + /* Squash private placements into 'cpu' to keep the legacy userspace view. */ + switch (mem_type) { + case TTM_PL_VRAM: + case TTM_PL_TT: + return memtype + default: + return TTM_PL_SYSTEM; + } +} + +static uint32_t bo_get_memtype(struct amdgpu_bo *bo) { That whole function belongs into amdgpu_bo.c Do you mean bo_get_memtype or fold_memtype? I debated whether bo_get_memtype should go into amdgpu_vm.c or amdgpu_bo.c, and since it's using fold_memtype and only useful for memory stats because of folding the private placements I just left them here together with the other mem stats code. I can move it to amdgpu_bo.c make it return the memtype verbatim and just fold it when I do the accounting. I think that folding GDS, GWS and OA into system is also a bug. We should really not doing that. Just wanted to point out for this round that the code to query the current placement from a BO should probably go into amdgpu_bo.c and not amdgpu_vm.c + struct ttm_resource *res = bo->tbo.resource; + const uint32_t domain_to_pl[] = { + [ilog2(AMDGPU_GEM_DOMAIN_CPU)] = TTM_PL_SYSTEM, + [ilog2(AMDGPU_GEM_DOMAIN_GTT)] = TTM_PL_TT, + [ilog2(AMDGPU_GEM_DOMAIN_VRAM)] = TTM_PL_VRAM, + [ilog2(AMDGPU_GEM_DOMAIN_GDS)] = AMDGPU_PL_GDS, + [ilog2(AMDGPU_GEM_DOMAIN_GWS)] = AMDGPU_PL_GWS, + [ilog2(AMDGPU_GEM_DOMAIN_OA)] = AMDGPU_PL_OA, + [ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = AMDGPU_PL_DOORBELL, + }; + uint32_t domain; + + if (res) + return fold_memtype(res->mem_type); + + /* + * If no backing store use one of the preferred domain for basic + * stats. We take the MSB since that should give a reasonable + * view. + */ + BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT || TTM_PL_VRAM < TTM_PL_SYSTEM); + domain = fls(bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK); + if (drm_WARN_ON_ONCE(&adev->ddev, + domain == 0 || --domain >= ARRAY_SIZE(domain_to_pl))) It's perfectly legal to create a BO without a placement. That one just won't have a backing store. This is lifted from the previous change I'm rebasing onto. I think what it’s trying to do is if the BO doesn't have a placement, use the "biggest" (VRAM > TT > SYSTEM) preferred placement for the purpose of accounting. Previously we just ignore BOs that doesn't have a placement. I guess there's argument for going with either approaches. I was not arguing, I'm simply pointing out a bug. It's perfectly valid for bo->preferred_domains to be 0. So the following WARN_ON() that no bit is set is incorrect. + return 0; + return fold_memtype(domain_to_pl[domain]) That would need specular execution mitigation if I'm not completely mistaken. Better use a switch/case statement. Do you mean change the array indexing to a switch statement? Yes. Did you mean array_index_nospec? Yes. Domain is not a direct userspace input and is calculated from the mask which sanitized to allowed values prior to this call. So I *think* switch is an overkill but don't mind it either. Just commenting FWIW. I missed that the mask is applied. Thinking more about it I'm not sure if we should do this conversion in the first place. IIRC Tvrtko you once suggested a patch which switched a bunch of code to use the TTM placement instead of the UAPI flags. 
Going more into this direction I think when we want to look at the current placement we should probably also use the TTM PL enumeration directly. Regards, Christian. Regards, Tvrtko
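Christian's direction of reading the TTM placement enumeration directly
could look roughly like the following hedged sketch (the helper name is
made up, not the merged code):

	/* Sketch: take the placement from TTM when a backing store
	 * exists, and only fall back to the GEM preferred-domain flags
	 * for unplaced BOs, which then count toward "total" only.
	 */
	static uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo)
	{
		struct ttm_resource *res = bo->tbo.resource;

		if (res)
			return res->mem_type;	/* TTM_PL_* or AMDGPU_PL_* directly */

		/* No backing store: caller buckets it, e.g. from
		 * bo->preferred_domains as in the patch under review.
		 */
		return TTM_PL_SYSTEM;
	}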
Re: [PATCH] drm/amdgpu: handle default profile on on devices without fullscreen 3D
On 2024-10-22 15:50, Alex Deucher wrote: > Some devices do not support fullscreen 3D. > > v2: Make the check generic. > > Fixes: 336568de918e ("drm/amdgpu/swsmu: default to fullscreen 3D profile for > dGPUs") > Signed-off-by: Alex Deucher > Cc: Kenneth Feng > Cc: Lijo Lazar > --- > drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 11 ++- > 1 file changed, 10 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > index accc96a03bd9..8d4aee4e2287 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > @@ -1236,6 +1236,14 @@ static void smu_init_xgmi_plpd_mode(struct smu_context > *smu) > } > } > > +static bool smu_is_workload_profile_available(struct smu_context *smu, > + u32 profile) > +{ > + if (profile >= PP_SMC_POWER_PROFILE_COUNT) > + return false; > + return smu->workload_map && smu->workload_map[profile].valid_mapping; > +} > + > static int smu_sw_init(struct amdgpu_ip_block *ip_block) > { > struct amdgpu_device *adev = ip_block->adev; > @@ -1267,7 +1275,8 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block) > smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; > smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; > > - if (smu->is_apu) > + if (smu->is_apu || > + !smu_is_workload_profile_available(smu, > PP_SMC_POWER_PROFILE_FULLSCREEN3D)) > smu->workload_mask = 1 << > smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; > else > smu->workload_mask = 1 << > smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; FWIW, PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT isn't really suitable for compositing with an APU either (certainly not while the machine is connected to AC, though I'm getting good battery life even forcing the compute profile). -- Earthling Michel Dänzer \GNOME / Xwayland / Mesa developer https://redhat.com \ Libre software enthusiast
[PATCH v3] drm/amdgpu: Add dcn30 drm_panic support
Add support for the drm_panic module, which displays a pretty user friendly message on the screen when a Linux kernel panic occurs. It should work on all radeon using amdgpu_dm_plane.c, when the framebuffer is linear (like when in a VT). For tiled framebuffer, it will only work on radeon with dcn3x. It should be easy to add support for dcn20, but I can't test it. I've tested it on a Radeon W6400 pro, Radeon 7900XTX and Radeon RX 5700. Also it doesn't work yet on laptop's panel, maybe due to PSR. Signed-off-by: Jocelyn Falempe --- v2: - Disable tiling, and force page flip in the panic_flush() callback. - Enable tiling support for dcn31. - Force immediate page flip. v3: * Add hubp_clear_tiling hook to dcn32 and dcn35 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 136 +- .../amd/display/dc/hubp/dcn30/dcn30_hubp.c| 17 +++ .../amd/display/dc/hubp/dcn30/dcn30_hubp.h| 2 + .../amd/display/dc/hubp/dcn31/dcn31_hubp.c| 1 + .../amd/display/dc/hubp/dcn32/dcn32_hubp.c| 3 +- .../amd/display/dc/hubp/dcn35/dcn35_hubp.c| 1 + drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h | 1 + 7 files changed, 159 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 495e3cd70426d..60606b36f07bd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -26,7 +26,9 @@ #include #include +#include "drm/drm_framebuffer.h" #include +#include #include #include #include @@ -36,6 +38,7 @@ #include "amdgpu_display.h" #include "amdgpu_dm_trace.h" #include "amdgpu_dm_plane.h" +#include "bif/bif_4_1_d.h" #include "gc/gc_11_0_0_offset.h" #include "gc/gc_11_0_0_sh_mask.h" @@ -1421,6 +1424,124 @@ static void amdgpu_dm_plane_atomic_async_update(struct drm_plane *plane, amdgpu_dm_plane_handle_cursor_update(plane, old_state); } +/* panic_bo is set in amdgpu_dm_plane_get_scanout_buffer() and only used in amdgpu_dm_set_pixel() + * they are called from the panic handler, and protected by the drm_panic spinlock. 
+ */
+static struct amdgpu_bo *panic_abo;
+
+/* Use the indirect MMIO to write each pixel to the GPU VRAM,
+ * This is a simplified version of amdgpu_device_mm_access()
+ */
+static void amdgpu_dm_set_pixel(struct drm_scanout_buffer *sb,
+				unsigned int x,
+				unsigned int y,
+				u32 color)
+{
+	struct amdgpu_res_cursor cursor;
+	unsigned long offset;
+	struct amdgpu_bo *abo = panic_abo;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+	uint32_t tmp;
+
+	offset = x * 4 + y * sb->pitch[0];
+	amdgpu_res_first(abo->tbo.resource, offset, 4, &cursor);
+
+	tmp = cursor.start >> 31;
+	WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t) cursor.start) | 0x80000000);
+	if (tmp != 0xffffffff)
+		WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
+	WREG32_NO_KIQ(mmMM_DATA, color);
+}
+
+static int amdgpu_dm_plane_get_scanout_buffer(struct drm_plane *plane,
+					      struct drm_scanout_buffer *sb)
+{
+	struct amdgpu_bo *abo;
+	struct drm_framebuffer *fb = plane->state->fb;
+
+	if (!fb)
+		return -EINVAL;
+
+	DRM_DEBUG_KMS("Framebuffer %dx%d %p4cc\n", fb->width, fb->height, &fb->format->format);
+
+	abo = gem_to_amdgpu_bo(fb->obj[0]);
+	if (!abo)
+		return -EINVAL;
+
+	sb->width = fb->width;
+	sb->height = fb->height;
+	/* Use the generic linear format, because tiling will be disabled in panic_flush() */
+	sb->format = drm_format_info(fb->format->format);
+	if (!sb->format)
+		return -EINVAL;
+
+	sb->pitch[0] = fb->pitches[0];
+
+	if (abo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) {
+		if (abo->tbo.resource->mem_type != TTM_PL_VRAM) {
+			drm_warn(plane->dev, "amdgpu panic, framebuffer not in VRAM\n");
+			return -EINVAL;
+		}
+		/* Only handle 32bits format, to simplify mmio access */
+		if (fb->format->cpp[0] != 4) {
+			drm_warn(plane->dev, "amdgpu panic, pixel format is not 32bits\n");
+			return -EINVAL;
+		}
+		sb->set_pixel = amdgpu_dm_set_pixel;
+		panic_abo = abo;
+		return 0;
+	}
+	if (!abo->kmap.virtual &&
+	    ttm_bo_kmap(&abo->tbo, 0, PFN_UP(abo->tbo.base.size), &abo->kmap)) {
+		drm_warn(plane->dev, "amdgpu bo map failed, panic won't be displayed\n");
+		return -ENOMEM;
+	}
+	if (abo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK)
+		iosys_map_set_vaddr_iomem(&sb->map[0], abo->kmap.virtual);
+	else
+		iosys_
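The hunk that wires these callbacks into the plane is cut off above.
For orientation, drm_panic support hangs off two real fields of struct
drm_plane_helper_funcs; a hedged sketch of the wiring, with the flush
callback name assumed from the changelog rather than visible in the
truncated diff:

	/* Sketch only: get_scanout_buffer/panic_flush are the real
	 * drm_panic hooks in struct drm_plane_helper_funcs; the
	 * amdgpu_dm_plane_panic_flush name is an assumption.
	 */
	static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
		/* existing prepare/cleanup callbacks elided */
		.get_scanout_buffer = amdgpu_dm_plane_get_scanout_buffer,
		.panic_flush = amdgpu_dm_plane_panic_flush,
	};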
Re: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete
On 10/14/2024 1:19 PM, Prike Liang wrote: > To check the status of S3 suspend completion, > use the PM core pm_suspend_global_flags bit(1) > to detect S3 abort events. Therefore, clean up > the AMDGPU driver's private flag suspend_complete. > > Signed-off-by: Prike Liang > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -- > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 -- > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- > drivers/gpu/drm/amd/amdgpu/soc15.c | 7 ++- > drivers/gpu/drm/amd/amdgpu/soc21.c | 2 +- > 5 files changed, 5 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 48c9b9b06905..9b35763ae0a7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -,8 +,6 @@ struct amdgpu_device { > boolin_s3; > boolin_s4; > boolin_s0ix; > - /* indicate amdgpu suspension status */ > - boolsuspend_complete; > > enum pp_mp1_state mp1_state; > struct amdgpu_doorbell_index doorbell_index; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index 680e44fdee6e..78972151b970 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -2501,7 +2501,6 @@ static int amdgpu_pmops_suspend(struct device *dev) > struct drm_device *drm_dev = dev_get_drvdata(dev); > struct amdgpu_device *adev = drm_to_adev(drm_dev); > > - adev->suspend_complete = false; > if (amdgpu_acpi_is_s0ix_active(adev)) > adev->in_s0ix = true; > else if (amdgpu_acpi_is_s3_active(adev)) > @@ -2516,7 +2515,6 @@ static int amdgpu_pmops_suspend_noirq(struct device > *dev) > struct drm_device *drm_dev = dev_get_drvdata(dev); > struct amdgpu_device *adev = drm_to_adev(drm_dev); > > - adev->suspend_complete = true; > if (amdgpu_acpi_should_gpu_reset(adev)) > return amdgpu_asic_reset(adev); > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index be320d753507..ba8e66744376 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -3276,8 +3276,8 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device > *adev) >* confirmed that the APU gfx10/gfx11 needn't such update. >*/ > if (adev->flags & AMD_IS_APU && > - adev->in_s3 && !adev->suspend_complete) { > - DRM_INFO(" Will skip the CSB packet resubmit\n"); > + adev->in_s3 && !pm_resume_via_firmware()) { > + DRM_INFO("Will skip the CSB packet resubmit\n"); > return 0; > } > r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); > diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c > b/drivers/gpu/drm/amd/amdgpu/soc15.c > index 12ff6cf568dc..d9d11131a744 100644 > --- a/drivers/gpu/drm/amd/amdgpu/soc15.c > +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c > @@ -584,13 +584,10 @@ static bool soc15_need_reset_on_resume(struct > amdgpu_device *adev) >*performing pm core test. 
>*/ > if (adev->flags & AMD_IS_APU && adev->in_s3 && > - !pm_resume_via_firmware()) { > - adev->suspend_complete = false; > + !pm_resume_via_firmware()) > return true; > - } else { > - adev->suspend_complete = true; > + else > return false; > - } > } > > static int soc15_asic_reset(struct amdgpu_device *adev) > diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c > b/drivers/gpu/drm/amd/amdgpu/soc21.c > index c4b950e75133..7a47a21ef00f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/soc21.c > +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c > @@ -904,7 +904,7 @@ static bool soc21_need_reset_on_resume(struct > amdgpu_device *adev) >* 2) S3 suspend got aborted and TOS is active. >*/ > if (!(adev->flags & AMD_IS_APU) && adev->in_s3 && > - !adev->suspend_complete) { > + !pm_resume_via_firmware()) { Looks like this will cover only ACPI based systems. Not sure if that assumption is valid for dGPU cases. Thanks, Lijo > sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); > msleep(100); > sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
Re: [PATCH 2/5 V2] drm/amdgpu: Add sysfs interface for sdma reset mask
Am 23.10.24 um 04:43 schrieb jesse.zh...@amd.com: Add the sysfs interface for sdma: sdma_reset_mask The interface is read-only and show the resets supported by the IP. For example, full adapter reset (mode1/mode2/BACO/etc), soft reset, queue reset, and pipe reset. V2: the sysfs node returns a text string instead of some flags (Christian) Signed-off-by: Jesse Zhang Suggested-by:Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 48 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 2 + drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 5 +++ drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 5 +++ drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 5 +++ drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 5 +++ drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 5 +++ drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 5 +++ drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 5 +++ drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 5 +++ 10 files changed, 90 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 183a976ba29d..f20b7285f5fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -343,3 +343,51 @@ int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev) return 0; } + +static ssize_t amdgpu_get_sdma_reset_mask(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + ssize_t size = 0; + struct amdgpu_ring *ring = &adev->sdma.instance[0].ring; + + if (!adev || !ring) + return -ENODEV; + + if (amdgpu_device_should_recover_gpu(adev)) + size += sysfs_emit_at(buf, size, "full "); + + if (amdgpu_gpu_recovery && unlikely(!adev->debug_disable_soft_recovery) + && !amdgpu_sriov_vf(adev) && ring->funcs->soft_recovery) A total nit pick, but the indentation here is wrong and I think checkpatch.pl might complain about that. In general the Linux kernel coding styles says that for "if"s you should indent with tabs and then spaces so that in this case the && is under the amdgpu_gpu_recovery of the previous line. + size += sysfs_emit_at(buf, size, "soft "); + + if (amdgpu_gpu_recovery && ring->funcs->reset) + size += sysfs_emit_at(buf, size, "queue "); You could add a generic helper which takes the ring as parameter and prints "full soft queue" into the buffer. Patch #1 is kind of special because of the FW limitations, but that should make patch #2-#5 a bit smaller and use more generic code. And should we print the strings in the order they are applied? In other words "soft queue full" ? Apart from that the patches look totally clean to me. Regards, Christian. 
+ + size += sysfs_emit_at(buf, size, "\n"); + return size; +} + +static DEVICE_ATTR(sdma_reset_mask, 0444, + amdgpu_get_sdma_reset_mask, NULL); + +int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev) +{ + int r = 0; + + if (adev->sdma.num_instances) { + r = device_create_file(adev->dev, &dev_attr_sdma_reset_mask); + if (r) + return r; + } + + return r; +} + +void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev) +{ + if (adev->sdma.num_instances) + device_remove_file(adev->dev, &dev_attr_sdma_reset_mask); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 087ce0f6fa07..3058548d0733 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -175,5 +175,7 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance, void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev, bool duplicate); int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev); +int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev); +void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 10fd772cb80f..bd04310cb2b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -863,6 +863,10 @@ static int sdma_v2_4_sw_init(struct amdgpu_ip_block *ip_block) return r; } + r = amdgpu_sdma_sysfs_reset_mask_init(adev); + if (r) + return r; + return r; } @@ -874,6 +878,7 @@ static int sdma_v2_4_sw_fini(struct amdgpu_ip_block *ip_block) for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); + amdgpu_sdma_sysfs_reset_mask_fini(adev); sdma
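A rough cut of the generic helper Christian asks for, printing in the
order the resets are actually attempted; the name and exact parameter
list are guesses, and patch 1's firmware caveat would still need
special handling in the gfx/compute case:

	/* Hypothetical shared helper for patches 2-5: takes the ring
	 * and emits the common "soft queue full" part of the mask.
	 */
	static ssize_t amdgpu_ring_reset_mask_show(struct amdgpu_device *adev,
						   struct amdgpu_ring *ring,
						   char *buf)
	{
		ssize_t size = 0;

		if (!adev || !ring)
			return -ENODEV;

		/* Order follows how resets escalate: soft -> queue -> full. */
		if (amdgpu_gpu_recovery && !adev->debug_disable_soft_recovery &&
		    !amdgpu_sriov_vf(adev) && ring->funcs->soft_recovery)
			size += sysfs_emit_at(buf, size, "soft ");
		if (amdgpu_gpu_recovery && ring->funcs->reset)
			size += sysfs_emit_at(buf, size, "queue ");
		if (amdgpu_device_should_recover_gpu(adev))
			size += sysfs_emit_at(buf, size, "full ");
		size += sysfs_emit_at(buf, size, "\n");
		return size;
	}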
Re: [PATCH] drm/amd/pm: correct the workload setting
On 10/23/2024 8:42 AM, Kenneth Feng wrote: > Correct the workload setting in order not to mix the setting > with the end user. Update the workload mask accordingly. > > Signed-off-by: Kenneth Feng > --- > drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 17 ++-- > drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 4 +- > .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 39 +++-- > .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 33 --- > .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 42 --- > 5 files changed, 115 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > index accc96a03bd9..f1984736ff4a 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > @@ -1252,7 +1252,8 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block) > atomic64_set(&smu->throttle_int_counter, 0); > smu->watermarks_bitmap = 0; > smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > - smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > + smu->user_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > + smu->driver_workload_mask = 0; > > atomic_set(&smu->smu_power.power_gate.vcn_gated, 1); > atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1); > @@ -1267,10 +1268,12 @@ static int smu_sw_init(struct amdgpu_ip_block > *ip_block) > smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; > smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; > > - if (smu->is_apu) > + if (smu->is_apu) { > smu->workload_mask = 1 << > smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; > - else > + } else { > smu->workload_mask = 1 << > smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; > + smu->user_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; This is driver decided, not user decided. Keeping it as user decided is confusing. Also, ideally user settings is preferred to be kept here - smu_user_dpm_profile > + } > > smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D; > @@ -2346,11 +2349,13 @@ static int smu_switch_power_profile(void *handle, > > if (!en) { > smu->workload_mask &= ~(1 << smu->workload_prority[type]); > + smu->driver_workload_mask &= ~(1 << > smu->workload_prority[type]); > index = fls(smu->workload_mask); > index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : > 0; > workload[0] = smu->workload_setting[index]; > } else { > smu->workload_mask |= (1 << smu->workload_prority[type]); > + smu->driver_workload_mask |= (1 << smu->workload_prority[type]); > index = fls(smu->workload_mask); > index = index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; > workload[0] = smu->workload_setting[index]; > @@ -3045,12 +3050,16 @@ static int smu_set_power_profile_mode(void *handle, > uint32_t param_size) > { > struct smu_context *smu = handle; > + int ret; > > if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled || > !smu->ppt_funcs->set_power_profile_mode) > return -EOPNOTSUPP; > + smu->user_profile_mode_setting = true; > + ret = smu_bump_power_profile_mode(smu, param, param_size); > + smu->user_profile_mode_setting = false; Instead of doing this way, I think it's better to save the user preference as a mask here. Later decide what to be applied whenever workload mask is applied. if (user_profile = default or unknown) // No user preference user_mask = 0 ; // user driver default mask else if (user_profile = xyz) user_mask = xyz_workload_mask; Save user_mask to user_dpm profile. 
Whenever workload mask is applied, select user_mask if non-zero or driver_mask, or a combination of both. Thanks, Lijo > > - return smu_bump_power_profile_mode(smu, param, param_size); > + return ret; > } > > static int smu_get_fan_control_mode(void *handle, u32 *fan_mode) > diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > index 8bb32b3f0d9c..e43b23dd3457 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > @@ -557,10 +557,11 @@ struct smu_context { > bool disable_uclk_switch; > > uint32_t workload_mask; > + uint32_t driver_workload_mask; > uint32_t workload_prority[WORKLOAD_POLICY_MAX]; > uint32_t workload_setting[WORKLOAD_POLICY_MAX]; > uint32_t power_profile_mode; > - uint32_t default_power_profile_mode; > + uint32_t user_profile_mode; > bool pm_enabled; > bool is_apu; >
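Lijo's pseudo-code above might translate to a sketch like this
(illustrative only; storing the user choice in smu_user_dpm_profile as
he suggests would replace the ad-hoc user_profile_mode_setting flag):

	/* Sketch: record the user's explicit choice as a mask once,
	 * then resolve it wherever the workload mask is applied.
	 */
	static u32 smu_effective_workload_mask(struct smu_context *smu,
					       u32 user_profile)
	{
		u32 user_mask = 0;

		if (user_profile != PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT &&
		    user_profile < PP_SMC_POWER_PROFILE_COUNT)
			user_mask = 1 << smu->workload_prority[user_profile];

		/* Prefer the user's explicit choice; else the driver's mask. */
		return user_mask ? user_mask : smu->driver_workload_mask;
	}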
Re: [PATCH v5 4/4] drm/amdgpu: track bo memory stats at runtime
Am 23.10.24 um 13:37 schrieb Tvrtko Ursulin: On 23/10/2024 10:14, Christian König wrote: Am 23.10.24 um 09:38 schrieb Tvrtko Ursulin: On 22/10/2024 17:24, Christian König wrote: Am 22.10.24 um 17:17 schrieb Li, Yunxiang (Teddy): [Public] +static uint32_t fold_memtype(uint32_t memtype) { In general please add prefixes to even static functions, e.g. amdgpu_vm_ or amdgpu_bo_. + /* Squash private placements into 'cpu' to keep the legacy userspace view. */ + switch (mem_type) { + case TTM_PL_VRAM: + case TTM_PL_TT: + return memtype + default: + return TTM_PL_SYSTEM; + } +} + +static uint32_t bo_get_memtype(struct amdgpu_bo *bo) { That whole function belongs into amdgpu_bo.c Do you mean bo_get_memtype or fold_memtype? I debated whether bo_get_memtype should go into amdgpu_vm.c or amdgpu_bo.c, and since it's using fold_memtype and only useful for memory stats because of folding the private placements I just left them here together with the other mem stats code. I can move it to amdgpu_bo.c make it return the memtype verbatim and just fold it when I do the accounting. I think that folding GDS, GWS and OA into system is also a bug. We should really not doing that. Just wanted to point out for this round that the code to query the current placement from a BO should probably go into amdgpu_bo.c and not amdgpu_vm.c + struct ttm_resource *res = bo->tbo.resource; + const uint32_t domain_to_pl[] = { + [ilog2(AMDGPU_GEM_DOMAIN_CPU)] = TTM_PL_SYSTEM, + [ilog2(AMDGPU_GEM_DOMAIN_GTT)] = TTM_PL_TT, + [ilog2(AMDGPU_GEM_DOMAIN_VRAM)] = TTM_PL_VRAM, + [ilog2(AMDGPU_GEM_DOMAIN_GDS)] = AMDGPU_PL_GDS, + [ilog2(AMDGPU_GEM_DOMAIN_GWS)] = AMDGPU_PL_GWS, + [ilog2(AMDGPU_GEM_DOMAIN_OA)] = AMDGPU_PL_OA, + [ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = AMDGPU_PL_DOORBELL, + }; + uint32_t domain; + + if (res) + return fold_memtype(res->mem_type); + + /* + * If no backing store use one of the preferred domain for basic + * stats. We take the MSB since that should give a reasonable + * view. + */ + BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT || TTM_PL_VRAM < TTM_PL_SYSTEM); + domain = fls(bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK); + if (drm_WARN_ON_ONCE(&adev->ddev, + domain == 0 || --domain >= ARRAY_SIZE(domain_to_pl))) It's perfectly legal to create a BO without a placement. That one just won't have a backing store. This is lifted from the previous change I'm rebasing onto. I think what it’s trying to do is if the BO doesn't have a placement, use the "biggest" (VRAM > TT > SYSTEM) preferred placement for the purpose of accounting. Previously we just ignore BOs that doesn't have a placement. I guess there's argument for going with either approaches. I was not arguing, I'm simply pointing out a bug. It's perfectly valid for bo->preferred_domains to be 0. So the following WARN_ON() that no bit is set is incorrect. + return 0; + return fold_memtype(domain_to_pl[domain]) That would need specular execution mitigation if I'm not completely mistaken. Better use a switch/case statement. Do you mean change the array indexing to a switch statement? Yes. Did you mean array_index_nospec? Yes. Domain is not a direct userspace input and is calculated from the mask which sanitized to allowed values prior to this call. So I *think* switch is an overkill but don't mind it either. Just commenting FWIW. I missed that the mask is applied. Thinking more about it I'm not sure if we should do this conversion in the first place. IIRC Tvrtko you once suggested a patch which switched a bunch of code to use the TTM placement instead of the UAPI flags. 
Maybe 8fb0efb10184 ("drm/amdgpu: Reduce mem_type to domain double indirection") is what are you thinking of? Yes, exactly that one. Going more into this direction I think when we want to look at the current placement we should probably also use the TTM PL enumeration directly. It does this already. The placement flags are just to "invent" a TTM PL enum when bo->tbo.resource == NULL. Ah, good point! I though we would do the mapping the other way around. In this case that is even more something we should probably not do at all. When bo->tbo.resource is NULL then this BO isn't resident at all, so it should not account to resident memory. Again, based of the same enum. Not sure if you have something other in mind or you are happy with that? I think that for drm-total-* we should use the GEM flags and for drm-resident-* we should use the TTM placement. Then what Teddy does is IMO only tangential, he just changes when stats are collected and not this aspect. Yeah, right but we should probably fix it up in the right way while on it. To fold or not the special placements (GWS, GDS & co) is also tangential. In my patch I
Re: [PATCH] drm/amd/display: add missing tracepoint event in DM atomic_commit_tail
On 2024-10-23 09:53, Melissa Wen wrote:

There are two events to trace the beginning and the end of
amdgpu_dm_atomic_commit_tail, but only the one at the beginning was
placed. Place the amdgpu_dm_atomic_commit_tail_finish tracepoint at the
end, then.

Signed-off-by: Melissa Wen

Reviewed-by: Leo Li

Thanks!

---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 98f0b142f497..bbfc47f6595f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -10088,6 +10088,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 	for (i = 0; i < crtc_disable_count; i++)
 		pm_runtime_put_autosuspend(dev->dev);
 	pm_runtime_mark_last_busy(dev->dev);
+
+	trace_amdgpu_dm_atomic_commit_tail_finish(state);
 }

 static int dm_force_atomic_commit(struct drm_connector *connector)
[PATCH] drm/amd/display: add missing tracepoint event in DM atomic_commit_tail
There are two events to trace the beginning and the end of
amdgpu_dm_atomic_commit_tail, but only the one at the beginning was
placed. Place the amdgpu_dm_atomic_commit_tail_finish tracepoint at the
end, then.

Signed-off-by: Melissa Wen
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 98f0b142f497..bbfc47f6595f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -10088,6 +10088,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 	for (i = 0; i < crtc_disable_count; i++)
 		pm_runtime_put_autosuspend(dev->dev);
 	pm_runtime_mark_last_busy(dev->dev);
+
+	trace_amdgpu_dm_atomic_commit_tail_finish(state);
 }

 static int dm_force_atomic_commit(struct drm_connector *connector)
--
2.45.2
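For context, both tracepoints are defined from the same event class in
amdgpu_dm_trace.h, keyed on the atomic state pointer, so a trace
consumer can pair begin/finish per commit; roughly (paraphrased from
that header):

	/* Paraphrased: begin and finish share one event class. */
	DEFINE_EVENT(amdgpu_dm_atomic_state_template,
		     amdgpu_dm_atomic_commit_tail_begin,
		     TP_PROTO(const struct drm_atomic_state *state),
		     TP_ARGS(state));

	DEFINE_EVENT(amdgpu_dm_atomic_state_template,
		     amdgpu_dm_atomic_commit_tail_finish,
		     TP_PROTO(const struct drm_atomic_state *state),
		     TP_ARGS(state));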
Re: [PATCH v5 4/4] drm/amdgpu: track bo memory stats at runtime
On 23/10/2024 10:14, Christian König wrote: Am 23.10.24 um 09:38 schrieb Tvrtko Ursulin: On 22/10/2024 17:24, Christian König wrote: Am 22.10.24 um 17:17 schrieb Li, Yunxiang (Teddy): [Public] +static uint32_t fold_memtype(uint32_t memtype) { In general please add prefixes to even static functions, e.g. amdgpu_vm_ or amdgpu_bo_. + /* Squash private placements into 'cpu' to keep the legacy userspace view. */ + switch (mem_type) { + case TTM_PL_VRAM: + case TTM_PL_TT: + return memtype + default: + return TTM_PL_SYSTEM; + } +} + +static uint32_t bo_get_memtype(struct amdgpu_bo *bo) { That whole function belongs into amdgpu_bo.c Do you mean bo_get_memtype or fold_memtype? I debated whether bo_get_memtype should go into amdgpu_vm.c or amdgpu_bo.c, and since it's using fold_memtype and only useful for memory stats because of folding the private placements I just left them here together with the other mem stats code. I can move it to amdgpu_bo.c make it return the memtype verbatim and just fold it when I do the accounting. I think that folding GDS, GWS and OA into system is also a bug. We should really not doing that. Just wanted to point out for this round that the code to query the current placement from a BO should probably go into amdgpu_bo.c and not amdgpu_vm.c + struct ttm_resource *res = bo->tbo.resource; + const uint32_t domain_to_pl[] = { + [ilog2(AMDGPU_GEM_DOMAIN_CPU)] = TTM_PL_SYSTEM, + [ilog2(AMDGPU_GEM_DOMAIN_GTT)] = TTM_PL_TT, + [ilog2(AMDGPU_GEM_DOMAIN_VRAM)] = TTM_PL_VRAM, + [ilog2(AMDGPU_GEM_DOMAIN_GDS)] = AMDGPU_PL_GDS, + [ilog2(AMDGPU_GEM_DOMAIN_GWS)] = AMDGPU_PL_GWS, + [ilog2(AMDGPU_GEM_DOMAIN_OA)] = AMDGPU_PL_OA, + [ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = AMDGPU_PL_DOORBELL, + }; + uint32_t domain; + + if (res) + return fold_memtype(res->mem_type); + + /* + * If no backing store use one of the preferred domain for basic + * stats. We take the MSB since that should give a reasonable + * view. + */ + BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT || TTM_PL_VRAM < TTM_PL_SYSTEM); + domain = fls(bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK); + if (drm_WARN_ON_ONCE(&adev->ddev, + domain == 0 || --domain >= ARRAY_SIZE(domain_to_pl))) It's perfectly legal to create a BO without a placement. That one just won't have a backing store. This is lifted from the previous change I'm rebasing onto. I think what it’s trying to do is if the BO doesn't have a placement, use the "biggest" (VRAM > TT > SYSTEM) preferred placement for the purpose of accounting. Previously we just ignore BOs that doesn't have a placement. I guess there's argument for going with either approaches. I was not arguing, I'm simply pointing out a bug. It's perfectly valid for bo->preferred_domains to be 0. So the following WARN_ON() that no bit is set is incorrect. + return 0; + return fold_memtype(domain_to_pl[domain]) That would need specular execution mitigation if I'm not completely mistaken. Better use a switch/case statement. Do you mean change the array indexing to a switch statement? Yes. Did you mean array_index_nospec? Yes. Domain is not a direct userspace input and is calculated from the mask which sanitized to allowed values prior to this call. So I *think* switch is an overkill but don't mind it either. Just commenting FWIW. I missed that the mask is applied. Thinking more about it I'm not sure if we should do this conversion in the first place. IIRC Tvrtko you once suggested a patch which switched a bunch of code to use the TTM placement instead of the UAPI flags. 
Maybe 8fb0efb10184 ("drm/amdgpu: Reduce mem_type to domain double indirection") is what are you thinking of? Going more into this direction I think when we want to look at the current placement we should probably also use the TTM PL enumeration directly. It does this already. The placement flags are just to "invent" a TTM PL enum when bo->tbo.resource == NULL. if (!res) { /* * If no backing store use one of the preferred domain for basic * stats. We take the MSB since that should give a reasonable * view. */ BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT || TTM_PL_VRAM < TTM_PL_SYSTEM); type = fls(bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK); if (!type) return; type--; if (drm_WARN_ON_ONCE(&adev->ddev, type >= ARRAY_SIZE(domain_to_pl))) return; type = domain_to_pl[type]; } else { type = res->mem_type; } ... stats[type].total += size; if (drm_ge
Re: [PATCH v5 4/4] drm/amdgpu: track bo memory stats at runtime
On 22/10/2024 17:24, Christian König wrote: Am 22.10.24 um 17:17 schrieb Li, Yunxiang (Teddy): [Public] +static uint32_t fold_memtype(uint32_t memtype) { In general please add prefixes to even static functions, e.g. amdgpu_vm_ or amdgpu_bo_. + /* Squash private placements into 'cpu' to keep the legacy userspace view. */ + switch (mem_type) { + case TTM_PL_VRAM: + case TTM_PL_TT: + return memtype + default: + return TTM_PL_SYSTEM; + } +} + +static uint32_t bo_get_memtype(struct amdgpu_bo *bo) { That whole function belongs into amdgpu_bo.c Do you mean bo_get_memtype or fold_memtype? I debated whether bo_get_memtype should go into amdgpu_vm.c or amdgpu_bo.c, and since it's using fold_memtype and only useful for memory stats because of folding the private placements I just left them here together with the other mem stats code. I can move it to amdgpu_bo.c make it return the memtype verbatim and just fold it when I do the accounting. I think that folding GDS, GWS and OA into system is also a bug. We should really not doing that. Just wanted to point out for this round that the code to query the current placement from a BO should probably go into amdgpu_bo.c and not amdgpu_vm.c + struct ttm_resource *res = bo->tbo.resource; + const uint32_t domain_to_pl[] = { + [ilog2(AMDGPU_GEM_DOMAIN_CPU)] = TTM_PL_SYSTEM, + [ilog2(AMDGPU_GEM_DOMAIN_GTT)] = TTM_PL_TT, + [ilog2(AMDGPU_GEM_DOMAIN_VRAM)] = TTM_PL_VRAM, + [ilog2(AMDGPU_GEM_DOMAIN_GDS)] = AMDGPU_PL_GDS, + [ilog2(AMDGPU_GEM_DOMAIN_GWS)] = AMDGPU_PL_GWS, + [ilog2(AMDGPU_GEM_DOMAIN_OA)] = AMDGPU_PL_OA, + [ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = AMDGPU_PL_DOORBELL, + }; + uint32_t domain; + + if (res) + return fold_memtype(res->mem_type); + + /* + * If no backing store use one of the preferred domain for basic + * stats. We take the MSB since that should give a reasonable + * view. + */ + BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT || TTM_PL_VRAM < TTM_PL_SYSTEM); + domain = fls(bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK); + if (drm_WARN_ON_ONCE(&adev->ddev, + domain == 0 || --domain >= ARRAY_SIZE(domain_to_pl))) It's perfectly legal to create a BO without a placement. That one just won't have a backing store. This is lifted from the previous change I'm rebasing onto. I think what it’s trying to do is if the BO doesn't have a placement, use the "biggest" (VRAM > TT > SYSTEM) preferred placement for the purpose of accounting. Previously we just ignore BOs that doesn't have a placement. I guess there's argument for going with either approaches. I was not arguing, I'm simply pointing out a bug. It's perfectly valid for bo->preferred_domains to be 0. So the following WARN_ON() that no bit is set is incorrect. + return 0; + return fold_memtype(domain_to_pl[domain]) That would need specular execution mitigation if I'm not completely mistaken. Better use a switch/case statement. Do you mean change the array indexing to a switch statement? Yes. Did you mean array_index_nospec? Domain is not a direct userspace input and is calculated from the mask which sanitized to allowed values prior to this call. So I *think* switch is an overkill but don't mind it either. Just commenting FWIW. Regards, Tvrtko
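For reference, the speculation hardening Christian alludes to is
available as array_index_nospec() from <linux/nospec.h>; a hedged
sketch of the lookup using it instead of a switch, with the bounds
check shape and table taken from the patch under discussion:

	#include <linux/kernel.h>
	#include <linux/nospec.h>

	/* Sketch only: clamp the computed index under speculative
	 * execution before the table load.
	 */
	static uint32_t pl_from_domain_msb(uint32_t domain)
	{
		static const uint32_t domain_to_pl[] = {
			[ilog2(AMDGPU_GEM_DOMAIN_CPU)]  = TTM_PL_SYSTEM,
			[ilog2(AMDGPU_GEM_DOMAIN_GTT)]  = TTM_PL_TT,
			[ilog2(AMDGPU_GEM_DOMAIN_VRAM)] = TTM_PL_VRAM,
		};

		if (domain == 0 || --domain >= ARRAY_SIZE(domain_to_pl))
			return TTM_PL_SYSTEM;

		domain = array_index_nospec(domain, ARRAY_SIZE(domain_to_pl));
		return domain_to_pl[domain];
	}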
[PATCH] drm/amdgpu/smu13: fix profile reporting
The following 3 commits landed in parallel: commit d7d2688bf4ea ("drm/amd/pm: update workload mask after the setting") commit 7a1613e47e65 ("drm/amdgpu/smu13: always apply the powersave optimization") commit 7c210ca5a2d7 ("drm/amdgpu: handle default profile on on devices without fullscreen 3D") While everything is set correctly, this caused the profile to be reported incorrectly because both the powersave and fullscreen3d bits were set in the mask and when the driver prints the profile, it looks for the first bit set. Fixes: d7d2688bf4ea ("drm/amd/pm: update workload mask after the setting") Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index cb923e33fd6f..d53e162dcd8d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2485,7 +2485,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, DpmActivityMonitorCoeffInt_t *activity_monitor = &(activity_monitor_external.DpmActivityMonitorCoeffInt); int workload_type, ret = 0; - u32 workload_mask; + u32 workload_mask, selected_workload_mask; smu->power_profile_mode = input[size]; @@ -2552,7 +2552,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, if (workload_type < 0) return -EINVAL; - workload_mask = 1 << workload_type; + selected_workload_mask = workload_mask = 1 << workload_type; /* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */ if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && @@ -2572,7 +2572,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, workload_mask, NULL); if (!ret) - smu->workload_mask = workload_mask; + smu->workload_mask = selected_workload_mask; return ret; } -- 2.46.2
RE: [PATCH v5 4/4] drm/amdgpu: track bo memory stats at runtime
Yeah it looks like I missed the whole active/purgeable thing as well...

Teddy
Re: [PATCH v5 4/4] drm/amdgpu: track bo memory stats at runtime
On 23/10/2024 13:12, Christian König wrote: Am 23.10.24 um 13:37 schrieb Tvrtko Ursulin: On 23/10/2024 10:14, Christian König wrote: Am 23.10.24 um 09:38 schrieb Tvrtko Ursulin: On 22/10/2024 17:24, Christian König wrote: Am 22.10.24 um 17:17 schrieb Li, Yunxiang (Teddy): [Public] +static uint32_t fold_memtype(uint32_t memtype) { In general please add prefixes to even static functions, e.g. amdgpu_vm_ or amdgpu_bo_. + /* Squash private placements into 'cpu' to keep the legacy userspace view. */ + switch (mem_type) { + case TTM_PL_VRAM: + case TTM_PL_TT: + return memtype + default: + return TTM_PL_SYSTEM; + } +} + +static uint32_t bo_get_memtype(struct amdgpu_bo *bo) { That whole function belongs into amdgpu_bo.c Do you mean bo_get_memtype or fold_memtype? I debated whether bo_get_memtype should go into amdgpu_vm.c or amdgpu_bo.c, and since it's using fold_memtype and only useful for memory stats because of folding the private placements I just left them here together with the other mem stats code. I can move it to amdgpu_bo.c make it return the memtype verbatim and just fold it when I do the accounting. I think that folding GDS, GWS and OA into system is also a bug. We should really not doing that. Just wanted to point out for this round that the code to query the current placement from a BO should probably go into amdgpu_bo.c and not amdgpu_vm.c + struct ttm_resource *res = bo->tbo.resource; + const uint32_t domain_to_pl[] = { + [ilog2(AMDGPU_GEM_DOMAIN_CPU)] = TTM_PL_SYSTEM, + [ilog2(AMDGPU_GEM_DOMAIN_GTT)] = TTM_PL_TT, + [ilog2(AMDGPU_GEM_DOMAIN_VRAM)] = TTM_PL_VRAM, + [ilog2(AMDGPU_GEM_DOMAIN_GDS)] = AMDGPU_PL_GDS, + [ilog2(AMDGPU_GEM_DOMAIN_GWS)] = AMDGPU_PL_GWS, + [ilog2(AMDGPU_GEM_DOMAIN_OA)] = AMDGPU_PL_OA, + [ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = AMDGPU_PL_DOORBELL, + }; + uint32_t domain; + + if (res) + return fold_memtype(res->mem_type); + + /* + * If no backing store use one of the preferred domain for basic + * stats. We take the MSB since that should give a reasonable + * view. + */ + BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT || TTM_PL_VRAM < TTM_PL_SYSTEM); + domain = fls(bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK); + if (drm_WARN_ON_ONCE(&adev->ddev, + domain == 0 || --domain >= ARRAY_SIZE(domain_to_pl))) It's perfectly legal to create a BO without a placement. That one just won't have a backing store. This is lifted from the previous change I'm rebasing onto. I think what it’s trying to do is if the BO doesn't have a placement, use the "biggest" (VRAM > TT > SYSTEM) preferred placement for the purpose of accounting. Previously we just ignore BOs that doesn't have a placement. I guess there's argument for going with either approaches. I was not arguing, I'm simply pointing out a bug. It's perfectly valid for bo->preferred_domains to be 0. So the following WARN_ON() that no bit is set is incorrect. + return 0; + return fold_memtype(domain_to_pl[domain]) That would need specular execution mitigation if I'm not completely mistaken. Better use a switch/case statement. Do you mean change the array indexing to a switch statement? Yes. Did you mean array_index_nospec? Yes. Domain is not a direct userspace input and is calculated from the mask which sanitized to allowed values prior to this call. So I *think* switch is an overkill but don't mind it either. Just commenting FWIW. I missed that the mask is applied. Thinking more about it I'm not sure if we should do this conversion in the first place. 
IIRC Tvrtko you once suggested a patch which switched a bunch of code to use the TTM placement instead of the UAPI flags. Maybe 8fb0efb10184 ("drm/amdgpu: Reduce mem_type to domain double indirection") is what are you thinking of? Yes, exactly that one. Going more into this direction I think when we want to look at the current placement we should probably also use the TTM PL enumeration directly. It does this already. The placement flags are just to "invent" a TTM PL enum when bo->tbo.resource == NULL. Ah, good point! I though we would do the mapping the other way around. In this case that is even more something we should probably not do at all. When bo->tbo.resource is NULL then this BO isn't resident at all, so it should not account to resident memory. It doesn't, only for total. I should have pasted more context..: struct ttm_resource *res = bo->tbo.resource; ... /* DRM stats common fields: */ stats[type].total += size; if (drm_gem_object_is_shared_for_memory_stats(obj)) stats[type].drm.shared += size; else stats[type].drm.private += size; if (res) { stats[type].drm.resident += size So if no current
Re: [PATCH v5 4/4] drm/amdgpu: track bo memory stats at runtime
On 22/10/2024 18:06, Christian König wrote: Am 22.10.24 um 18:46 schrieb Li, Yunxiang (Teddy): [Public] I suppose we could add a field like amd-memory-private: to cover the private placements. No, that is not really appropriate either. GWS, GDS and OA are not memory in the first place. Those BOs are HW blocks which the driver allocated to use. So accounting them for the memory usage doesn't make any sense at all. We could print them in the fdinfo as something special for statistics, but it's probably not that useful. When would a BO not have a placement, is it when it is being moved? There are BOs which are only temporary, so when they are evicted their backing store is just discarded. Additional to that allocation of backing store is sometimes delayed until the first use. Would this work correctly if instead of preferred allowed mask was used? Point being, to correctly support fdinfo stats drm-total-, *if* a BO *can* have a backing store at any point it should always be counted there. *If* it currently has a placement it is drm-resident-. If it has a placement but can be discarded it is drm-purgeable-. Etc. Regards, Tvrtko Since we are tracking the state changes, I wonder if such situations can be avoided now so whenever we call these stat update functions the BO would always have a placement. No, as I said before those use cases are perfectly valid. BO don't need a backing store nor do they need a placement. So the code has to gracefully handle that. Regards, Christian. Teddy
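Spelling Tvrtko's rule out against the stats layout quoted earlier in
the thread (a sketch under those assumptions, not the final patch; the
total field and the embedded drm struct follow the snippet shown in the
later replies):

	/* Sketch: anything that can ever have a backing store counts
	 * toward total; only a currently placed BO is resident, and
	 * resident + discardable makes it purgeable.
	 */
	static void account_bo(struct amdgpu_bo *bo, uint32_t type,
			       uint64_t size, struct amdgpu_mem_stats *stats)
	{
		stats[type].total += size;

		if (bo->tbo.resource) {
			stats[type].drm.resident += size;
			if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
				stats[type].drm.purgeable += size;
		}
	}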
Re: [PATCH v5 4/4] drm/amdgpu: track bo memory stats at runtime
Am 23.10.24 um 14:24 schrieb Tvrtko Ursulin: [SNIP] To fold or not the special placements (GWS, GDS & co) is also tangential. In my patch I just preserved the legacy behaviour so it can easily be tweaked on top. Yeah, but again the original behavior is completely broken. GWS, GDS and OA are counted in blocks of HW units (multiplied by PAGE_SIZE IIRC to avoid some GEM&TTM warnings). When you accumulate that anywhere in the memory stats then that is just completely off. Ooops. :) Are they backed by some memory though, be it system or VRAM? GDS is an internal 4 or 64KiB memory block which is only valid while shaders are running. It is used to communicate stuff between different shader stages and not even CPU accessible. GWS and OA are not even memory, those are just HW blocks which implement a fixed function. IIRC most HW generation have 16 of each and when setting up the application virtual address space you can specify how many will be used by the application. Regards, Christian. Regards, Tvrtko
RE: [PATCH v5 4/4] drm/amdgpu: track bo memory stats at runtime
[AMD Official Use Only - AMD Internal Distribution Only] > From: Tvrtko Ursulin > Sent: Wednesday, October 23, 2024 8:25 > On 23/10/2024 13:12, Christian König wrote: > > Am 23.10.24 um 13:37 schrieb Tvrtko Ursulin: > >> > >> On 23/10/2024 10:14, Christian König wrote: > >>> Am 23.10.24 um 09:38 schrieb Tvrtko Ursulin: > > On 22/10/2024 17:24, Christian König wrote: > > Am 22.10.24 um 17:17 schrieb Li, Yunxiang (Teddy): > >> [Public] > >> > +static uint32_t fold_memtype(uint32_t memtype) { > >>> In general please add prefixes to even static functions, e.g. > >>> amdgpu_vm_ or > >>> amdgpu_bo_. > >>> > + /* Squash private placements into 'cpu' to keep the legacy > userspace view. > >>> */ > + switch (mem_type) { > + case TTM_PL_VRAM: > + case TTM_PL_TT: > + return memtype > + default: > + return TTM_PL_SYSTEM; > + } > +} > + > +static uint32_t bo_get_memtype(struct amdgpu_bo *bo) { > >>> That whole function belongs into amdgpu_bo.c > >> Do you mean bo_get_memtype or fold_memtype? I debated whether > >> bo_get_memtype should go into amdgpu_vm.c or amdgpu_bo.c, and > >> since it's using fold_memtype and only useful for memory stats > >> because of folding the private placements I just left them here > >> together with the other mem stats code. > >> > >> I can move it to amdgpu_bo.c make it return the memtype verbatim > >> and just fold it when I do the accounting. > > > > I think that folding GDS, GWS and OA into system is also a bug. We > > should really not doing that. > > > > Just wanted to point out for this round that the code to query the > > current placement from a BO should probably go into amdgpu_bo.c > > and not amdgpu_vm.c > > > >> > + struct ttm_resource *res = bo->tbo.resource; > + const uint32_t domain_to_pl[] = { > + [ilog2(AMDGPU_GEM_DOMAIN_CPU)] = > +TTM_PL_SYSTEM, > + [ilog2(AMDGPU_GEM_DOMAIN_GTT)] = TTM_PL_TT, > + [ilog2(AMDGPU_GEM_DOMAIN_VRAM)] = > TTM_PL_VRAM, > + [ilog2(AMDGPU_GEM_DOMAIN_GDS)] = > +AMDGPU_PL_GDS, > + [ilog2(AMDGPU_GEM_DOMAIN_GWS)] = > +AMDGPU_PL_GWS, > + [ilog2(AMDGPU_GEM_DOMAIN_OA)] = > AMDGPU_PL_OA, > + [ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = > >>> AMDGPU_PL_DOORBELL, > + }; > + uint32_t domain; > + > + if (res) > + return fold_memtype(res->mem_type); > + > + /* > +* If no backing store use one of the preferred domain for > basic > +* stats. We take the MSB since that should give a > +reasonable > +* view. > +*/ > + BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT || > TTM_PL_VRAM < > >>> TTM_PL_SYSTEM); > + domain = fls(bo->preferred_domains & > +AMDGPU_GEM_DOMAIN_MASK); > + if (drm_WARN_ON_ONCE(&adev->ddev, > +domain == 0 || --domain >= > ARRAY_SIZE(domain_to_pl))) > >>> It's perfectly legal to create a BO without a placement. That > >>> one just won't have a backing store. > >>> > >> This is lifted from the previous change I'm rebasing onto. I > >> think what it’s trying to do is if the BO doesn't have a > >> placement, use the "biggest" (VRAM > TT > SYSTEM) preferred > >> placement for the purpose of accounting. Previously we just > >> ignore BOs that doesn't have a placement. I guess there's > >> argument for going with either approaches. > > > > I was not arguing, I'm simply pointing out a bug. It's perfectly > > valid for bo->preferred_domains to be 0. > > > > So the following WARN_ON() that no bit is set is incorrect. > > > >> > + return 0; > + return fold_memtype(domain_to_pl[domain]) > >>> That would need specular execution mitigation if I'm not > >>> completely mistaken. > >>> > >>> Better use a switch/case statement. 
> >>> > >> Do you mean change the array indexing to a switch statement? > > > > Yes. > > Did you mean array_index_nospec? > >>> > >>> Yes. > >>> > Domain is not a direct userspace input and is calculated from the > mask, which is sanitized to allowed values prior to this call. So I > *think* switch is overkill but don't mind it either. Just > commenting FWIW. > >>> > >>> I missed that the mask is applied. > >>> > >>> Thinking more about it I'm not sure if we should do this conversion > >>> in the first place. IIRC, Tvrtko, you once suggested a patch which > >>> switched a bunch of code to use the TTM
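For readers following the array_index_nospec() exchange above: a minimal sketch of what that hardening would look like if the index were ever derived from untrusted input (the thread concludes the mask sanitization makes it unnecessary here):

#include <linux/nospec.h>

/* Sketch only: clamp 'domain' against speculative out-of-bounds reads
 * before using it to index domain_to_pl[]. array_index_nospec() leaves
 * the value unchanged on the architectural path but forces it to 0
 * under misspeculation, so the CPU cannot leak adjacent memory.
 */
domain = array_index_nospec(domain, ARRAY_SIZE(domain_to_pl));
return fold_memtype(domain_to_pl[domain]);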
[PATCH AUTOSEL 6.11 25/30] drm/amdkfd: Accounting pdd vram_usage for svm
From: Philip Yang [ Upstream commit 68d26c10ef503175df3142db6fcd75dd94860592 ] Process device data pdd->vram_usage is read by rocm-smi via sysfs; it is currently missing the svm_bo usage accounting, so the "rocm-smi --showpids" per-process VRAM usage report is incorrect. Add pdd->vram_usage accounting on svm_bo allocation and release, and change it to atomic64_t because it is now updated outside the process mutex. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit 98c0b0efcc11f2a5ddf3ce33af1e48eedf808b04) Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 +++--- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 26 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 546b02f2241a6..5953bc5f31192 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1170,7 +1170,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM) size >>= 1; - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size)); + atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage); } mutex_unlock(&p->mutex); @@ -1241,7 +1241,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, kfd_process_device_remove_obj_handle( pdd, GET_IDR_HANDLE(args->handle)); - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size); + atomic64_sub(size, &pdd->vram_usage); err_unlock: err_pdd: @@ -2346,7 +2346,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { bo_bucket->restored_offset = offset; /* Update the VRAM usage count */ - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size); + atomic64_add(bo_bucket->size, &pdd->vram_usage); } return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 2b3ec92981e8f..f35741fade911 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -766,7 +766,7 @@ struct kfd_process_device { enum kfd_pdd_bound bound; /* VRAM usage */ - uint64_t vram_usage; + atomic64_t vram_usage; struct attribute attr_vram; char vram_filename[MAX_SYSFS_FILENAME_LEN]; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index e44892109f71b..8343b3e4de7b5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -306,7 +306,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr, } else if (strncmp(attr->name, "vram_", 5) == 0) { struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, attr_vram); - return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage)); + return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage)); } else if (strncmp(attr->name, "sdma_", 5) == 0) { struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, attr_sdma); @@ -1599,7 +1599,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, pdd->bound = PDD_UNBOUND; pdd->already_dequeued = false; pdd->runtime_inuse = false; - pdd->vram_usage = 0; + atomic64_set(&pdd->vram_usage, 0); pdd->sdma_past_activity_counter = 0; pdd->user_gpu_id = dev->id; atomic64_set(&pdd->evict_duration_counter, 0);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index bd9c2921e0dcc..7d00d89586a10 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -404,6 +404,27 @@ static void svm_range_bo_release(struct kref *kref) spin_lock(&svm_bo->list_lock); } spin_unlock(&svm_bo->list_lock); + + if (mmget_not_zero(svm_bo->eviction_fence->mm)) { + struct kfd_process_device *pdd; + struct kfd_process *p; + struct mm_struct *mm; + + mm = svm_bo->eviction_fence->mm; + /* + * The forked child process takes svm_bo device pages ref, svm_bo could be + * released after parent process is gone. + */ + p = kfd_looku
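The kfd_svm.c hunk above is truncated in this archive. For context, the accounting pattern the commit message describes reduces to the following sketch (simplified, with a hypothetical helper name; not the verbatim upstream hunk):

/* Charge/uncharge per-process VRAM usage with atomic ops, since the
 * svm_bo release path runs outside the process mutex. atomic64_add/sub
 * replace the earlier WRITE_ONCE() read-modify-write, which was not
 * safe against concurrent updaters.
 */
static void svm_account_vram(struct kfd_process_device *pdd, u64 size, bool alloc)
{
	if (alloc)
		atomic64_add(size, &pdd->vram_usage);
	else
		atomic64_sub(size, &pdd->vram_usage);
}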
[PATCH AUTOSEL 6.6 19/23] drm/amdkfd: Accounting pdd vram_usage for svm
From: Philip Yang [ Upstream commit 68d26c10ef503175df3142db6fcd75dd94860592 ] Process device data pdd->vram_usage is read by rocm-smi via sysfs; it is currently missing the svm_bo usage accounting, so the "rocm-smi --showpids" per-process VRAM usage report is incorrect. Add pdd->vram_usage accounting on svm_bo allocation and release, and change it to atomic64_t because it is now updated outside the process mutex. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit 98c0b0efcc11f2a5ddf3ce33af1e48eedf808b04) Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 +++--- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 26 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 19d46be639429..8669677662d0c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1164,7 +1164,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM) size >>= 1; - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size)); + atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage); } mutex_unlock(&p->mutex); @@ -1235,7 +1235,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, kfd_process_device_remove_obj_handle( pdd, GET_IDR_HANDLE(args->handle)); - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size); + atomic64_sub(size, &pdd->vram_usage); err_unlock: err_pdd: @@ -2352,7 +2352,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { bo_bucket->restored_offset = offset; /* Update the VRAM usage count */ - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size); + atomic64_add(bo_bucket->size, &pdd->vram_usage); } return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 67204c3dfbb8f..27c9d5c43765a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -765,7 +765,7 @@ struct kfd_process_device { enum kfd_pdd_bound bound; /* VRAM usage */ - uint64_t vram_usage; + atomic64_t vram_usage; struct attribute attr_vram; char vram_filename[MAX_SYSFS_FILENAME_LEN]; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 43f520b379670..6c90231e0aec2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -306,7 +306,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr, } else if (strncmp(attr->name, "vram_", 5) == 0) { struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, attr_vram); - return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage)); + return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage)); } else if (strncmp(attr->name, "sdma_", 5) == 0) { struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, attr_sdma); @@ -1589,7 +1589,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, pdd->bound = PDD_UNBOUND; pdd->already_dequeued = false; pdd->runtime_inuse = false; - pdd->vram_usage = 0; + atomic64_set(&pdd->vram_usage, 0); pdd->sdma_past_activity_counter = 0; pdd->user_gpu_id = dev->id; atomic64_set(&pdd->evict_duration_counter, 0);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index ce76d45549984..6b7c6f45a80a8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -391,6 +391,27 @@ static void svm_range_bo_release(struct kref *kref) spin_lock(&svm_bo->list_lock); } spin_unlock(&svm_bo->list_lock); + + if (mmget_not_zero(svm_bo->eviction_fence->mm)) { + struct kfd_process_device *pdd; + struct kfd_process *p; + struct mm_struct *mm; + + mm = svm_bo->eviction_fence->mm; + /* + * The forked child process takes svm_bo device pages ref, svm_bo could be + * released after parent process is gone. + */ + p = kfd_looku
Re: [PATCH 2/2] Documentation/gpu/amdgpu: Add programming model for DCN
On 2024-10-16 23:34, Rodrigo Siqueira wrote: > One of the challenges to contributing to the display code is the > complexity of the DC component. This commit adds a documentation page > that discusses the programming model used by DCN and an overview of how > the display code is organized. > > Cc: Leo Li > Cc: Aurabindo Pillai > Cc: Hamza Mahfooz > Cc: Harry Wentland > Cc: Mario Limonciello > Cc: Christian König > Cc: Alex Deucher > Signed-off-by: Rodrigo Siqueira Series is Reviewed-by: Harry Wentland Harry > --- > .../gpu/amdgpu/display/dc-arch-overview.svg | 731 + > .../gpu/amdgpu/display/dc-components.svg | 732 ++ > .../gpu/amdgpu/display/dcn-blocks.rst | 2 + > .../gpu/amdgpu/display/dcn-overview.rst | 2 + > Documentation/gpu/amdgpu/display/index.rst | 1 + > .../amdgpu/display/programming-model-dcn.rst | 162 > 6 files changed, 1630 insertions(+) > create mode 100644 Documentation/gpu/amdgpu/display/dc-arch-overview.svg > create mode 100644 Documentation/gpu/amdgpu/display/dc-components.svg > create mode 100644 Documentation/gpu/amdgpu/display/programming-model-dcn.rst > > diff --git a/Documentation/gpu/amdgpu/display/dc-arch-overview.svg b/Documentation/gpu/amdgpu/display/dc-arch-overview.svg > new file mode 100644 > index ..23394931cf26 > --- /dev/null > +++ b/Documentation/gpu/amdgpu/display/dc-arch-overview.svg > @@ -0,0 +1,731 @@ > [quoted Inkscape SVG markup (XML namespaces, arrow-marker defs, and path data) trimmed; the hunks add only the figure sources for the new documentation page]
[PATCH] drm/amd/amdgpu: limit single process inside MES
This is for MES to limit only one process for the user queues Signed-off-by: Shaoyun Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 24 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 19 +++ drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 11 +++ drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 11 +++ 5 files changed, 67 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index e96984c53e72..72e38d621a29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1576,9 +1576,11 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, if (adev->enforce_isolation[i] && !partition_values[i]) { /* Going from enabled to disabled */ amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); + amdgpu_mes_set_enforce_isolation(adev, i, false); } else if (!adev->enforce_isolation[i] && partition_values[i]) { /* Going from disabled to enabled */ amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); + amdgpu_mes_set_enforce_isolation(adev, i, true); } adev->enforce_isolation[i] = partition_values[i]; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index bf584e9bcce4..29b6a2baae4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1674,6 +1674,30 @@ bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) return is_supported; } +/* Fix me -- node_id is used to identify the correct MES instances in the future */ +int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable) +{ + struct mes_misc_op_input op_input = {0}; + int r; + + op_input.op = MES_MISC_OP_CHANGE_CONFIG; + op_input.change_config.option.limit_single_process = enable ? 
1 : 0; + + if (!adev->mes.funcs->misc_op) { + DRM_ERROR("mes change config is not supported!\n"); + r = -EINVAL; + goto error; + } + + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + if (r) + DRM_ERROR("failed to change_config.\n"); + +error: + return r; + +} + #if defined(CONFIG_DEBUG_FS) static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 79f13d7e5e16..91bff6443c05 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -311,6 +311,7 @@ enum mes_misc_opcode { MES_MISC_OP_WRM_REG_WAIT, MES_MISC_OP_WRM_REG_WR_WAIT, MES_MISC_OP_SET_SHADER_DEBUGGER, + MES_MISC_OP_CHANGE_CONFIG, }; struct mes_misc_op_input { @@ -349,6 +350,21 @@ struct mes_misc_op_input { uint32_t tcp_watch_cntl[4]; uint32_t trap_en; } set_shader_debugger; + + struct { + union { + struct { + uint32_t limit_single_process : 1; + uint32_t enable_hws_logging_buffer : 1; + uint32_t reserved : 30; + }; + uint32_t all; + } option; + struct { + uint32_t tdr_level; + uint32_t tdr_delay; + } tdr_config; + } change_config; }; }; @@ -519,4 +535,7 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes) } bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev); + +int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable); + #endif /* __AMDGPU_MES_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 57db0c006c8f..1d6de7bced48 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -644,6 +644,14 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; break; + case MES_MISC_OP_CHANGE_CONFIG: + misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; + misc_pkt.change_config.opcode = + MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS; + misc_pkt.change_config.option.bits.li
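A note on the change_config layout added above: the anonymous union lets callers set individual policy bits while a backend can read the word back whole. A usage sketch mirroring the wrapper in this patch:

struct mes_misc_op_input op_input = {0};

op_input.op = MES_MISC_OP_CHANGE_CONFIG;
op_input.change_config.option.limit_single_process = 1;

/* Because 'option' is a union of the bitfield struct and 'all',
 * option.all now reads back 0x1, so an IP backend can either forward
 * the whole 32-bit word to firmware or copy bits individually, as
 * mes_v11_0_misc_op() appears to do for limit_single_process. */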
[pull] amdgpu drm-fixes-6.12
Hi Dave, Simona, Fixes for 6.12. The following changes since commit 42f7652d3eb527d03665b09edac47f85fb600924: Linux 6.12-rc4 (2024-10-20 15:19:38 -0700) are available in the Git repository at: https://gitlab.freedesktop.org/agd5f/linux.git tags/amd-drm-fixes-6.12-2024-10-23 for you to fetch changes up to 7c210ca5a2d72868e5a052fc533d5dcb7e070f89: drm/amdgpu: handle default profile on on devices without fullscreen 3D (2024-10-22 18:21:51 -0400) amd-drm-fixes-6.12-2024-10-23: amdgpu: - ACPI method handling fixes - SMU 14.x fixes - Display idle optimization fix - DP link layer compliance fix - SDMA 7.x fix - PSR-SU fix - SWSMU fix Alex Deucher (1): drm/amdgpu: handle default profile on on devices without fullscreen 3D Aurabindo Pillai (2): drm/amd/display: temp w/a for dGPU to enter idle optimizations drm/amd/display: temp w/a for DP Link Layer compliance Frank Min (1): drm/amdgpu: fix random data corruption for sdma 7 Kenneth Feng (3): drm/amd/pm: update the driver-fw interface file for smu v14.0.2/3 drm/amd/pm: update overdrive function on smu v14.0.2/3 drm/amd/pm: update deep sleep status on smu v14.0.2/3 Mario Limonciello (2): drm/amd: Guard against bad data for ATIF ACPI method drm/amd/display: Disable PSR-SU on Parade 08-01 TCON too drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 15 ++- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 9 +- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +- .../drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 13 ++ .../drm/amd/display/modules/power/power_helpers.c | 2 + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 11 +- .../pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0.h | 132 + drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h | 2 +- .../gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 66 --- 9 files changed, 156 insertions(+), 97 deletions(-)
Re: [PATCH] drm/amdgpu: Increase MES log buffer to dump mes scratch data
[Public] Acked-by: Alex Deucher From: amd-gfx on behalf of Liu, Shaoyun Sent: Tuesday, October 22, 2024 11:21 AM To: Liu, Shaoyun ; amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: Increase MES log buffer to dump mes scratch data [AMD Official Use Only - AMD Internal Distribution Only] ping -Original Message- From: amd-gfx On Behalf Of Liu, Shaoyun Sent: Friday, October 11, 2024 12:57 PM To: amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: Increase MES log buffer to dump mes scratch data [AMD Official Use Only - AMD Internal Distribution Only] Ping . -Original Message- From: Liu, Shaoyun Sent: Thursday, October 10, 2024 12:10 PM To: amd-gfx@lists.freedesktop.org Cc: Liu, Shaoyun Subject: [PATCH] drm/amdgpu: Increase MES log buffer to dump mes scratch data MES internal scratch data is useful for MES debugging; it can only be located in VRAM. Change the allocation type and increase the size for MES 11. Signed-off-by: shaoyunl --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 1 + drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 12 +++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 83d0f731fb65..bf584e9bcce4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -104,7 +104,7 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) return 0; r = amdgpu_bo_create_kernel(adev, adev->mes.event_log_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_DOMAIN_VRAM, &adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, &adev->mes.event_log_cpu_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 45e3508f0f8e..79f13d7e5e16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -40,6 +40,7 @@ #define AMDGPU_MES_VERSION_MASK 0x00000fff #define AMDGPU_MES_API_VERSION_MASK 0x00fff000 #define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000 +#define AMDGPU_MES_MSCRATCH_SIZE 0x8000 enum amdgpu_mes_priority_level { AMDGPU_MES_PRIORITY_LEVEL_LOW = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 03bf865fbdd4..aa2e9ef4ff12 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -913,6 +913,16 @@ static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable) uint32_t pipe, data = 0; if (enable) { + if (amdgpu_mes_log_enable) { + WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO, + lower_32_bits(adev->mes.event_log_gpu_addr + AMDGPU_MES_LOG_BUFFER_SIZE)); + WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI, + upper_32_bits(adev->mes.event_log_gpu_addr + AMDGPU_MES_LOG_BUFFER_SIZE)); + dev_info(adev->dev, "Setup CP MES MSCRATCH address : 0x%x.
0x%x\n", + RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI), + RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO)); + } + data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, @@ -1375,7 +1385,7 @@ static int mes_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init; adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini; - adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE; + adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE; r = amdgpu_mes_init(adev); if (r) -- 2.34.1
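To visualize what this patch does with the enlarged allocation (a sketch derived from the hunks above):

/*
 * Single VRAM BO at adev->mes.event_log_gpu_addr:
 *
 *   +--------------------------------------+  <- event_log_gpu_addr
 *   | MES event log                        |
 *   | (AMDGPU_MES_LOG_BUFFER_SIZE bytes)   |
 *   +--------------------------------------+  <- event_log_gpu_addr +
 *   | MES scratch dump                     |     AMDGPU_MES_LOG_BUFFER_SIZE
 *   | (AMDGPU_MES_MSCRATCH_SIZE = 0x8000)  |
 *   +--------------------------------------+
 *
 * CP_MES_MSCRATCH_LO/HI are programmed with the start of the second
 * region, which is why the WREG32 calls add AMDGPU_MES_LOG_BUFFER_SIZE
 * to the base address.
 */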
Re: [PATCH 1/2] drm/amd/display: increase max surfaces in line with planes and streams
On 23/10/2024 17:36, Mohamed, Zaeem wrote: [AMD Official Use Only - AMD Internal Distribution Only] Hi, A patch addressing this will be sent out soon. Great! Thanks for the heads up! Melissa Thanks, Zaeem -Original Message- From: Melissa Wen Sent: Tuesday, October 22, 2024 11:58 AM To: Mohamed, Zaeem ; Wentland, Harry ; Li, Sun peng (Leo) ; Siqueira, Rodrigo ; Deucher, Alexander ; Koenig, Christian ; Pan, Xinhui ; airl...@gmail.com; dan...@ffwll.ch Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org Subject: Re: [PATCH 1/2] drm/amd/display: increase max surfaces in line with planes and streams Hi, Gentle ping to land the fix for the kernel crash. If faster, I can send a new version increasing max surfaces to 4 as previously discussed. There are now two bug reports for the same issue: - https://gitlab.freedesktop.org/drm/amd/-/issues/3693 - https://gitlab.freedesktop.org/drm/amd/-/issues/3594 Best Regards, Melissa On 27/09/2024 15:20, Melissa Wen wrote: Hi Zaeem, Thanks for explaining their relationship. So IIUC, the current DM implementation of the dc_surface_updates array is wrong, since it's using MAX_SURFACES (=3) for the allocation but MAX_PLANES (=6) as the upper bound when iterating over the array, as you can see in this allocation, followed by an iteration from 0 to `plane_count`: https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c#L9861 Another question to understand what would be the right fix for the above-mentioned implementation: with the cursor overlay mode, does it mean we are using one of the overlay planes for this cursor overlay mode (one from the "max_slave_planes"), or is this cursor plane an extra plane, which means, for some drivers, one primary plane + two overlay planes + one plane for cursor overlay mode (max 4 planes)? BR, Melissa On 27/09/2024 14:40, Mohamed, Zaeem wrote: [AMD Official Use Only - AMD Internal Distribution Only] Hi Melissa, MAX_SURFACE_NUM and MAX_PLANES both represent the upper bound of planes that are supported by HW. It is best to replace MAX_SURFACE_NUM with MAX_PLANES to remove redundancy. MAX_SURFACES is used to represent the upper bound of surfaces that can be piped to a single CRTC. Keep MAX_SURFACES as is to keep stack size down, and replace MAX_SURFACE_NUM with MAX_PLANES. Thanks, Zaeem -Original Message- From: dri-devel On Behalf Of Melissa Wen Sent: Wednesday, September 25, 2024 11:37 AM To: harry.wentl...@amd.com; sunpeng...@amd.com; rodrigo.sique...@amd.com; alexander.deuc...@amd.com; christian.koe...@amd.com; xinhui@amd.com; airl...@gmail.com; dan...@ffwll.ch Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org Subject: [PATCH 1/2] drm/amd/display: increase max surfaces in line with planes and streams 091a97e542cf ("drm/amd/display: Fix warning. Set MAX_SURFACES to 3") reduced the max number of surfaces since, at that time, there was no use for more. However, HW and drivers evolve, and there are now driver versions that allow two overlay planes (max_slave_planes). Moreover, commit 3cfd03b79425 ("drm/amd/display: update max streams per surface") states that 6 is the max number of surfaces supported ASICs can have. Therefore, update MAX_SURFACES to match MAX_SURFACE_NUM, MAX_PLANES and MAX_STREAMS. It also addresses the array-index-out-of-bounds issue reported in the link.
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3594 Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 3992ad73165b..08b00b263533 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -57,7 +57,7 @@ struct dmub_notification; #define DC_VER "3.2.301" -#define MAX_SURFACES 3 +#define MAX_SURFACES 6 #define MAX_PLANES 6 #define MAX_STREAMS 6 #define MIN_VIEWPORT_SIZE 12 -- 2.45.2
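The out-of-bounds pattern Melissa describes boils down to this shape (a simplified sketch of the DM path with a hypothetical fill helper, not the verbatim source):

#define MAX_SURFACES 3	/* before this patch */
#define MAX_PLANES   6

struct dc_surface_update updates[MAX_SURFACES];

/* plane_count is bounded by MAX_PLANES (6), so a commit with more than
 * three planes -- e.g. primary + two overlays + cursor in overlay mode --
 * writes past the end of 'updates'. Raising MAX_SURFACES to match
 * MAX_PLANES removes the mismatch.
 */
for (i = 0; i < plane_count; i++)
	fill_surface_update(&updates[i], planes[i]);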
Re: [PATCH] drm/amd/pm: correct the workload setting
On Tue, Oct 22, 2024 at 11:23 PM Kenneth Feng wrote: > > Correct the workload setting in order not to mix the setting > with the end user. Update the workload mask accordingly. Might be better to actually treat the workload like a mask rather than as a discrete setting since that mirrors how the firmware actually works. The default value set at init time or whatever the user selects via sysfs should be what is reflected in sysfs, however, when KFD selects COMPUTE or the VCN code selects VIDEO, rather than clearing all of the other bits and just setting the selected profile, we should just add or remove the additional bits to the bit mask and store the whole bit mask in the driver. E.g., smu->workload_mask would be the actual mask and smu->power_profile_mode would be the value reflected in sysfs. Alex > > Signed-off-by: Kenneth Feng > --- > drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 17 ++-- > drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 4 +- > .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 39 +++-- > .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 33 --- > .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 42 --- > 5 files changed, 115 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > index accc96a03bd9..f1984736ff4a 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > @@ -1252,7 +1252,8 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block) > atomic64_set(&smu->throttle_int_counter, 0); > smu->watermarks_bitmap = 0; > smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > - smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > + smu->user_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > + smu->driver_workload_mask = 0; > > atomic_set(&smu->smu_power.power_gate.vcn_gated, 1); > atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1); > @@ -1267,10 +1268,12 @@ static int smu_sw_init(struct amdgpu_ip_block > *ip_block) > smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; > smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; > > - if (smu->is_apu) > + if (smu->is_apu) { > smu->workload_mask = 1 << > smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; > - else > + } else { > smu->workload_mask = 1 << > smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; > + smu->user_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; > + } > > smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D; > @@ -2346,11 +2349,13 @@ static int smu_switch_power_profile(void *handle, > > if (!en) { > smu->workload_mask &= ~(1 << smu->workload_prority[type]); > + smu->driver_workload_mask &= ~(1 << > smu->workload_prority[type]); > index = fls(smu->workload_mask); > index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 > : 0; > workload[0] = smu->workload_setting[index]; > } else { > smu->workload_mask |= (1 << smu->workload_prority[type]); > + smu->driver_workload_mask |= (1 << > smu->workload_prority[type]); > index = fls(smu->workload_mask); > index = index <= WORKLOAD_POLICY_MAX ? 
index - 1 : 0; > workload[0] = smu->workload_setting[index]; > @@ -3045,12 +3050,16 @@ static int smu_set_power_profile_mode(void *handle, > uint32_t param_size) > { > struct smu_context *smu = handle; > + int ret; > > if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled || > !smu->ppt_funcs->set_power_profile_mode) > return -EOPNOTSUPP; > + smu->user_profile_mode_setting = true; > + ret = smu_bump_power_profile_mode(smu, param, param_size); > + smu->user_profile_mode_setting = false; > > - return smu_bump_power_profile_mode(smu, param, param_size); > + return ret; > } > > static int smu_get_fan_control_mode(void *handle, u32 *fan_mode) > diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > index 8bb32b3f0d9c..e43b23dd3457 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > @@ -557,10 +557,11 @@ struct smu_context { > bool disable_uclk_switch; > > uint32_t workload_mask; > + uint32_t driver_workload_mask; > uint32_t workload_prority[WORKLOAD_POLICY_MAX]; > uint32_t workload_setting[WORKLOAD_POLICY_MAX]; > uint32_t power_profile_mode; > - uint32_t default_power_profile_mode; > + uint32_
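A sketch of the mask-based flow Alex suggests (field and helper names beyond those visible in the patch are hypothetical):

/* Driver-internal requests add/remove bits instead of overwriting the
 * user's selection; sysfs keeps reporting smu->power_profile_mode while
 * the firmware sees the union of user and driver requests.
 */
static void smu_profile_get(struct smu_context *smu, int profile)
{
	smu->workload_mask |= BIT(smu->workload_prority[profile]);
	smu_sync_workload_mask_to_fw(smu);	/* hypothetical helper */
}

static void smu_profile_put(struct smu_context *smu, int profile)
{
	smu->workload_mask &= ~BIT(smu->workload_prority[profile]);
	smu_sync_workload_mask_to_fw(smu);
}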
[PATCH] drm/amd/amdgpu: limit single process inside MES
This is for MES to limit only one process for the user queues Signed-off-by: Shaoyun Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 24 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 19 +++ drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 15 +++ drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 11 +++ 5 files changed, 71 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index e96984c53e72..72e38d621a29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1576,9 +1576,11 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, if (adev->enforce_isolation[i] && !partition_values[i]) { /* Going from enabled to disabled */ amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); + amdgpu_mes_set_enforce_isolation(adev, i, false); } else if (!adev->enforce_isolation[i] && partition_values[i]) { /* Going from disabled to enabled */ amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); + amdgpu_mes_set_enforce_isolation(adev, i, true); } adev->enforce_isolation[i] = partition_values[i]; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index bf584e9bcce4..dfc7d320fcbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1674,6 +1674,30 @@ bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) return is_supported; } +/* Fix me -- node_id is used to identify the correct MES instances in the future */ +int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable) +{ + struct mes_misc_op_input op_input = {0}; + int r; + + op_input.op = MES_MISC_OP_CHANGE_CONFIG; + op_input.change_config.option.limit_single_process = enable ? 
1 : 0; + + if (!adev->mes.funcs->misc_op) { + dev_err(adev->dev, "mes change config is not supported!\n"); + r = -EINVAL; + goto error; + } + + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + if (r) + dev_err(adev->dev, "failed to change_config.\n"); + +error: + return r; + +} + #if defined(CONFIG_DEBUG_FS) static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 79f13d7e5e16..91bff6443c05 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -311,6 +311,7 @@ enum mes_misc_opcode { MES_MISC_OP_WRM_REG_WAIT, MES_MISC_OP_WRM_REG_WR_WAIT, MES_MISC_OP_SET_SHADER_DEBUGGER, + MES_MISC_OP_CHANGE_CONFIG, }; struct mes_misc_op_input { @@ -349,6 +350,21 @@ struct mes_misc_op_input { uint32_t tcp_watch_cntl[4]; uint32_t trap_en; } set_shader_debugger; + + struct { + union { + struct { + uint32_t limit_single_process : 1; + uint32_t enable_hws_logging_buffer : 1; + uint32_t reserved : 30; + }; + uint32_t all; + } option; + struct { + uint32_t tdr_level; + uint32_t tdr_delay; + } tdr_config; + } change_config; }; }; @@ -519,4 +535,7 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes) } bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev); + +int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable); + #endif /* __AMDGPU_MES_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 57db0c006c8f..c621ba805433 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -644,6 +644,18 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; break; + case MES_MISC_OP_CHANGE_CONFIG: + if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) { + dev_err(adev->dev, "MES FW version must be larger than 0x63 to support limit single process feature.\n"); +
RE: [PATCH 1/2] drm/amd/display: increase max surfaces in line with planes and streams
[AMD Official Use Only - AMD Internal Distribution Only] Hi, A patch addressing this will be sent out soon. Thanks, Zaeem -Original Message- From: Melissa Wen Sent: Tuesday, October 22, 2024 11:58 AM To: Mohamed, Zaeem ; Wentland, Harry ; Li, Sun peng (Leo) ; Siqueira, Rodrigo ; Deucher, Alexander ; Koenig, Christian ; Pan, Xinhui ; airl...@gmail.com; dan...@ffwll.ch Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org Subject: Re: [PATCH 1/2] drm/amd/display: increase max surfaces in line with planes and streams Hi, Gentle ping to land the fix for the kernel crash. If faster, I can send a new version increasing max surfaces to 4 as previously discussed. There are now two bug reports for the same issue: - https://gitlab.freedesktop.org/drm/amd/-/issues/3693 - https://gitlab.freedesktop.org/drm/amd/-/issues/3594 Best Regards, Melissa On 27/09/2024 15:20, Melissa Wen wrote: > Hi Zaeem, > > Thanks for explaining their relationship. > > So IIUC, the current DM implementation of the dc_surface_updates array is > wrong, since it's using MAX_SURFACES (=3) for the allocation but > MAX_PLANES (=6) as the upper bound when iterating over the > array, as you can see in this allocation, followed by an iteration from 0 > to `plane_count`: > > https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c#L9861 > > > Another question to understand what would be the right fix for the > above-mentioned implementation: with the cursor overlay mode, does it mean > we are using one of the overlay planes for this cursor overlay mode > (one from the "max_slave_planes"), or is this cursor plane an extra > plane, which means, for some drivers, one primary plane + two overlay > planes + one plane for cursor overlay mode (max 4 planes)? > > BR, > > Melissa > > On 27/09/2024 14:40, Mohamed, Zaeem wrote: >> [AMD Official Use Only - AMD Internal Distribution Only] >> >> Hi Melissa, >> >> MAX_SURFACE_NUM and MAX_PLANES both represent the upper bound of >> planes that are supported by HW. It is best to replace >> MAX_SURFACE_NUM with MAX_PLANES to remove redundancy. MAX_SURFACES is >> used to represent the upper bound of surfaces that can be piped to a >> single CRTC. Keep MAX_SURFACES as is to keep stack size down, and >> replace MAX_SURFACE_NUM with MAX_PLANES. >> >> Thanks, >> Zaeem >> >> >> -Original Message- >> From: dri-devel On Behalf >> Of Melissa Wen >> Sent: Wednesday, September 25, 2024 11:37 AM >> To: harry.wentl...@amd.com; sunpeng...@amd.com; >> rodrigo.sique...@amd.com; alexander.deuc...@amd.com; >> christian.koe...@amd.com; xinhui@amd.com; airl...@gmail.com; >> dan...@ffwll.ch >> Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org >> Subject: [PATCH 1/2] drm/amd/display: increase max surfaces in line >> with planes and streams >> >> 091a97e542cf ("drm/amd/display: Fix warning. Set MAX_SURFACES to 3") >> reduced the max number of surfaces since, at that time, there was no >> use for more. However, HW and drivers evolve, and there are now driver >> versions that allow two overlay planes (max_slave_planes). Moreover, >> commit 3cfd03b79425 ("drm/amd/display: update max streams per >> surface") states that 6 is the max number of surfaces supported ASICs can have. >> Therefore, update MAX_SURFACES to match MAX_SURFACE_NUM, MAX_PLANES >> and MAX_STREAMS. >> >> It also addresses the array-index-out-of-bounds issue reported in the link.
>> >> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3594 >> Signed-off-by: Melissa Wen >> --- >> drivers/gpu/drm/amd/display/dc/dc.h | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/amd/display/dc/dc.h >> b/drivers/gpu/drm/amd/display/dc/dc.h >> index 3992ad73165b..08b00b263533 100644 >> --- a/drivers/gpu/drm/amd/display/dc/dc.h >> +++ b/drivers/gpu/drm/amd/display/dc/dc.h >> @@ -57,7 +57,7 @@ struct dmub_notification; >> >> #define DC_VER "3.2.301" >> >> -#define MAX_SURFACES 3 >> +#define MAX_SURFACES 6 >> #define MAX_PLANES 6 >> #define MAX_STREAMS 6 >> #define MIN_VIEWPORT_SIZE 12 >> -- >> 2.45.2 >> >
Re: [PATCH] drm/amd/amdgpu: limit single process inside MES
On Wed, Oct 23, 2024 at 2:08 PM Shaoyun Liu wrote: > > This is for MES to limit only one process for the user queues > > Signed-off-by: Shaoyun Liu > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 24 > drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 19 +++ > drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 11 +++ > drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 11 +++ > 5 files changed, 67 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > index e96984c53e72..72e38d621a29 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > @@ -1576,9 +1576,11 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct > device *dev, > if (adev->enforce_isolation[i] && !partition_values[i]) { > /* Going from enabled to disabled */ > amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); > + amdgpu_mes_set_enforce_isolation(adev, i, false); > } else if (!adev->enforce_isolation[i] && > partition_values[i]) { > /* Going from disabled to enabled */ > amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); > + amdgpu_mes_set_enforce_isolation(adev, i, true); > } > adev->enforce_isolation[i] = partition_values[i]; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > index bf584e9bcce4..29b6a2baae4d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > @@ -1674,6 +1674,30 @@ bool amdgpu_mes_suspend_resume_all_supported(struct > amdgpu_device *adev) > return is_supported; > } > > +/* Fix me -- node_id is used to identify the correct MES instances in the > future */ > +int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t > node_id, bool enable) > +{ > + struct mes_misc_op_input op_input = {0}; > + int r; > + > + op_input.op = MES_MISC_OP_CHANGE_CONFIG; > + op_input.change_config.option.limit_single_process = enable ? 1 : 0; > + > + if (!adev->mes.funcs->misc_op) { > + DRM_ERROR("mes change config is not supported!\n"); Please use dev_err() so it's clear which GPU the error is coming from in a multi-GPU system. 
> + r = -EINVAL; > + goto error; > + } > + > + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); > + if (r) > + DRM_ERROR("failed to change_config.\n"); dev_err() > + > +error: > + return r; > + > +} > + > #if defined(CONFIG_DEBUG_FS) > > static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void > *unused) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > index 79f13d7e5e16..91bff6443c05 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > @@ -311,6 +311,7 @@ enum mes_misc_opcode { > MES_MISC_OP_WRM_REG_WAIT, > MES_MISC_OP_WRM_REG_WR_WAIT, > MES_MISC_OP_SET_SHADER_DEBUGGER, > + MES_MISC_OP_CHANGE_CONFIG, > }; > > struct mes_misc_op_input { > @@ -349,6 +350,21 @@ struct mes_misc_op_input { > uint32_t tcp_watch_cntl[4]; > uint32_t trap_en; > } set_shader_debugger; > + > + struct { > + union { > + struct { > + uint32_t limit_single_process : 1; > + uint32_t enable_hws_logging_buffer : > 1; > + uint32_t reserved : 30; > + }; > + uint32_t all; > + } option; > + struct { > + uint32_t tdr_level; > + uint32_t tdr_delay; > + } tdr_config; > + } change_config; > }; > }; > > @@ -519,4 +535,7 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes > *mes) > } > > bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev); > + > +int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t > node_id, bool enable); > + > #endif /* __AMDGPU_MES_H__ */ > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > index 57db0c006c8f..1d6de7bced48 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > @@ -644,6 +644,14 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, > > sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); >
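For reference, the practical difference behind the dev_err() request (illustrative output; the exact device prefix depends on the system):

DRM_ERROR("mes change config is not supported!\n");
/* -> roughly "[drm:...] *ERROR* mes change config is not supported!"
 *    with no indication of which GPU failed */

dev_err(adev->dev, "mes change config is not supported!\n");
/* -> "amdgpu 0000:03:00.0: mes change config is not supported!"
 *    the struct device prefix identifies the GPU on multi-GPU systems */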
[PATCH 2/3] drm/amdgpu: Adjust debugfs eviction and IB access permissions
Users should not be able to run these. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index e44a44405266..2ef7bcfdb2e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2108,11 +2108,11 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_securedisplay_debugfs_init(adev); amdgpu_fw_attestation_debugfs_init(adev); - debugfs_create_file("amdgpu_evict_vram", 0444, root, adev, + debugfs_create_file("amdgpu_evict_vram", 0400, root, adev, &amdgpu_evict_vram_fops); - debugfs_create_file("amdgpu_evict_gtt", 0444, root, adev, + debugfs_create_file("amdgpu_evict_gtt", 0400, root, adev, &amdgpu_evict_gtt_fops); - debugfs_create_file("amdgpu_test_ib", 0444, root, adev, + debugfs_create_file("amdgpu_test_ib", 0400, root, adev, &amdgpu_debugfs_test_ib_fops); debugfs_create_file("amdgpu_vm_info", 0444, root, adev, &amdgpu_debugfs_vm_info_fops); -- 2.46.2
[PATCH 3/3] drm/amdgpu: add missing size check in amdgpu_debugfs_gprwave_read()
Avoid a possible buffer overflow if size is larger than 4K. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 2ef7bcfdb2e2..3a118645b4bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -402,7 +402,7 @@ static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, siz int r; uint32_t *data, x; - if (size & 0x3 || *pos & 0x3) + if (size > 4096 || size & 0x3 || *pos & 0x3) return -EINVAL; r = pm_runtime_get_sync(adev_to_drm(adev)->dev); -- 2.46.2
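For context on the 4096 bound (a sketch; assuming the handler stages register data in a fixed page-sized buffer, as the mainline function does with a 1024-entry u32 array):

data = kcalloc(1024, sizeof(*data), GFP_KERNEL);	/* 1024 * 4 = 4096 bytes */

/* Without the added check, a read with size > 4096 would let the copy
 * loop run past this staging buffer; rejecting oversized (and
 * unaligned) requests up front closes the overflow:
 */
if (size > 4096 || size & 0x3 || *pos & 0x3)
	return -EINVAL;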
[PATCH 1/3] drm/amdgpu: Adjust debugfs register access permissions
Regular users shouldn't have read access. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 6e6092916d4e..e44a44405266 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1654,7 +1654,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { ent = debugfs_create_file(debugfs_regs_names[i], - S_IFREG | 0444, root, + S_IFREG | 0400, root, adev, debugfs_regs[i]); if (!i && !IS_ERR_OR_NULL(ent)) i_size_write(ent->d_inode, adev->rmmio_size); -- 2.46.2
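For reference, the octal modes used across these debugfs patches (standard permission semantics):

/* 0444 = S_IRUSR | S_IRGRP | S_IROTH: readable by everyone */
/* 0400 = S_IRUSR: readable only by the owner (root) */
debugfs_create_file("amdgpu_evict_vram", 0400, root, adev,
		    &amdgpu_evict_vram_fops);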
RE: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete
[Public] > From: Lazar, Lijo > Sent: Wednesday, October 23, 2024 6:55 PM > To: Liang, Prike ; amd-gfx@lists.freedesktop.org > Cc: Deucher, Alexander > Subject: Re: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete > > > > On 10/14/2024 1:19 PM, Prike Liang wrote: > > To check the status of S3 suspend completion, use the PM core > > pm_suspend_global_flags bit(1) to detect S3 abort events. Therefore, > > clean up the AMDGPU driver's private flag suspend_complete. > > > > Signed-off-by: Prike Liang > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -- > > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 -- > > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- > > drivers/gpu/drm/amd/amdgpu/soc15.c | 7 ++- > > drivers/gpu/drm/amd/amdgpu/soc21.c | 2 +- > > 5 files changed, 5 insertions(+), 12 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > index 48c9b9b06905..9b35763ae0a7 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > @@ -,8 +,6 @@ struct amdgpu_device { > > boolin_s3; > > boolin_s4; > > boolin_s0ix; > > - /* indicate amdgpu suspension status */ > > - boolsuspend_complete; > > > > enum pp_mp1_state mp1_state; > > struct amdgpu_doorbell_index doorbell_index; diff --git > > a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > index 680e44fdee6e..78972151b970 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > @@ -2501,7 +2501,6 @@ static int amdgpu_pmops_suspend(struct device *dev) > > struct drm_device *drm_dev = dev_get_drvdata(dev); > > struct amdgpu_device *adev = drm_to_adev(drm_dev); > > > > - adev->suspend_complete = false; > > if (amdgpu_acpi_is_s0ix_active(adev)) > > adev->in_s0ix = true; > > else if (amdgpu_acpi_is_s3_active(adev)) @@ -2516,7 +2515,6 @@ > > static int amdgpu_pmops_suspend_noirq(struct device *dev) > > struct drm_device *drm_dev = dev_get_drvdata(dev); > > struct amdgpu_device *adev = drm_to_adev(drm_dev); > > > > - adev->suspend_complete = true; > > if (amdgpu_acpi_should_gpu_reset(adev)) > > return amdgpu_asic_reset(adev); > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > > index be320d753507..ba8e66744376 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > > @@ -3276,8 +3276,8 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device > *adev) > > * confirmed that the APU gfx10/gfx11 needn't such update. > > */ > > if (adev->flags & AMD_IS_APU && > > - adev->in_s3 && !adev->suspend_complete) { > > - DRM_INFO(" Will skip the CSB packet resubmit\n"); > > + adev->in_s3 && !pm_resume_via_firmware()) { > > + DRM_INFO("Will skip the CSB packet resubmit\n"); > > return 0; > > } > > r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); > > diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c > > b/drivers/gpu/drm/amd/amdgpu/soc15.c > > index 12ff6cf568dc..d9d11131a744 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/soc15.c > > +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c > > @@ -584,13 +584,10 @@ static bool soc15_need_reset_on_resume(struct > amdgpu_device *adev) > > *performing pm core test. 
> > */ > > if (adev->flags & AMD_IS_APU && adev->in_s3 && > > - !pm_resume_via_firmware()) { > > - adev->suspend_complete = false; > > + !pm_resume_via_firmware()) > > return true; > > - } else { > > - adev->suspend_complete = true; > > + else > > return false; > > - } > > } > > > > static int soc15_asic_reset(struct amdgpu_device *adev) diff --git > > a/drivers/gpu/drm/amd/amdgpu/soc21.c > > b/drivers/gpu/drm/amd/amdgpu/soc21.c > > index c4b950e75133..7a47a21ef00f 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/soc21.c > > +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c > > @@ -904,7 +904,7 @@ static bool soc21_need_reset_on_resume(struct > amdgpu_device *adev) > > * 2) S3 suspend got aborted and TOS is active. > > */ > > if (!(adev->flags & AMD_IS_APU) && adev->in_s3 && > > - !adev->suspend_complete) { > > + !pm_resume_via_firmware()) { > > Looks like this will cover only ACPI based systems. Not sure if that > assumption is > valid for dGPU cases. > > Thanks, > Lijo Yes, the pm_set_resume_via_firmware() function is only called during the ACPI_STATE_S3 suspend process. However, ACPI-enabled systems are popular in the desktop world. If there are concerns about ACPI configuration, one option could be to check if the dGPU needs a re
Re: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete
On 10/24/2024 8:24 AM, Liang, Prike wrote: > [Public] > >> From: Lazar, Lijo >> Sent: Wednesday, October 23, 2024 6:55 PM >> To: Liang, Prike ; amd-gfx@lists.freedesktop.org >> Cc: Deucher, Alexander >> Subject: Re: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete >> >> >> >> On 10/14/2024 1:19 PM, Prike Liang wrote: >>> To check the status of S3 suspend completion, use the PM core >>> pm_suspend_global_flags bit(1) to detect S3 abort events. Therefore, >>> clean up the AMDGPU driver's private flag suspend_complete. >>> >>> Signed-off-by: Prike Liang >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 -- >>> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- >>> drivers/gpu/drm/amd/amdgpu/soc15.c | 7 ++- >>> drivers/gpu/drm/amd/amdgpu/soc21.c | 2 +- >>> 5 files changed, 5 insertions(+), 12 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> index 48c9b9b06905..9b35763ae0a7 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> @@ -,8 +,6 @@ struct amdgpu_device { >>> boolin_s3; >>> boolin_s4; >>> boolin_s0ix; >>> - /* indicate amdgpu suspension status */ >>> - boolsuspend_complete; >>> >>> enum pp_mp1_state mp1_state; >>> struct amdgpu_doorbell_index doorbell_index; diff --git >>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>> index 680e44fdee6e..78972151b970 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>> @@ -2501,7 +2501,6 @@ static int amdgpu_pmops_suspend(struct device *dev) >>> struct drm_device *drm_dev = dev_get_drvdata(dev); >>> struct amdgpu_device *adev = drm_to_adev(drm_dev); >>> >>> - adev->suspend_complete = false; >>> if (amdgpu_acpi_is_s0ix_active(adev)) >>> adev->in_s0ix = true; >>> else if (amdgpu_acpi_is_s3_active(adev)) @@ -2516,7 +2515,6 @@ >>> static int amdgpu_pmops_suspend_noirq(struct device *dev) >>> struct drm_device *drm_dev = dev_get_drvdata(dev); >>> struct amdgpu_device *adev = drm_to_adev(drm_dev); >>> >>> - adev->suspend_complete = true; >>> if (amdgpu_acpi_should_gpu_reset(adev)) >>> return amdgpu_asic_reset(adev); >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >>> index be320d753507..ba8e66744376 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >>> @@ -3276,8 +3276,8 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device >> *adev) >>> * confirmed that the APU gfx10/gfx11 needn't such update. >>> */ >>> if (adev->flags & AMD_IS_APU && >>> - adev->in_s3 && !adev->suspend_complete) { >>> - DRM_INFO(" Will skip the CSB packet resubmit\n"); >>> + adev->in_s3 && !pm_resume_via_firmware()) { >>> + DRM_INFO("Will skip the CSB packet resubmit\n"); >>> return 0; >>> } >>> r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); >>> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c >>> b/drivers/gpu/drm/amd/amdgpu/soc15.c >>> index 12ff6cf568dc..d9d11131a744 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c >>> @@ -584,13 +584,10 @@ static bool soc15_need_reset_on_resume(struct >> amdgpu_device *adev) >>> *performing pm core test. 
>>> */ >>> if (adev->flags & AMD_IS_APU && adev->in_s3 && >>> - !pm_resume_via_firmware()) { >>> - adev->suspend_complete = false; >>> + !pm_resume_via_firmware()) >>> return true; >>> - } else { >>> - adev->suspend_complete = true; >>> + else >>> return false; >>> - } >>> } >>> >>> static int soc15_asic_reset(struct amdgpu_device *adev) diff --git >>> a/drivers/gpu/drm/amd/amdgpu/soc21.c >>> b/drivers/gpu/drm/amd/amdgpu/soc21.c >>> index c4b950e75133..7a47a21ef00f 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/soc21.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c >>> @@ -904,7 +904,7 @@ static bool soc21_need_reset_on_resume(struct >> amdgpu_device *adev) >>> * 2) S3 suspend got aborted and TOS is active. >>> */ >>> if (!(adev->flags & AMD_IS_APU) && adev->in_s3 && >>> - !adev->suspend_complete) { >>> + !pm_resume_via_firmware()) { >> >> Looks like this will cover only ACPI based systems. Not sure if that >> assumption is >> valid for dGPU cases. >> >> Thanks, >> Lijo > > Yes, the pm_set_resume_via_firmware() function is only called during the > ACPI_STATE_S3 suspend process. However, ACPI-enabled systems are popular in > the desktop world. If there are concerns a
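For readers unfamiliar with the PM-core flag under discussion, this is roughly the helper being relied on (paraphrased from include/linux/suspend.h in recent kernels; verify against your tree):

/* pm_suspend_global_flags bit(1) is PM_SUSPEND_FLAG_FW_RESUME; the ACPI
 * S3 path sets it via pm_set_resume_via_firmware() only when the
 * platform firmware actually carried out the S3 entry, so a false
 * return after adev->in_s3 was set indicates an aborted suspend.
 */
static inline bool pm_resume_via_firmware(void)
{
	return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_FW_RESUME);
}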
[PATCH] drm/amdgpu: skip pci_restore_state under sriov during device init
During device init under SR-IOV, pci_restore_state() happens after full access is released, and it can race with MMIO protection being enabled from the host side. Since MSI-X is toggled during pci_restore_state(), if MMIO protection happens during this window, the guest-side MSI-X will not be properly programmed, leading to missing interrupts. So skip pci_restore_state() during device init. Signed-off-by: Victor Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6c0ff1c2ae4c..52803cd91ef5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4524,7 +4524,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, dev_err(adev->dev, "amdgpu_pmu_init failed\n"); /* Have stored pci confspace at hand for restore in sudden PCI error */ - if (amdgpu_device_cache_pci_state(adev->pdev)) + if (!amdgpu_sriov_vf(adev) && amdgpu_device_cache_pci_state(adev->pdev)) pci_restore_state(pdev); /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ -- 2.34.1
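The race described above, as a timeline (reconstructed from the commit message; no new facts):

/*
 * Guest driver init (SR-IOV)               Host
 * --------------------------               ----
 * amdgpu_device_init()
 *   ...
 *   full-access window ends  ------------> may enable MMIO protection
 *   pci_restore_state()                    at any point from here on
 *     toggles the MSI-X enable bit  <-X--  write lost if protection is
 *                                          already active: MSI-X left
 *                                          unprogrammed, interrupts missed
 */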
RE: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete
[AMD Official Use Only - AMD Internal Distribution Only]

> From: Lazar, Lijo
> Sent: Thursday, October 24, 2024 11:39 AM
> To: Liang, Prike ; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander
> Subject: Re: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete
>
> On 10/24/2024 8:24 AM, Liang, Prike wrote:
> > [Public]
> >
> >> From: Lazar, Lijo
> >> Sent: Wednesday, October 23, 2024 6:55 PM
> >> To: Liang, Prike ; amd-gfx@lists.freedesktop.org
> >> Cc: Deucher, Alexander
> >> Subject: Re: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete
> >>
> >> On 10/14/2024 1:19 PM, Prike Liang wrote:
> >>> To check the status of S3 suspend completion, use the PM core
> >>> pm_suspend_global_flags bit(1) to detect S3 abort events. Therefore,
> >>> clean up the AMDGPU driver's private flag suspend_complete.
> >>>
> >>> Signed-off-by: Prike Liang
> >>> ---
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 2 --
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 --
> >>>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 4 ++--
> >>>  drivers/gpu/drm/amd/amdgpu/soc15.c      | 7 ++-----
> >>>  drivers/gpu/drm/amd/amdgpu/soc21.c      | 2 +-
> >>>  5 files changed, 5 insertions(+), 12 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> index 48c9b9b06905..9b35763ae0a7 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> @@ -,8 +,6 @@ struct amdgpu_device {
> >>>  	bool				in_s3;
> >>>  	bool				in_s4;
> >>>  	bool				in_s0ix;
> >>> -	/* indicate amdgpu suspension status */
> >>> -	bool				suspend_complete;
> >>>
> >>>  	enum pp_mp1_state		mp1_state;
> >>>  	struct amdgpu_doorbell_index doorbell_index;
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>> index 680e44fdee6e..78972151b970 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>> @@ -2501,7 +2501,6 @@ static int amdgpu_pmops_suspend(struct device *dev)
> >>>  	struct drm_device *drm_dev = dev_get_drvdata(dev);
> >>>  	struct amdgpu_device *adev = drm_to_adev(drm_dev);
> >>>
> >>> -	adev->suspend_complete = false;
> >>>  	if (amdgpu_acpi_is_s0ix_active(adev))
> >>>  		adev->in_s0ix = true;
> >>>  	else if (amdgpu_acpi_is_s3_active(adev))
> >>> @@ -2516,7 +2515,6 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev)
> >>>  	struct drm_device *drm_dev = dev_get_drvdata(dev);
> >>>  	struct amdgpu_device *adev = drm_to_adev(drm_dev);
> >>>
> >>> -	adev->suspend_complete = true;
> >>>  	if (amdgpu_acpi_should_gpu_reset(adev))
> >>>  		return amdgpu_asic_reset(adev);
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> >>> index be320d753507..ba8e66744376 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> >>> @@ -3276,8 +3276,8 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
> >>>  	 * confirmed that the APU gfx10/gfx11 needn't such update.
> >>>  	 */
> >>>  	if (adev->flags & AMD_IS_APU &&
> >>> -	    adev->in_s3 && !adev->suspend_complete) {
> >>> -		DRM_INFO(" Will skip the CSB packet resubmit\n");
> >>> +	    adev->in_s3 && !pm_resume_via_firmware()) {
> >>> +		DRM_INFO("Will skip the CSB packet resubmit\n");
> >>>  		return 0;
> >>>  	}
> >>>  	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
> >>> index 12ff6cf568dc..d9d11131a744 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> >>> @@ -584,13 +584,10 @@ static bool soc15_need_reset_on_resume(struct amdgpu_device *adev)
> >>>  	 * performing pm core test.
> >>>  	 */
> >>>  	if (adev->flags & AMD_IS_APU && adev->in_s3 &&
> >>> -	    !pm_resume_via_firmware()) {
> >>> -		adev->suspend_complete = false;
> >>> +	    !pm_resume_via_firmware())
> >>>  		return true;
> >>> -	} else {
> >>> -		adev->suspend_complete = true;
> >>> +	else
> >>>  		return false;
> >>> -	}
> >>>  }
> >>>
> >>>  static int soc15_asic_reset(struct amdgpu_device *adev)
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
> >>> index c4b950e75133..7a47a21ef00f 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/soc21.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
> >>> @@ -904,7 +904,7 @@ static bool soc21_need_reset_on_resume(struct amdgpu_device *adev)
> >>>  	 * 2) S3 suspend got aborted and TOS is acti