Re: [PATCH 1/5 V2] drm/amdgpu: Add sysfs interface for gc reset mask
On 10/23/2024 8:13 AM, jesse.zh...@amd.com wrote:
> Add two sysfs interfaces for gfx and compute:
> gfx_reset_mask
> compute_reset_mask
>
> These interfaces are read-only and show the resets supported by the IP.
> For example, full adapter reset (mode1/mode2/BACO/etc),
> soft reset, queue reset, and pipe reset.
>
> V2: the sysfs node returns a text string instead of some flags (Christian)
>
> Signed-off-by: Jesse Zhang
> Suggested-by: Alex Deucher
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 122
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 +
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 ++
> drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 5 +
> drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 5 +
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 5 +
> drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 5 +
> 7 files changed, 150 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index e96984c53e72..10d55755ee88 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -1588,6 +1588,94 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
> 	return count;
> }
>
> +static ssize_t amdgpu_gfx_get_gfx_reset_mask(struct device *dev,
> +					     struct device_attribute *attr,
> +					     char *buf)
> +{
> +	struct drm_device *ddev = dev_get_drvdata(dev);
> +	struct amdgpu_device *adev = drm_to_adev(ddev);
> +	ssize_t size = 0;
> +	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
> +
> +	if (!adev || !ring)
> +		return -ENODEV;
> +
> +	if (amdgpu_device_should_recover_gpu(adev))
> +		size += sysfs_emit_at(buf, size, "full ");
> +
> +	if (amdgpu_gpu_recovery && unlikely(!adev->debug_disable_soft_recovery)
> +	    && !amdgpu_sriov_vf(adev) && ring->funcs->soft_recovery)
> +		size += sysfs_emit_at(buf, size, "soft ");
> +

If amdgpu_gpu_recovery is disabled, then that check may be made before
creating the sysfs file itself. It doesn't have to be here.

> +	if (amdgpu_gpu_recovery && ring->funcs->reset) {
> +		switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
> +		case IP_VERSION(9, 2, 2): //raven2
> +		case IP_VERSION(9, 3, 0): //renoir
> +		case IP_VERSION(9, 4, 0): //vega20
> +		case IP_VERSION(10, 1, 0): //navi10
> +		case IP_VERSION(10, 1, 1): //navi12
> +		case IP_VERSION(10, 1, 2): //navi13
> +			/* Skip flag setting because some cases
> +			 * are not supported by current firmware.
> +			 */
> +			break;
> +
> +		default:
> +			size += sysfs_emit_at(buf, size, "queue ");
> +			break;
> +		}
> +	}

This kind of version check is not good. Instead initialize
supported_reset_types in IP version files. As in the compute example
below, sometimes it requires FW support/other checks also, not just the
existence of callback implementation. This function may just iterate
over the type mask to print the text version.

Thanks,
Lijo

> +
> +	size += sysfs_emit_at(buf, size, "\n");
> +	return size;
> +}
> +
> +static ssize_t amdgpu_gfx_get_compute_reset_mask(struct device *dev,
> +						 struct device_attribute *attr,
> +						 char *buf)
> +{
> +	struct drm_device *ddev = dev_get_drvdata(dev);
> +	struct amdgpu_device *adev = drm_to_adev(ddev);
> +	ssize_t size = 0;
> +	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
> +
> +	if (!adev || !ring)
> +		return -ENODEV;
> +
> +	if (amdgpu_device_should_recover_gpu(adev))
> +		size += sysfs_emit_at(buf, size, "full ");
> +
> +	if (amdgpu_gpu_recovery && unlikely(!adev->debug_disable_soft_recovery)
> +	    && !amdgpu_sriov_vf(adev) && ring->funcs->soft_recovery)
> +		size += sysfs_emit_at(buf, size, "soft ");
> +
> +	if (amdgpu_gpu_recovery && ring->funcs->reset) {
> +		switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
> +		case IP_VERSION(9, 2, 2): //raven2
> +		case IP_VERSION(9, 3, 0): //renoir
> +		case IP_VERSION(9, 4, 0): //vega20
> +		case IP_VERSION(10, 1, 0): //navi10
> +		case IP_VERSION(10, 1, 1): //navi12
> +		case IP_VERSION(10, 1, 2): //navi13
> +			/* Skip flag setting because some test cases
> +			 * are not supported by current firmware.
> +			 */
> +			break;
> +
> +		default:
> +			size += sysfs_emit_at(buf, size,
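Lijo's suggestion, sketched in C with illustrative names (this is not
the patch's actual API): each IP file would fill a supported-reset mask
once at init time, after performing any firmware checks, and the sysfs
show routine would only walk the mask.

	#include <linux/sysfs.h>

	/* Hypothetical flag names for the sketch. */
	#define AMDGPU_RESET_TYPE_FULL		(1 << 0)
	#define AMDGPU_RESET_TYPE_SOFT_RESET	(1 << 1)
	#define AMDGPU_RESET_TYPE_PER_QUEUE	(1 << 2)
	#define AMDGPU_RESET_TYPE_PER_PIPE	(1 << 3)

	static ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
	{
		ssize_t size = 0;

		/* Just translate the precomputed mask to text. */
		if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
			size += sysfs_emit_at(buf, size, "soft ");
		if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
			size += sysfs_emit_at(buf, size, "queue ");
		if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
			size += sysfs_emit_at(buf, size, "pipe ");
		if (supported_reset & AMDGPU_RESET_TYPE_FULL)
			size += sysfs_emit_at(buf, size, "full ");
		size += sysfs_emit_at(buf, size, "\n");
		return size;
	}

A gfx IP file would then set something like
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE only on
the IP/firmware combinations that actually support queue reset, keeping
the per-version knowledge out of the common show() path.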
RE: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete
Ping on this series.

Thanks,
Prike

> -----Original Message-----
> From: Liang, Prike
> Sent: Monday, October 14, 2024 3:49 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Lazar, Lijo ; Liang, Prike
> Subject: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete
>
> To check the status of S3 suspend completion, use the PM core
> pm_suspend_global_flags bit(1) to detect S3 abort events. Therefore, clean up
> the AMDGPU driver's private flag suspend_complete.
>
> Signed-off-by: Prike Liang
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 --
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 --
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++--
> drivers/gpu/drm/amd/amdgpu/soc15.c | 7 ++-
> drivers/gpu/drm/amd/amdgpu/soc21.c | 2 +-
> 5 files changed, 5 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 48c9b9b06905..9b35763ae0a7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -,8 +,6 @@ struct amdgpu_device {
> 	bool	in_s3;
> 	bool	in_s4;
> 	bool	in_s0ix;
> -	/* indicate amdgpu suspension status */
> -	bool	suspend_complete;
>
> 	enum pp_mp1_state mp1_state;
> 	struct amdgpu_doorbell_index doorbell_index;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 680e44fdee6e..78972151b970 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2501,7 +2501,6 @@ static int amdgpu_pmops_suspend(struct device *dev)
> 	struct drm_device *drm_dev = dev_get_drvdata(dev);
> 	struct amdgpu_device *adev = drm_to_adev(drm_dev);
>
> -	adev->suspend_complete = false;
> 	if (amdgpu_acpi_is_s0ix_active(adev))
> 		adev->in_s0ix = true;
> 	else if (amdgpu_acpi_is_s3_active(adev))
> @@ -2516,7 +2515,6 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev)
> 	struct drm_device *drm_dev = dev_get_drvdata(dev);
> 	struct amdgpu_device *adev = drm_to_adev(drm_dev);
>
> -	adev->suspend_complete = true;
> 	if (amdgpu_acpi_should_gpu_reset(adev))
> 		return amdgpu_asic_reset(adev);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index be320d753507..ba8e66744376 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3276,8 +3276,8 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
> 	 * confirmed that the APU gfx10/gfx11 needn't such update.
> 	 */
> 	if (adev->flags & AMD_IS_APU &&
> -	    adev->in_s3 && !adev->suspend_complete) {
> -		DRM_INFO(" Will skip the CSB packet resubmit\n");
> +	    adev->in_s3 && !pm_resume_via_firmware()) {
> +		DRM_INFO("Will skip the CSB packet resubmit\n");
> 		return 0;
> 	}
> 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 12ff6cf568dc..d9d11131a744 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -584,13 +584,10 @@ static bool soc15_need_reset_on_resume(struct amdgpu_device *adev)
> 	 * performing pm core test.
> 	 */
> 	if (adev->flags & AMD_IS_APU && adev->in_s3 &&
> -	    !pm_resume_via_firmware()) {
> -		adev->suspend_complete = false;
> +	    !pm_resume_via_firmware())
> 		return true;
> -	} else {
> -		adev->suspend_complete = true;
> +	else
> 		return false;
> -	}
> }
>
> static int soc15_asic_reset(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
> index c4b950e75133..7a47a21ef00f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc21.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
> @@ -904,7 +904,7 @@ static bool soc21_need_reset_on_resume(struct amdgpu_device *adev)
> 	 * 2) S3 suspend got aborted and TOS is active.
> 	 */
> 	if (!(adev->flags & AMD_IS_APU) && adev->in_s3 &&
> -	    !adev->suspend_complete) {
> +	    !pm_resume_via_firmware()) {
> 		sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
> 		msleep(100);
> 		sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
> --
> 2.34.1
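For reference, the PM core helper the series switches to is a one-line
test of pm_suspend_global_flags bit(1); paraphrased from
include/linux/suspend.h (check the tree for the authoritative
definition):

	/* Paraphrased from include/linux/suspend.h: bit(1) of
	 * pm_suspend_global_flags records that platform firmware was
	 * involved in the resume, which is the signal the patch keys
	 * off instead of the driver-private suspend_complete flag.
	 */
	#define PM_SUSPEND_FLAG_FW_SUSPEND	BIT(0)
	#define PM_SUSPEND_FLAG_FW_RESUME	BIT(1)

	static inline bool pm_resume_via_firmware(void)
	{
		return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_FW_RESUME);
	}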
Re: [PATCH v5 4/4] drm/amdgpu: track bo memory stats at runtime
Am 23.10.24 um 09:38 schrieb Tvrtko Ursulin: On 22/10/2024 17:24, Christian König wrote: Am 22.10.24 um 17:17 schrieb Li, Yunxiang (Teddy): [Public] +static uint32_t fold_memtype(uint32_t memtype) { In general please add prefixes to even static functions, e.g. amdgpu_vm_ or amdgpu_bo_. + /* Squash private placements into 'cpu' to keep the legacy userspace view. */ + switch (mem_type) { + case TTM_PL_VRAM: + case TTM_PL_TT: + return memtype + default: + return TTM_PL_SYSTEM; + } +} + +static uint32_t bo_get_memtype(struct amdgpu_bo *bo) { That whole function belongs into amdgpu_bo.c Do you mean bo_get_memtype or fold_memtype? I debated whether bo_get_memtype should go into amdgpu_vm.c or amdgpu_bo.c, and since it's using fold_memtype and only useful for memory stats because of folding the private placements I just left them here together with the other mem stats code. I can move it to amdgpu_bo.c make it return the memtype verbatim and just fold it when I do the accounting. I think that folding GDS, GWS and OA into system is also a bug. We should really not doing that. Just wanted to point out for this round that the code to query the current placement from a BO should probably go into amdgpu_bo.c and not amdgpu_vm.c + struct ttm_resource *res = bo->tbo.resource; + const uint32_t domain_to_pl[] = { + [ilog2(AMDGPU_GEM_DOMAIN_CPU)] = TTM_PL_SYSTEM, + [ilog2(AMDGPU_GEM_DOMAIN_GTT)] = TTM_PL_TT, + [ilog2(AMDGPU_GEM_DOMAIN_VRAM)] = TTM_PL_VRAM, + [ilog2(AMDGPU_GEM_DOMAIN_GDS)] = AMDGPU_PL_GDS, + [ilog2(AMDGPU_GEM_DOMAIN_GWS)] = AMDGPU_PL_GWS, + [ilog2(AMDGPU_GEM_DOMAIN_OA)] = AMDGPU_PL_OA, + [ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = AMDGPU_PL_DOORBELL, + }; + uint32_t domain; + + if (res) + return fold_memtype(res->mem_type); + + /* + * If no backing store use one of the preferred domain for basic + * stats. We take the MSB since that should give a reasonable + * view. + */ + BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT || TTM_PL_VRAM < TTM_PL_SYSTEM); + domain = fls(bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK); + if (drm_WARN_ON_ONCE(&adev->ddev, + domain == 0 || --domain >= ARRAY_SIZE(domain_to_pl))) It's perfectly legal to create a BO without a placement. That one just won't have a backing store. This is lifted from the previous change I'm rebasing onto. I think what it’s trying to do is if the BO doesn't have a placement, use the "biggest" (VRAM > TT > SYSTEM) preferred placement for the purpose of accounting. Previously we just ignore BOs that doesn't have a placement. I guess there's argument for going with either approaches. I was not arguing, I'm simply pointing out a bug. It's perfectly valid for bo->preferred_domains to be 0. So the following WARN_ON() that no bit is set is incorrect. + return 0; + return fold_memtype(domain_to_pl[domain]) That would need specular execution mitigation if I'm not completely mistaken. Better use a switch/case statement. Do you mean change the array indexing to a switch statement? Yes. Did you mean array_index_nospec? Yes. Domain is not a direct userspace input and is calculated from the mask which sanitized to allowed values prior to this call. So I *think* switch is an overkill but don't mind it either. Just commenting FWIW. I missed that the mask is applied. Thinking more about it I'm not sure if we should do this conversion in the first place. IIRC Tvrtko you once suggested a patch which switched a bunch of code to use the TTM placement instead of the UAPI flags. 
Going more into this direction I think when we want to look at the current placement we should probably also use the TTM PL enumeration directly. Regards, Christian. Regards, Tvrtko
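Christian's direction of reading the TTM placement enumeration directly
could look roughly like the following hedged sketch (the helper name is
made up, not the merged code):

	/* Sketch: take the placement from TTM when a backing store
	 * exists, and only fall back to the GEM preferred-domain flags
	 * for unplaced BOs, which then count toward "total" only.
	 */
	static uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo)
	{
		struct ttm_resource *res = bo->tbo.resource;

		if (res)
			return res->mem_type;	/* TTM_PL_* or AMDGPU_PL_* directly */

		/* No backing store: caller buckets it, e.g. from
		 * bo->preferred_domains as in the patch under review.
		 */
		return TTM_PL_SYSTEM;
	}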
Re: [PATCH] drm/amdgpu: handle default profile on on devices without fullscreen 3D
On 2024-10-22 15:50, Alex Deucher wrote: > Some devices do not support fullscreen 3D. > > v2: Make the check generic. > > Fixes: 336568de918e ("drm/amdgpu/swsmu: default to fullscreen 3D profile for > dGPUs") > Signed-off-by: Alex Deucher > Cc: Kenneth Feng > Cc: Lijo Lazar > --- > drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 11 ++- > 1 file changed, 10 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > index accc96a03bd9..8d4aee4e2287 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > @@ -1236,6 +1236,14 @@ static void smu_init_xgmi_plpd_mode(struct smu_context > *smu) > } > } > > +static bool smu_is_workload_profile_available(struct smu_context *smu, > + u32 profile) > +{ > + if (profile >= PP_SMC_POWER_PROFILE_COUNT) > + return false; > + return smu->workload_map && smu->workload_map[profile].valid_mapping; > +} > + > static int smu_sw_init(struct amdgpu_ip_block *ip_block) > { > struct amdgpu_device *adev = ip_block->adev; > @@ -1267,7 +1275,8 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block) > smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; > smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; > > - if (smu->is_apu) > + if (smu->is_apu || > + !smu_is_workload_profile_available(smu, > PP_SMC_POWER_PROFILE_FULLSCREEN3D)) > smu->workload_mask = 1 << > smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; > else > smu->workload_mask = 1 << > smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; FWIW, PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT isn't really suitable for compositing with an APU either (certainly not while the machine is connected to AC, though I'm getting good battery life even forcing the compute profile). -- Earthling Michel Dänzer \GNOME / Xwayland / Mesa developer https://redhat.com \ Libre software enthusiast
[PATCH v3] drm/amdgpu: Add dcn30 drm_panic support
Add support for the drm_panic module, which displays a pretty user friendly message on the screen when a Linux kernel panic occurs. It should work on all radeon using amdgpu_dm_plane.c, when the framebuffer is linear (like when in a VT). For tiled framebuffer, it will only work on radeon with dcn3x. It should be easy to add support for dcn20, but I can't test it. I've tested it on a Radeon W6400 pro, Radeon 7900XTX and Radeon RX 5700. Also it doesn't work yet on laptop's panel, maybe due to PSR. Signed-off-by: Jocelyn Falempe --- v2: - Disable tiling, and force page flip in the panic_flush() callback. - Enable tiling support for dcn31. - Force immediate page flip. v3: * Add hubp_clear_tiling hook to dcn32 and dcn35 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 136 +- .../amd/display/dc/hubp/dcn30/dcn30_hubp.c| 17 +++ .../amd/display/dc/hubp/dcn30/dcn30_hubp.h| 2 + .../amd/display/dc/hubp/dcn31/dcn31_hubp.c| 1 + .../amd/display/dc/hubp/dcn32/dcn32_hubp.c| 3 +- .../amd/display/dc/hubp/dcn35/dcn35_hubp.c| 1 + drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h | 1 + 7 files changed, 159 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 495e3cd70426d..60606b36f07bd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -26,7 +26,9 @@ #include #include +#include "drm/drm_framebuffer.h" #include +#include #include #include #include @@ -36,6 +38,7 @@ #include "amdgpu_display.h" #include "amdgpu_dm_trace.h" #include "amdgpu_dm_plane.h" +#include "bif/bif_4_1_d.h" #include "gc/gc_11_0_0_offset.h" #include "gc/gc_11_0_0_sh_mask.h" @@ -1421,6 +1424,124 @@ static void amdgpu_dm_plane_atomic_async_update(struct drm_plane *plane, amdgpu_dm_plane_handle_cursor_update(plane, old_state); } +/* panic_bo is set in amdgpu_dm_plane_get_scanout_buffer() and only used in amdgpu_dm_set_pixel() + * they are called from the panic handler, and protected by the drm_panic spinlock. 
+ */
+static struct amdgpu_bo *panic_abo;
+
+/* Use the indirect MMIO to write each pixel to the GPU VRAM,
+ * This is a simplified version of amdgpu_device_mm_access()
+ */
+static void amdgpu_dm_set_pixel(struct drm_scanout_buffer *sb,
+				unsigned int x,
+				unsigned int y,
+				u32 color)
+{
+	struct amdgpu_res_cursor cursor;
+	unsigned long offset;
+	struct amdgpu_bo *abo = panic_abo;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+	uint32_t tmp;
+
+	offset = x * 4 + y * sb->pitch[0];
+	amdgpu_res_first(abo->tbo.resource, offset, 4, &cursor);
+
+	tmp = cursor.start >> 31;
+	WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t) cursor.start) | 0x80000000);
+	if (tmp != 0xffffffff)
+		WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
+	WREG32_NO_KIQ(mmMM_DATA, color);
+}
+
+static int amdgpu_dm_plane_get_scanout_buffer(struct drm_plane *plane,
+					      struct drm_scanout_buffer *sb)
+{
+	struct amdgpu_bo *abo;
+	struct drm_framebuffer *fb = plane->state->fb;
+
+	if (!fb)
+		return -EINVAL;
+
+	DRM_DEBUG_KMS("Framebuffer %dx%d %p4cc\n", fb->width, fb->height, &fb->format->format);
+
+	abo = gem_to_amdgpu_bo(fb->obj[0]);
+	if (!abo)
+		return -EINVAL;
+
+	sb->width = fb->width;
+	sb->height = fb->height;
+	/* Use the generic linear format, because tiling will be disabled in panic_flush() */
+	sb->format = drm_format_info(fb->format->format);
+	if (!sb->format)
+		return -EINVAL;
+
+	sb->pitch[0] = fb->pitches[0];
+
+	if (abo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) {
+		if (abo->tbo.resource->mem_type != TTM_PL_VRAM) {
+			drm_warn(plane->dev, "amdgpu panic, framebuffer not in VRAM\n");
+			return -EINVAL;
+		}
+		/* Only handle 32bits format, to simplify mmio access */
+		if (fb->format->cpp[0] != 4) {
+			drm_warn(plane->dev, "amdgpu panic, pixel format is not 32bits\n");
+			return -EINVAL;
+		}
+		sb->set_pixel = amdgpu_dm_set_pixel;
+		panic_abo = abo;
+		return 0;
+	}
+	if (!abo->kmap.virtual &&
+	    ttm_bo_kmap(&abo->tbo, 0, PFN_UP(abo->tbo.base.size), &abo->kmap)) {
+		drm_warn(plane->dev, "amdgpu bo map failed, panic won't be displayed\n");
+		return -ENOMEM;
+	}
+	if (abo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK)
+		iosys_map_set_vaddr_iomem(&sb->map[0], abo->kmap.virtual);
+	else
+		iosys_
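The hunk that wires these callbacks into the plane is cut off above.
For orientation, drm_panic support hangs off two real fields of struct
drm_plane_helper_funcs; a hedged sketch of the wiring, with the flush
callback name assumed from the changelog rather than visible in the
truncated diff:

	/* Sketch only: get_scanout_buffer/panic_flush are the real
	 * drm_panic hooks in struct drm_plane_helper_funcs; the
	 * amdgpu_dm_plane_panic_flush name is an assumption.
	 */
	static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
		/* existing prepare/cleanup callbacks elided */
		.get_scanout_buffer = amdgpu_dm_plane_get_scanout_buffer,
		.panic_flush = amdgpu_dm_plane_panic_flush,
	};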
Re: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete
On 10/14/2024 1:19 PM, Prike Liang wrote: > To check the status of S3 suspend completion, > use the PM core pm_suspend_global_flags bit(1) > to detect S3 abort events. Therefore, clean up > the AMDGPU driver's private flag suspend_complete. > > Signed-off-by: Prike Liang > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -- > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 -- > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- > drivers/gpu/drm/amd/amdgpu/soc15.c | 7 ++- > drivers/gpu/drm/amd/amdgpu/soc21.c | 2 +- > 5 files changed, 5 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 48c9b9b06905..9b35763ae0a7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -,8 +,6 @@ struct amdgpu_device { > boolin_s3; > boolin_s4; > boolin_s0ix; > - /* indicate amdgpu suspension status */ > - boolsuspend_complete; > > enum pp_mp1_state mp1_state; > struct amdgpu_doorbell_index doorbell_index; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index 680e44fdee6e..78972151b970 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -2501,7 +2501,6 @@ static int amdgpu_pmops_suspend(struct device *dev) > struct drm_device *drm_dev = dev_get_drvdata(dev); > struct amdgpu_device *adev = drm_to_adev(drm_dev); > > - adev->suspend_complete = false; > if (amdgpu_acpi_is_s0ix_active(adev)) > adev->in_s0ix = true; > else if (amdgpu_acpi_is_s3_active(adev)) > @@ -2516,7 +2515,6 @@ static int amdgpu_pmops_suspend_noirq(struct device > *dev) > struct drm_device *drm_dev = dev_get_drvdata(dev); > struct amdgpu_device *adev = drm_to_adev(drm_dev); > > - adev->suspend_complete = true; > if (amdgpu_acpi_should_gpu_reset(adev)) > return amdgpu_asic_reset(adev); > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index be320d753507..ba8e66744376 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -3276,8 +3276,8 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device > *adev) >* confirmed that the APU gfx10/gfx11 needn't such update. >*/ > if (adev->flags & AMD_IS_APU && > - adev->in_s3 && !adev->suspend_complete) { > - DRM_INFO(" Will skip the CSB packet resubmit\n"); > + adev->in_s3 && !pm_resume_via_firmware()) { > + DRM_INFO("Will skip the CSB packet resubmit\n"); > return 0; > } > r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); > diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c > b/drivers/gpu/drm/amd/amdgpu/soc15.c > index 12ff6cf568dc..d9d11131a744 100644 > --- a/drivers/gpu/drm/amd/amdgpu/soc15.c > +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c > @@ -584,13 +584,10 @@ static bool soc15_need_reset_on_resume(struct > amdgpu_device *adev) >*performing pm core test. 
>*/ > if (adev->flags & AMD_IS_APU && adev->in_s3 && > - !pm_resume_via_firmware()) { > - adev->suspend_complete = false; > + !pm_resume_via_firmware()) > return true; > - } else { > - adev->suspend_complete = true; > + else > return false; > - } > } > > static int soc15_asic_reset(struct amdgpu_device *adev) > diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c > b/drivers/gpu/drm/amd/amdgpu/soc21.c > index c4b950e75133..7a47a21ef00f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/soc21.c > +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c > @@ -904,7 +904,7 @@ static bool soc21_need_reset_on_resume(struct > amdgpu_device *adev) >* 2) S3 suspend got aborted and TOS is active. >*/ > if (!(adev->flags & AMD_IS_APU) && adev->in_s3 && > - !adev->suspend_complete) { > + !pm_resume_via_firmware()) { Looks like this will cover only ACPI based systems. Not sure if that assumption is valid for dGPU cases. Thanks, Lijo > sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); > msleep(100); > sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
Re: [PATCH 2/5 V2] drm/amdgpu: Add sysfs interface for sdma reset mask
Am 23.10.24 um 04:43 schrieb jesse.zh...@amd.com: Add the sysfs interface for sdma: sdma_reset_mask The interface is read-only and show the resets supported by the IP. For example, full adapter reset (mode1/mode2/BACO/etc), soft reset, queue reset, and pipe reset. V2: the sysfs node returns a text string instead of some flags (Christian) Signed-off-by: Jesse Zhang Suggested-by:Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 48 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 2 + drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 5 +++ drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 5 +++ drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 5 +++ drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 5 +++ drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 5 +++ drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 5 +++ drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 5 +++ drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 5 +++ 10 files changed, 90 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 183a976ba29d..f20b7285f5fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -343,3 +343,51 @@ int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev) return 0; } + +static ssize_t amdgpu_get_sdma_reset_mask(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + ssize_t size = 0; + struct amdgpu_ring *ring = &adev->sdma.instance[0].ring; + + if (!adev || !ring) + return -ENODEV; + + if (amdgpu_device_should_recover_gpu(adev)) + size += sysfs_emit_at(buf, size, "full "); + + if (amdgpu_gpu_recovery && unlikely(!adev->debug_disable_soft_recovery) + && !amdgpu_sriov_vf(adev) && ring->funcs->soft_recovery) A total nit pick, but the indentation here is wrong and I think checkpatch.pl might complain about that. In general the Linux kernel coding styles says that for "if"s you should indent with tabs and then spaces so that in this case the && is under the amdgpu_gpu_recovery of the previous line. + size += sysfs_emit_at(buf, size, "soft "); + + if (amdgpu_gpu_recovery && ring->funcs->reset) + size += sysfs_emit_at(buf, size, "queue "); You could add a generic helper which takes the ring as parameter and prints "full soft queue" into the buffer. Patch #1 is kind of special because of the FW limitations, but that should make patch #2-#5 a bit smaller and use more generic code. And should we print the strings in the order they are applied? In other words "soft queue full" ? Apart from that the patches look totally clean to me. Regards, Christian. 
+ + size += sysfs_emit_at(buf, size, "\n"); + return size; +} + +static DEVICE_ATTR(sdma_reset_mask, 0444, + amdgpu_get_sdma_reset_mask, NULL); + +int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev) +{ + int r = 0; + + if (adev->sdma.num_instances) { + r = device_create_file(adev->dev, &dev_attr_sdma_reset_mask); + if (r) + return r; + } + + return r; +} + +void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev) +{ + if (adev->sdma.num_instances) + device_remove_file(adev->dev, &dev_attr_sdma_reset_mask); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 087ce0f6fa07..3058548d0733 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -175,5 +175,7 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance, void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev, bool duplicate); int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev); +int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev); +void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 10fd772cb80f..bd04310cb2b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -863,6 +863,10 @@ static int sdma_v2_4_sw_init(struct amdgpu_ip_block *ip_block) return r; } + r = amdgpu_sdma_sysfs_reset_mask_init(adev); + if (r) + return r; + return r; } @@ -874,6 +878,7 @@ static int sdma_v2_4_sw_fini(struct amdgpu_ip_block *ip_block) for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); + amdgpu_sdma_sysfs_reset_mask_fini(adev); sdma
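A rough cut of the generic helper Christian asks for, printing in the
order the resets are actually attempted; the name and exact parameter
list are guesses, and patch 1's firmware caveat would still need
special handling in the gfx/compute case:

	/* Hypothetical shared helper for patches 2-5: takes the ring
	 * and emits the common "soft queue full" part of the mask.
	 */
	static ssize_t amdgpu_ring_reset_mask_show(struct amdgpu_device *adev,
						   struct amdgpu_ring *ring,
						   char *buf)
	{
		ssize_t size = 0;

		if (!adev || !ring)
			return -ENODEV;

		/* Order follows how resets escalate: soft -> queue -> full. */
		if (amdgpu_gpu_recovery && !adev->debug_disable_soft_recovery &&
		    !amdgpu_sriov_vf(adev) && ring->funcs->soft_recovery)
			size += sysfs_emit_at(buf, size, "soft ");
		if (amdgpu_gpu_recovery && ring->funcs->reset)
			size += sysfs_emit_at(buf, size, "queue ");
		if (amdgpu_device_should_recover_gpu(adev))
			size += sysfs_emit_at(buf, size, "full ");
		size += sysfs_emit_at(buf, size, "\n");
		return size;
	}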
Re: [PATCH] drm/amd/pm: correct the workload setting
On 10/23/2024 8:42 AM, Kenneth Feng wrote: > Correct the workload setting in order not to mix the setting > with the end user. Update the workload mask accordingly. > > Signed-off-by: Kenneth Feng > --- > drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 17 ++-- > drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 4 +- > .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 39 +++-- > .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 33 --- > .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 42 --- > 5 files changed, 115 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > index accc96a03bd9..f1984736ff4a 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > @@ -1252,7 +1252,8 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block) > atomic64_set(&smu->throttle_int_counter, 0); > smu->watermarks_bitmap = 0; > smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > - smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > + smu->user_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > + smu->driver_workload_mask = 0; > > atomic_set(&smu->smu_power.power_gate.vcn_gated, 1); > atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1); > @@ -1267,10 +1268,12 @@ static int smu_sw_init(struct amdgpu_ip_block > *ip_block) > smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; > smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; > > - if (smu->is_apu) > + if (smu->is_apu) { > smu->workload_mask = 1 << > smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; > - else > + } else { > smu->workload_mask = 1 << > smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; > + smu->user_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; This is driver decided, not user decided. Keeping it as user decided is confusing. Also, ideally user settings is preferred to be kept here - smu_user_dpm_profile > + } > > smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D; > @@ -2346,11 +2349,13 @@ static int smu_switch_power_profile(void *handle, > > if (!en) { > smu->workload_mask &= ~(1 << smu->workload_prority[type]); > + smu->driver_workload_mask &= ~(1 << > smu->workload_prority[type]); > index = fls(smu->workload_mask); > index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : > 0; > workload[0] = smu->workload_setting[index]; > } else { > smu->workload_mask |= (1 << smu->workload_prority[type]); > + smu->driver_workload_mask |= (1 << smu->workload_prority[type]); > index = fls(smu->workload_mask); > index = index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; > workload[0] = smu->workload_setting[index]; > @@ -3045,12 +3050,16 @@ static int smu_set_power_profile_mode(void *handle, > uint32_t param_size) > { > struct smu_context *smu = handle; > + int ret; > > if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled || > !smu->ppt_funcs->set_power_profile_mode) > return -EOPNOTSUPP; > + smu->user_profile_mode_setting = true; > + ret = smu_bump_power_profile_mode(smu, param, param_size); > + smu->user_profile_mode_setting = false; Instead of doing this way, I think it's better to save the user preference as a mask here. Later decide what to be applied whenever workload mask is applied. if (user_profile = default or unknown) // No user preference user_mask = 0 ; // user driver default mask else if (user_profile = xyz) user_mask = xyz_workload_mask; Save user_mask to user_dpm profile. 
Whenever workload mask is applied, select user_mask if non-zero or driver_mask, or a combination of both. Thanks, Lijo > > - return smu_bump_power_profile_mode(smu, param, param_size); > + return ret; > } > > static int smu_get_fan_control_mode(void *handle, u32 *fan_mode) > diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > index 8bb32b3f0d9c..e43b23dd3457 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > @@ -557,10 +557,11 @@ struct smu_context { > bool disable_uclk_switch; > > uint32_t workload_mask; > + uint32_t driver_workload_mask; > uint32_t workload_prority[WORKLOAD_POLICY_MAX]; > uint32_t workload_setting[WORKLOAD_POLICY_MAX]; > uint32_t power_profile_mode; > - uint32_t default_power_profile_mode; > + uint32_t user_profile_mode; > bool pm_enabled; > bool is_apu; >
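Lijo's pseudo-code above might translate to a sketch like this
(illustrative only; storing the user choice in smu_user_dpm_profile as
he suggests would replace the ad-hoc user_profile_mode_setting flag):

	/* Sketch: record the user's explicit choice as a mask once,
	 * then resolve it wherever the workload mask is applied.
	 */
	static u32 smu_effective_workload_mask(struct smu_context *smu,
					       u32 user_profile)
	{
		u32 user_mask = 0;

		if (user_profile != PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT &&
		    user_profile < PP_SMC_POWER_PROFILE_COUNT)
			user_mask = 1 << smu->workload_prority[user_profile];

		/* Prefer the user's explicit choice; else the driver's mask. */
		return user_mask ? user_mask : smu->driver_workload_mask;
	}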
Re: [PATCH v5 4/4] drm/amdgpu: track bo memory stats at runtime
Am 23.10.24 um 13:37 schrieb Tvrtko Ursulin: On 23/10/2024 10:14, Christian König wrote: Am 23.10.24 um 09:38 schrieb Tvrtko Ursulin: On 22/10/2024 17:24, Christian König wrote: Am 22.10.24 um 17:17 schrieb Li, Yunxiang (Teddy): [Public] +static uint32_t fold_memtype(uint32_t memtype) { In general please add prefixes to even static functions, e.g. amdgpu_vm_ or amdgpu_bo_. + /* Squash private placements into 'cpu' to keep the legacy userspace view. */ + switch (mem_type) { + case TTM_PL_VRAM: + case TTM_PL_TT: + return memtype + default: + return TTM_PL_SYSTEM; + } +} + +static uint32_t bo_get_memtype(struct amdgpu_bo *bo) { That whole function belongs into amdgpu_bo.c Do you mean bo_get_memtype or fold_memtype? I debated whether bo_get_memtype should go into amdgpu_vm.c or amdgpu_bo.c, and since it's using fold_memtype and only useful for memory stats because of folding the private placements I just left them here together with the other mem stats code. I can move it to amdgpu_bo.c make it return the memtype verbatim and just fold it when I do the accounting. I think that folding GDS, GWS and OA into system is also a bug. We should really not doing that. Just wanted to point out for this round that the code to query the current placement from a BO should probably go into amdgpu_bo.c and not amdgpu_vm.c + struct ttm_resource *res = bo->tbo.resource; + const uint32_t domain_to_pl[] = { + [ilog2(AMDGPU_GEM_DOMAIN_CPU)] = TTM_PL_SYSTEM, + [ilog2(AMDGPU_GEM_DOMAIN_GTT)] = TTM_PL_TT, + [ilog2(AMDGPU_GEM_DOMAIN_VRAM)] = TTM_PL_VRAM, + [ilog2(AMDGPU_GEM_DOMAIN_GDS)] = AMDGPU_PL_GDS, + [ilog2(AMDGPU_GEM_DOMAIN_GWS)] = AMDGPU_PL_GWS, + [ilog2(AMDGPU_GEM_DOMAIN_OA)] = AMDGPU_PL_OA, + [ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = AMDGPU_PL_DOORBELL, + }; + uint32_t domain; + + if (res) + return fold_memtype(res->mem_type); + + /* + * If no backing store use one of the preferred domain for basic + * stats. We take the MSB since that should give a reasonable + * view. + */ + BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT || TTM_PL_VRAM < TTM_PL_SYSTEM); + domain = fls(bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK); + if (drm_WARN_ON_ONCE(&adev->ddev, + domain == 0 || --domain >= ARRAY_SIZE(domain_to_pl))) It's perfectly legal to create a BO without a placement. That one just won't have a backing store. This is lifted from the previous change I'm rebasing onto. I think what it’s trying to do is if the BO doesn't have a placement, use the "biggest" (VRAM > TT > SYSTEM) preferred placement for the purpose of accounting. Previously we just ignore BOs that doesn't have a placement. I guess there's argument for going with either approaches. I was not arguing, I'm simply pointing out a bug. It's perfectly valid for bo->preferred_domains to be 0. So the following WARN_ON() that no bit is set is incorrect. + return 0; + return fold_memtype(domain_to_pl[domain]) That would need specular execution mitigation if I'm not completely mistaken. Better use a switch/case statement. Do you mean change the array indexing to a switch statement? Yes. Did you mean array_index_nospec? Yes. Domain is not a direct userspace input and is calculated from the mask which sanitized to allowed values prior to this call. So I *think* switch is an overkill but don't mind it either. Just commenting FWIW. I missed that the mask is applied. Thinking more about it I'm not sure if we should do this conversion in the first place. IIRC Tvrtko you once suggested a patch which switched a bunch of code to use the TTM placement instead of the UAPI flags. 
Maybe 8fb0efb10184 ("drm/amdgpu: Reduce mem_type to domain double indirection") is what are you thinking of? Yes, exactly that one. Going more into this direction I think when we want to look at the current placement we should probably also use the TTM PL enumeration directly. It does this already. The placement flags are just to "invent" a TTM PL enum when bo->tbo.resource == NULL. Ah, good point! I though we would do the mapping the other way around. In this case that is even more something we should probably not do at all. When bo->tbo.resource is NULL then this BO isn't resident at all, so it should not account to resident memory. Again, based of the same enum. Not sure if you have something other in mind or you are happy with that? I think that for drm-total-* we should use the GEM flags and for drm-resident-* we should use the TTM placement. Then what Teddy does is IMO only tangential, he just changes when stats are collected and not this aspect. Yeah, right but we should probably fix it up in the right way while on it. To fold or not the special placements (GWS, GDS & co) is also tangential. In my patch I
Re: [PATCH] drm/amd/display: add missing tracepoint event in DM atomic_commit_tail
On 2024-10-23 09:53, Melissa Wen wrote:

There are two events to trace the beginning and the end of
amdgpu_dm_atomic_commit_tail, but only the one at the beginning was
placed. Place the amdgpu_dm_atomic_commit_tail_finish tracepoint at the
end, then.

Signed-off-by: Melissa Wen

Reviewed-by: Leo Li

Thanks!

---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 98f0b142f497..bbfc47f6595f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -10088,6 +10088,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 	for (i = 0; i < crtc_disable_count; i++)
 		pm_runtime_put_autosuspend(dev->dev);
 	pm_runtime_mark_last_busy(dev->dev);
+
+	trace_amdgpu_dm_atomic_commit_tail_finish(state);
 }

 static int dm_force_atomic_commit(struct drm_connector *connector)
[PATCH] drm/amd/display: add missing tracepoint event in DM atomic_commit_tail
There are two events to trace the beginning and the end of
amdgpu_dm_atomic_commit_tail, but only the one at the beginning was
placed. Place the amdgpu_dm_atomic_commit_tail_finish tracepoint at the
end, then.

Signed-off-by: Melissa Wen
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 98f0b142f497..bbfc47f6595f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -10088,6 +10088,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 	for (i = 0; i < crtc_disable_count; i++)
 		pm_runtime_put_autosuspend(dev->dev);
 	pm_runtime_mark_last_busy(dev->dev);
+
+	trace_amdgpu_dm_atomic_commit_tail_finish(state);
 }

 static int dm_force_atomic_commit(struct drm_connector *connector)
--
2.45.2
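For context, both tracepoints are defined from the same event class in
amdgpu_dm_trace.h, keyed on the atomic state pointer, so a trace
consumer can pair begin/finish per commit; roughly (paraphrased from
that header):

	/* Paraphrased: begin and finish share one event class. */
	DEFINE_EVENT(amdgpu_dm_atomic_state_template,
		     amdgpu_dm_atomic_commit_tail_begin,
		     TP_PROTO(const struct drm_atomic_state *state),
		     TP_ARGS(state));

	DEFINE_EVENT(amdgpu_dm_atomic_state_template,
		     amdgpu_dm_atomic_commit_tail_finish,
		     TP_PROTO(const struct drm_atomic_state *state),
		     TP_ARGS(state));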
Re: [PATCH v5 4/4] drm/amdgpu: track bo memory stats at runtime
On 23/10/2024 10:14, Christian König wrote: Am 23.10.24 um 09:38 schrieb Tvrtko Ursulin: On 22/10/2024 17:24, Christian König wrote: Am 22.10.24 um 17:17 schrieb Li, Yunxiang (Teddy): [Public] +static uint32_t fold_memtype(uint32_t memtype) { In general please add prefixes to even static functions, e.g. amdgpu_vm_ or amdgpu_bo_. + /* Squash private placements into 'cpu' to keep the legacy userspace view. */ + switch (mem_type) { + case TTM_PL_VRAM: + case TTM_PL_TT: + return memtype + default: + return TTM_PL_SYSTEM; + } +} + +static uint32_t bo_get_memtype(struct amdgpu_bo *bo) { That whole function belongs into amdgpu_bo.c Do you mean bo_get_memtype or fold_memtype? I debated whether bo_get_memtype should go into amdgpu_vm.c or amdgpu_bo.c, and since it's using fold_memtype and only useful for memory stats because of folding the private placements I just left them here together with the other mem stats code. I can move it to amdgpu_bo.c make it return the memtype verbatim and just fold it when I do the accounting. I think that folding GDS, GWS and OA into system is also a bug. We should really not doing that. Just wanted to point out for this round that the code to query the current placement from a BO should probably go into amdgpu_bo.c and not amdgpu_vm.c + struct ttm_resource *res = bo->tbo.resource; + const uint32_t domain_to_pl[] = { + [ilog2(AMDGPU_GEM_DOMAIN_CPU)] = TTM_PL_SYSTEM, + [ilog2(AMDGPU_GEM_DOMAIN_GTT)] = TTM_PL_TT, + [ilog2(AMDGPU_GEM_DOMAIN_VRAM)] = TTM_PL_VRAM, + [ilog2(AMDGPU_GEM_DOMAIN_GDS)] = AMDGPU_PL_GDS, + [ilog2(AMDGPU_GEM_DOMAIN_GWS)] = AMDGPU_PL_GWS, + [ilog2(AMDGPU_GEM_DOMAIN_OA)] = AMDGPU_PL_OA, + [ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = AMDGPU_PL_DOORBELL, + }; + uint32_t domain; + + if (res) + return fold_memtype(res->mem_type); + + /* + * If no backing store use one of the preferred domain for basic + * stats. We take the MSB since that should give a reasonable + * view. + */ + BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT || TTM_PL_VRAM < TTM_PL_SYSTEM); + domain = fls(bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK); + if (drm_WARN_ON_ONCE(&adev->ddev, + domain == 0 || --domain >= ARRAY_SIZE(domain_to_pl))) It's perfectly legal to create a BO without a placement. That one just won't have a backing store. This is lifted from the previous change I'm rebasing onto. I think what it’s trying to do is if the BO doesn't have a placement, use the "biggest" (VRAM > TT > SYSTEM) preferred placement for the purpose of accounting. Previously we just ignore BOs that doesn't have a placement. I guess there's argument for going with either approaches. I was not arguing, I'm simply pointing out a bug. It's perfectly valid for bo->preferred_domains to be 0. So the following WARN_ON() that no bit is set is incorrect. + return 0; + return fold_memtype(domain_to_pl[domain]) That would need specular execution mitigation if I'm not completely mistaken. Better use a switch/case statement. Do you mean change the array indexing to a switch statement? Yes. Did you mean array_index_nospec? Yes. Domain is not a direct userspace input and is calculated from the mask which sanitized to allowed values prior to this call. So I *think* switch is an overkill but don't mind it either. Just commenting FWIW. I missed that the mask is applied. Thinking more about it I'm not sure if we should do this conversion in the first place. IIRC Tvrtko you once suggested a patch which switched a bunch of code to use the TTM placement instead of the UAPI flags. 
Maybe 8fb0efb10184 ("drm/amdgpu: Reduce mem_type to domain double indirection") is what are you thinking of? Going more into this direction I think when we want to look at the current placement we should probably also use the TTM PL enumeration directly. It does this already. The placement flags are just to "invent" a TTM PL enum when bo->tbo.resource == NULL. if (!res) { /* * If no backing store use one of the preferred domain for basic * stats. We take the MSB since that should give a reasonable * view. */ BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT || TTM_PL_VRAM < TTM_PL_SYSTEM); type = fls(bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK); if (!type) return; type--; if (drm_WARN_ON_ONCE(&adev->ddev, type >= ARRAY_SIZE(domain_to_pl))) return; type = domain_to_pl[type]; } else { type = res->mem_type; } ... stats[type].total += size; if (drm_ge
Re: [PATCH v5 4/4] drm/amdgpu: track bo memory stats at runtime
On 22/10/2024 17:24, Christian König wrote: Am 22.10.24 um 17:17 schrieb Li, Yunxiang (Teddy): [Public] +static uint32_t fold_memtype(uint32_t memtype) { In general please add prefixes to even static functions, e.g. amdgpu_vm_ or amdgpu_bo_. + /* Squash private placements into 'cpu' to keep the legacy userspace view. */ + switch (mem_type) { + case TTM_PL_VRAM: + case TTM_PL_TT: + return memtype + default: + return TTM_PL_SYSTEM; + } +} + +static uint32_t bo_get_memtype(struct amdgpu_bo *bo) { That whole function belongs into amdgpu_bo.c Do you mean bo_get_memtype or fold_memtype? I debated whether bo_get_memtype should go into amdgpu_vm.c or amdgpu_bo.c, and since it's using fold_memtype and only useful for memory stats because of folding the private placements I just left them here together with the other mem stats code. I can move it to amdgpu_bo.c make it return the memtype verbatim and just fold it when I do the accounting. I think that folding GDS, GWS and OA into system is also a bug. We should really not doing that. Just wanted to point out for this round that the code to query the current placement from a BO should probably go into amdgpu_bo.c and not amdgpu_vm.c + struct ttm_resource *res = bo->tbo.resource; + const uint32_t domain_to_pl[] = { + [ilog2(AMDGPU_GEM_DOMAIN_CPU)] = TTM_PL_SYSTEM, + [ilog2(AMDGPU_GEM_DOMAIN_GTT)] = TTM_PL_TT, + [ilog2(AMDGPU_GEM_DOMAIN_VRAM)] = TTM_PL_VRAM, + [ilog2(AMDGPU_GEM_DOMAIN_GDS)] = AMDGPU_PL_GDS, + [ilog2(AMDGPU_GEM_DOMAIN_GWS)] = AMDGPU_PL_GWS, + [ilog2(AMDGPU_GEM_DOMAIN_OA)] = AMDGPU_PL_OA, + [ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = AMDGPU_PL_DOORBELL, + }; + uint32_t domain; + + if (res) + return fold_memtype(res->mem_type); + + /* + * If no backing store use one of the preferred domain for basic + * stats. We take the MSB since that should give a reasonable + * view. + */ + BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT || TTM_PL_VRAM < TTM_PL_SYSTEM); + domain = fls(bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK); + if (drm_WARN_ON_ONCE(&adev->ddev, + domain == 0 || --domain >= ARRAY_SIZE(domain_to_pl))) It's perfectly legal to create a BO without a placement. That one just won't have a backing store. This is lifted from the previous change I'm rebasing onto. I think what it’s trying to do is if the BO doesn't have a placement, use the "biggest" (VRAM > TT > SYSTEM) preferred placement for the purpose of accounting. Previously we just ignore BOs that doesn't have a placement. I guess there's argument for going with either approaches. I was not arguing, I'm simply pointing out a bug. It's perfectly valid for bo->preferred_domains to be 0. So the following WARN_ON() that no bit is set is incorrect. + return 0; + return fold_memtype(domain_to_pl[domain]) That would need specular execution mitigation if I'm not completely mistaken. Better use a switch/case statement. Do you mean change the array indexing to a switch statement? Yes. Did you mean array_index_nospec? Domain is not a direct userspace input and is calculated from the mask which sanitized to allowed values prior to this call. So I *think* switch is an overkill but don't mind it either. Just commenting FWIW. Regards, Tvrtko
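For reference, the speculation hardening Christian alludes to is
available as array_index_nospec() from <linux/nospec.h>; a hedged
sketch of the lookup using it instead of a switch, with the bounds
check shape and table taken from the patch under discussion:

	#include <linux/kernel.h>
	#include <linux/nospec.h>

	/* Sketch only: clamp the computed index under speculative
	 * execution before the table load.
	 */
	static uint32_t pl_from_domain_msb(uint32_t domain)
	{
		static const uint32_t domain_to_pl[] = {
			[ilog2(AMDGPU_GEM_DOMAIN_CPU)]  = TTM_PL_SYSTEM,
			[ilog2(AMDGPU_GEM_DOMAIN_GTT)]  = TTM_PL_TT,
			[ilog2(AMDGPU_GEM_DOMAIN_VRAM)] = TTM_PL_VRAM,
		};

		if (domain == 0 || --domain >= ARRAY_SIZE(domain_to_pl))
			return TTM_PL_SYSTEM;

		domain = array_index_nospec(domain, ARRAY_SIZE(domain_to_pl));
		return domain_to_pl[domain];
	}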
[PATCH] drm/amdgpu/smu13: fix profile reporting
The following 3 commits landed in parallel: commit d7d2688bf4ea ("drm/amd/pm: update workload mask after the setting") commit 7a1613e47e65 ("drm/amdgpu/smu13: always apply the powersave optimization") commit 7c210ca5a2d7 ("drm/amdgpu: handle default profile on on devices without fullscreen 3D") While everything is set correctly, this caused the profile to be reported incorrectly because both the powersave and fullscreen3d bits were set in the mask and when the driver prints the profile, it looks for the first bit set. Fixes: d7d2688bf4ea ("drm/amd/pm: update workload mask after the setting") Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index cb923e33fd6f..d53e162dcd8d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2485,7 +2485,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, DpmActivityMonitorCoeffInt_t *activity_monitor = &(activity_monitor_external.DpmActivityMonitorCoeffInt); int workload_type, ret = 0; - u32 workload_mask; + u32 workload_mask, selected_workload_mask; smu->power_profile_mode = input[size]; @@ -2552,7 +2552,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, if (workload_type < 0) return -EINVAL; - workload_mask = 1 << workload_type; + selected_workload_mask = workload_mask = 1 << workload_type; /* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */ if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && @@ -2572,7 +2572,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, workload_mask, NULL); if (!ret) - smu->workload_mask = workload_mask; + smu->workload_mask = selected_workload_mask; return ret; } -- 2.46.2
RE: [PATCH v5 4/4] drm/amdgpu: track bo memory stats at runtime
Yeah it looks like I missed the whole active/purgeable thing as well...

Teddy
Re: [PATCH v5 4/4] drm/amdgpu: track bo memory stats at runtime
On 23/10/2024 13:12, Christian König wrote: Am 23.10.24 um 13:37 schrieb Tvrtko Ursulin: On 23/10/2024 10:14, Christian König wrote: Am 23.10.24 um 09:38 schrieb Tvrtko Ursulin: On 22/10/2024 17:24, Christian König wrote: Am 22.10.24 um 17:17 schrieb Li, Yunxiang (Teddy): [Public] +static uint32_t fold_memtype(uint32_t memtype) { In general please add prefixes to even static functions, e.g. amdgpu_vm_ or amdgpu_bo_. + /* Squash private placements into 'cpu' to keep the legacy userspace view. */ + switch (mem_type) { + case TTM_PL_VRAM: + case TTM_PL_TT: + return memtype + default: + return TTM_PL_SYSTEM; + } +} + +static uint32_t bo_get_memtype(struct amdgpu_bo *bo) { That whole function belongs into amdgpu_bo.c Do you mean bo_get_memtype or fold_memtype? I debated whether bo_get_memtype should go into amdgpu_vm.c or amdgpu_bo.c, and since it's using fold_memtype and only useful for memory stats because of folding the private placements I just left them here together with the other mem stats code. I can move it to amdgpu_bo.c make it return the memtype verbatim and just fold it when I do the accounting. I think that folding GDS, GWS and OA into system is also a bug. We should really not doing that. Just wanted to point out for this round that the code to query the current placement from a BO should probably go into amdgpu_bo.c and not amdgpu_vm.c + struct ttm_resource *res = bo->tbo.resource; + const uint32_t domain_to_pl[] = { + [ilog2(AMDGPU_GEM_DOMAIN_CPU)] = TTM_PL_SYSTEM, + [ilog2(AMDGPU_GEM_DOMAIN_GTT)] = TTM_PL_TT, + [ilog2(AMDGPU_GEM_DOMAIN_VRAM)] = TTM_PL_VRAM, + [ilog2(AMDGPU_GEM_DOMAIN_GDS)] = AMDGPU_PL_GDS, + [ilog2(AMDGPU_GEM_DOMAIN_GWS)] = AMDGPU_PL_GWS, + [ilog2(AMDGPU_GEM_DOMAIN_OA)] = AMDGPU_PL_OA, + [ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = AMDGPU_PL_DOORBELL, + }; + uint32_t domain; + + if (res) + return fold_memtype(res->mem_type); + + /* + * If no backing store use one of the preferred domain for basic + * stats. We take the MSB since that should give a reasonable + * view. + */ + BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT || TTM_PL_VRAM < TTM_PL_SYSTEM); + domain = fls(bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK); + if (drm_WARN_ON_ONCE(&adev->ddev, + domain == 0 || --domain >= ARRAY_SIZE(domain_to_pl))) It's perfectly legal to create a BO without a placement. That one just won't have a backing store. This is lifted from the previous change I'm rebasing onto. I think what it’s trying to do is if the BO doesn't have a placement, use the "biggest" (VRAM > TT > SYSTEM) preferred placement for the purpose of accounting. Previously we just ignore BOs that doesn't have a placement. I guess there's argument for going with either approaches. I was not arguing, I'm simply pointing out a bug. It's perfectly valid for bo->preferred_domains to be 0. So the following WARN_ON() that no bit is set is incorrect. + return 0; + return fold_memtype(domain_to_pl[domain]) That would need specular execution mitigation if I'm not completely mistaken. Better use a switch/case statement. Do you mean change the array indexing to a switch statement? Yes. Did you mean array_index_nospec? Yes. Domain is not a direct userspace input and is calculated from the mask which sanitized to allowed values prior to this call. So I *think* switch is an overkill but don't mind it either. Just commenting FWIW. I missed that the mask is applied. Thinking more about it I'm not sure if we should do this conversion in the first place. 
IIRC Tvrtko you once suggested a patch which switched a bunch of code to use the TTM placement instead of the UAPI flags. Maybe 8fb0efb10184 ("drm/amdgpu: Reduce mem_type to domain double indirection") is what are you thinking of? Yes, exactly that one. Going more into this direction I think when we want to look at the current placement we should probably also use the TTM PL enumeration directly. It does this already. The placement flags are just to "invent" a TTM PL enum when bo->tbo.resource == NULL. Ah, good point! I though we would do the mapping the other way around. In this case that is even more something we should probably not do at all. When bo->tbo.resource is NULL then this BO isn't resident at all, so it should not account to resident memory. It doesn't, only for total. I should have pasted more context..: struct ttm_resource *res = bo->tbo.resource; ... /* DRM stats common fields: */ stats[type].total += size; if (drm_gem_object_is_shared_for_memory_stats(obj)) stats[type].drm.shared += size; else stats[type].drm.private += size; if (res) { stats[type].drm.resident += size So if no current
Re: [PATCH v5 4/4] drm/amdgpu: track bo memory stats at runtime
On 22/10/2024 18:06, Christian König wrote: Am 22.10.24 um 18:46 schrieb Li, Yunxiang (Teddy): [Public] I suppose we could add a field like amd-memory-private: to cover the private placements. No, that is not really appropriate either. GWS, GDS and OA are not memory in the first place. Those BOs are HW blocks which the driver allocated to use. So accounting them for the memory usage doesn't make any sense at all. We could print them in the fdinfo as something special for statistics, but it's probably not that useful. When would a BO not have a placement, is it when it is being moved? There are BOs which are only temporary, so when they are evicted their backing store is just discarded. Additional to that allocation of backing store is sometimes delayed until the first use. Would this work correctly if instead of preferred allowed mask was used? Point being, to correctly support fdinfo stats drm-total-, *if* a BO *can* have a backing store at any point it should always be counted there. *If* it currently has a placement it is drm-resident-. If it has a placement but can be discarded it is drm-purgeable-. Etc. Regards, Tvrtko Since we are tracking the state changes, I wonder if such situations can be avoided now so whenever we call these stat update functions the BO would always have a placement. No, as I said before those use cases are perfectly valid. BO don't need a backing store nor do they need a placement. So the code has to gracefully handle that. Regards, Christian. Teddy
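Spelling Tvrtko's rule out against the stats layout quoted earlier in
the thread (a sketch under those assumptions, not the final patch; the
total field and the embedded drm struct follow the snippet shown in the
later replies):

	/* Sketch: anything that can ever have a backing store counts
	 * toward total; only a currently placed BO is resident, and
	 * resident + discardable makes it purgeable.
	 */
	static void account_bo(struct amdgpu_bo *bo, uint32_t type,
			       uint64_t size, struct amdgpu_mem_stats *stats)
	{
		stats[type].total += size;

		if (bo->tbo.resource) {
			stats[type].drm.resident += size;
			if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
				stats[type].drm.purgeable += size;
		}
	}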
Re: [PATCH v5 4/4] drm/amdgpu: track bo memory stats at runtime
Am 23.10.24 um 14:24 schrieb Tvrtko Ursulin: [SNIP] To fold or not the special placements (GWS, GDS & co) is also tangential. In my patch I just preserved the legacy behaviour so it can easily be tweaked on top. Yeah, but again the original behavior is completely broken. GWS, GDS and OA are counted in blocks of HW units (multiplied by PAGE_SIZE IIRC to avoid some GEM&TTM warnings). When you accumulate that anywhere in the memory stats then that is just completely off. Ooops. :) Are they backed by some memory though, be it system or VRAM? GDS is an internal 4 or 64KiB memory block which is only valid while shaders are running. It is used to communicate stuff between different shader stages and not even CPU accessible. GWS and OA are not even memory, those are just HW blocks which implement a fixed function. IIRC most HW generation have 16 of each and when setting up the application virtual address space you can specify how many will be used by the application. Regards, Christian. Regards, Tvrtko
RE: [PATCH v5 4/4] drm/amdgpu: track bo memory stats at runtime
[AMD Official Use Only - AMD Internal Distribution Only] > From: Tvrtko Ursulin > Sent: Wednesday, October 23, 2024 8:25 > On 23/10/2024 13:12, Christian König wrote: > > Am 23.10.24 um 13:37 schrieb Tvrtko Ursulin: > >> > >> On 23/10/2024 10:14, Christian König wrote: > >>> Am 23.10.24 um 09:38 schrieb Tvrtko Ursulin: > > On 22/10/2024 17:24, Christian König wrote: > > Am 22.10.24 um 17:17 schrieb Li, Yunxiang (Teddy): > >> [Public] > >> > +static uint32_t fold_memtype(uint32_t memtype) { > >>> In general please add prefixes to even static functions, e.g. > >>> amdgpu_vm_ or > >>> amdgpu_bo_. > >>> > + /* Squash private placements into 'cpu' to keep the legacy > userspace view. > >>> */ > + switch (mem_type) { > + case TTM_PL_VRAM: > + case TTM_PL_TT: > + return memtype > + default: > + return TTM_PL_SYSTEM; > + } > +} > + > +static uint32_t bo_get_memtype(struct amdgpu_bo *bo) { > >>> That whole function belongs into amdgpu_bo.c > >> Do you mean bo_get_memtype or fold_memtype? I debated whether > >> bo_get_memtype should go into amdgpu_vm.c or amdgpu_bo.c, and > >> since it's using fold_memtype and only useful for memory stats > >> because of folding the private placements I just left them here > >> together with the other mem stats code. > >> > >> I can move it to amdgpu_bo.c make it return the memtype verbatim > >> and just fold it when I do the accounting. > > > > I think that folding GDS, GWS and OA into system is also a bug. We > > should really not doing that. > > > > Just wanted to point out for this round that the code to query the > > current placement from a BO should probably go into amdgpu_bo.c > > and not amdgpu_vm.c > > > >> > + struct ttm_resource *res = bo->tbo.resource; > + const uint32_t domain_to_pl[] = { > + [ilog2(AMDGPU_GEM_DOMAIN_CPU)] = > +TTM_PL_SYSTEM, > + [ilog2(AMDGPU_GEM_DOMAIN_GTT)] = TTM_PL_TT, > + [ilog2(AMDGPU_GEM_DOMAIN_VRAM)] = > TTM_PL_VRAM, > + [ilog2(AMDGPU_GEM_DOMAIN_GDS)] = > +AMDGPU_PL_GDS, > + [ilog2(AMDGPU_GEM_DOMAIN_GWS)] = > +AMDGPU_PL_GWS, > + [ilog2(AMDGPU_GEM_DOMAIN_OA)] = > AMDGPU_PL_OA, > + [ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = > >>> AMDGPU_PL_DOORBELL, > + }; > + uint32_t domain; > + > + if (res) > + return fold_memtype(res->mem_type); > + > + /* > +* If no backing store use one of the preferred domain for > basic > +* stats. We take the MSB since that should give a > +reasonable > +* view. > +*/ > + BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT || > TTM_PL_VRAM < > >>> TTM_PL_SYSTEM); > + domain = fls(bo->preferred_domains & > +AMDGPU_GEM_DOMAIN_MASK); > + if (drm_WARN_ON_ONCE(&adev->ddev, > +domain == 0 || --domain >= > ARRAY_SIZE(domain_to_pl))) > >>> It's perfectly legal to create a BO without a placement. That > >>> one just won't have a backing store. > >>> > >> This is lifted from the previous change I'm rebasing onto. I > >> think what it’s trying to do is if the BO doesn't have a > >> placement, use the "biggest" (VRAM > TT > SYSTEM) preferred > >> placement for the purpose of accounting. Previously we just > >> ignore BOs that doesn't have a placement. I guess there's > >> argument for going with either approaches. > > > > I was not arguing, I'm simply pointing out a bug. It's perfectly > > valid for bo->preferred_domains to be 0. > > > > So the following WARN_ON() that no bit is set is incorrect. > > > >> > + return 0; > + return fold_memtype(domain_to_pl[domain]) > >>> That would need specular execution mitigation if I'm not > >>> completely mistaken. > >>> > >>> Better use a switch/case statement. 
> >>> > >> Do you mean change the array indexing to a switch statement? > > > > Yes. > > Did you mean array_index_nospec? > >>> > >>> Yes. > >>> > Domain is not a direct userspace input and is calculated from the > mask, which is sanitized to allowed values prior to this call. So I > *think* switch is overkill but don't mind it either. Just > commenting FWIW. > >>> > >>> I missed that the mask is applied. > >>> > >>> Thinking more about it I'm not sure if we should do this conversion > >>> in the first place. IIRC, Tvrtko, you once suggested a patch which > >>> switched a bunch of code to use the TTM
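For readers following the array_index_nospec() exchange above: a minimal sketch of what that hardening would look like if the index were ever derived from untrusted input (the thread concludes the mask sanitization makes it unnecessary here):

#include <linux/nospec.h>

/* Sketch only: clamp 'domain' against speculative out-of-bounds reads
 * before using it to index domain_to_pl[]. array_index_nospec() leaves
 * the value unchanged on the architectural path but forces it to 0
 * under misspeculation, so the CPU cannot leak adjacent memory.
 */
domain = array_index_nospec(domain, ARRAY_SIZE(domain_to_pl));
return fold_memtype(domain_to_pl[domain]);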
[PATCH AUTOSEL 6.11 25/30] drm/amdkfd: Accounting pdd vram_usage for svm
From: Philip Yang [ Upstream commit 68d26c10ef503175df3142db6fcd75dd94860592 ] Process device data pdd->vram_usage is read by rocm-smi via sysfs; it is currently missing the svm_bo usage accounting, so the "rocm-smi --showpids" per-process VRAM usage report is incorrect. Add pdd->vram_usage accounting on svm_bo allocation and release, and change it to atomic64_t because it is now updated outside the process mutex. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit 98c0b0efcc11f2a5ddf3ce33af1e48eedf808b04) Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 +++--- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 26 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 546b02f2241a6..5953bc5f31192 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1170,7 +1170,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM) size >>= 1; - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size)); + atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage); } mutex_unlock(&p->mutex); @@ -1241,7 +1241,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, kfd_process_device_remove_obj_handle( pdd, GET_IDR_HANDLE(args->handle)); - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size); + atomic64_sub(size, &pdd->vram_usage); err_unlock: err_pdd: @@ -2346,7 +2346,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { bo_bucket->restored_offset = offset; /* Update the VRAM usage count */ - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size); + atomic64_add(bo_bucket->size, &pdd->vram_usage); } return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 2b3ec92981e8f..f35741fade911 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -766,7 +766,7 @@ struct kfd_process_device { enum kfd_pdd_bound bound; /* VRAM usage */ - uint64_t vram_usage; + atomic64_t vram_usage; struct attribute attr_vram; char vram_filename[MAX_SYSFS_FILENAME_LEN]; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index e44892109f71b..8343b3e4de7b5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -306,7 +306,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr, } else if (strncmp(attr->name, "vram_", 5) == 0) { struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, attr_vram); - return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage)); + return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage)); } else if (strncmp(attr->name, "sdma_", 5) == 0) { struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, attr_sdma); @@ -1599,7 +1599,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, pdd->bound = PDD_UNBOUND; pdd->already_dequeued = false; pdd->runtime_inuse = false; - pdd->vram_usage = 0; + atomic64_set(&pdd->vram_usage, 0); pdd->sdma_past_activity_counter = 0; pdd->user_gpu_id = dev->id; atomic64_set(&pdd->evict_duration_counter, 0);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index bd9c2921e0dcc..7d00d89586a10 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -404,6 +404,27 @@ static void svm_range_bo_release(struct kref *kref) spin_lock(&svm_bo->list_lock); } spin_unlock(&svm_bo->list_lock); + + if (mmget_not_zero(svm_bo->eviction_fence->mm)) { + struct kfd_process_device *pdd; + struct kfd_process *p; + struct mm_struct *mm; + + mm = svm_bo->eviction_fence->mm; + /* + * The forked child process takes svm_bo device pages ref, svm_bo could be + * released after parent process is gone. + */ + p = kfd_looku
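The kfd_svm.c hunk above is truncated in this archive. For context, the accounting pattern the commit message describes reduces to the following sketch (simplified, with a hypothetical helper name; not the verbatim upstream hunk):

/* Charge/uncharge per-process VRAM usage with atomic ops, since the
 * svm_bo release path runs outside the process mutex. atomic64_add/sub
 * replace the earlier WRITE_ONCE() read-modify-write, which was not
 * safe against concurrent updaters.
 */
static void svm_account_vram(struct kfd_process_device *pdd, u64 size, bool alloc)
{
	if (alloc)
		atomic64_add(size, &pdd->vram_usage);
	else
		atomic64_sub(size, &pdd->vram_usage);
}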
[PATCH AUTOSEL 6.6 19/23] drm/amdkfd: Accounting pdd vram_usage for svm
From: Philip Yang [ Upstream commit 68d26c10ef503175df3142db6fcd75dd94860592 ] Process device data pdd->vram_usage is read by rocm-smi via sysfs; it is currently missing the svm_bo usage accounting, so the "rocm-smi --showpids" per-process VRAM usage report is incorrect. Add pdd->vram_usage accounting on svm_bo allocation and release, and change it to atomic64_t because it is now updated outside the process mutex. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit 98c0b0efcc11f2a5ddf3ce33af1e48eedf808b04) Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 +++--- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 26 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 19d46be639429..8669677662d0c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1164,7 +1164,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM) size >>= 1; - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size)); + atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage); } mutex_unlock(&p->mutex); @@ -1235,7 +1235,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, kfd_process_device_remove_obj_handle( pdd, GET_IDR_HANDLE(args->handle)); - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size); + atomic64_sub(size, &pdd->vram_usage); err_unlock: err_pdd: @@ -2352,7 +2352,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { bo_bucket->restored_offset = offset; /* Update the VRAM usage count */ - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size); + atomic64_add(bo_bucket->size, &pdd->vram_usage); } return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 67204c3dfbb8f..27c9d5c43765a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -765,7 +765,7 @@ struct kfd_process_device { enum kfd_pdd_bound bound; /* VRAM usage */ - uint64_t vram_usage; + atomic64_t vram_usage; struct attribute attr_vram; char vram_filename[MAX_SYSFS_FILENAME_LEN]; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 43f520b379670..6c90231e0aec2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -306,7 +306,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr, } else if (strncmp(attr->name, "vram_", 5) == 0) { struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, attr_vram); - return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage)); + return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage)); } else if (strncmp(attr->name, "sdma_", 5) == 0) { struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, attr_sdma); @@ -1589,7 +1589,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, pdd->bound = PDD_UNBOUND; pdd->already_dequeued = false; pdd->runtime_inuse = false; - pdd->vram_usage = 0; + atomic64_set(&pdd->vram_usage, 0); pdd->sdma_past_activity_counter = 0; pdd->user_gpu_id = dev->id; atomic64_set(&pdd->evict_duration_counter, 0);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index ce76d45549984..6b7c6f45a80a8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -391,6 +391,27 @@ static void svm_range_bo_release(struct kref *kref) spin_lock(&svm_bo->list_lock); } spin_unlock(&svm_bo->list_lock); + + if (mmget_not_zero(svm_bo->eviction_fence->mm)) { + struct kfd_process_device *pdd; + struct kfd_process *p; + struct mm_struct *mm; + + mm = svm_bo->eviction_fence->mm; + /* + * The forked child process takes svm_bo device pages ref, svm_bo could be + * released after parent process is gone. + */ + p = kfd_looku
Re: [PATCH 2/2] Documentation/gpu/amdgpu: Add programming model for DCN
On 2024-10-16 23:34, Rodrigo Siqueira wrote: > One of the challenges to contributing to the display code is the > complexity of the DC component. This commit adds a documentation page > that discusses the programming model used by DCN and an overview of how > the display code is organized. > > Cc: Leo Li > Cc: Aurabindo Pillai > Cc: Hamza Mahfooz > Cc: Harry Wentland > Cc: Mario Limonciello > Cc: Christian König > Cc: Alex Deucher > Signed-off-by: Rodrigo Siqueira Series is Reviewed-by: Harry Wentland Harry > --- > .../gpu/amdgpu/display/dc-arch-overview.svg | 731 + > .../gpu/amdgpu/display/dc-components.svg | 732 ++ > .../gpu/amdgpu/display/dcn-blocks.rst | 2 + > .../gpu/amdgpu/display/dcn-overview.rst | 2 + > Documentation/gpu/amdgpu/display/index.rst | 1 + > .../amdgpu/display/programming-model-dcn.rst | 162 > 6 files changed, 1630 insertions(+) > create mode 100644 Documentation/gpu/amdgpu/display/dc-arch-overview.svg > create mode 100644 Documentation/gpu/amdgpu/display/dc-components.svg > create mode 100644 Documentation/gpu/amdgpu/display/programming-model-dcn.rst > > diff --git a/Documentation/gpu/amdgpu/display/dc-arch-overview.svg b/Documentation/gpu/amdgpu/display/dc-arch-overview.svg > new file mode 100644 > index ..23394931cf26 > --- /dev/null > +++ b/Documentation/gpu/amdgpu/display/dc-arch-overview.svg > @@ -0,0 +1,731 @@ > [quoted Inkscape SVG markup (XML namespaces, arrow-marker defs, and path data) trimmed; the hunks add only the figure sources for the new documentation page]
[PATCH] drm/amd/amdgpu: limit single process inside MES
This is for MES to limit only one process for the user queues Signed-off-by: Shaoyun Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 24 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 19 +++ drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 11 +++ drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 11 +++ 5 files changed, 67 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index e96984c53e72..72e38d621a29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1576,9 +1576,11 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, if (adev->enforce_isolation[i] && !partition_values[i]) { /* Going from enabled to disabled */ amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); + amdgpu_mes_set_enforce_isolation(adev, i, false); } else if (!adev->enforce_isolation[i] && partition_values[i]) { /* Going from disabled to enabled */ amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); + amdgpu_mes_set_enforce_isolation(adev, i, true); } adev->enforce_isolation[i] = partition_values[i]; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index bf584e9bcce4..29b6a2baae4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1674,6 +1674,30 @@ bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) return is_supported; } +/* Fix me -- node_id is used to identify the correct MES instances in the future */ +int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable) +{ + struct mes_misc_op_input op_input = {0}; + int r; + + op_input.op = MES_MISC_OP_CHANGE_CONFIG; + op_input.change_config.option.limit_single_process = enable ? 
1 : 0; + + if (!adev->mes.funcs->misc_op) { + DRM_ERROR("mes change config is not supported!\n"); + r = -EINVAL; + goto error; + } + + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + if (r) + DRM_ERROR("failed to change_config.\n"); + +error: + return r; + +} + #if defined(CONFIG_DEBUG_FS) static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 79f13d7e5e16..91bff6443c05 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -311,6 +311,7 @@ enum mes_misc_opcode { MES_MISC_OP_WRM_REG_WAIT, MES_MISC_OP_WRM_REG_WR_WAIT, MES_MISC_OP_SET_SHADER_DEBUGGER, + MES_MISC_OP_CHANGE_CONFIG, }; struct mes_misc_op_input { @@ -349,6 +350,21 @@ struct mes_misc_op_input { uint32_t tcp_watch_cntl[4]; uint32_t trap_en; } set_shader_debugger; + + struct { + union { + struct { + uint32_t limit_single_process : 1; + uint32_t enable_hws_logging_buffer : 1; + uint32_t reserved : 30; + }; + uint32_t all; + } option; + struct { + uint32_t tdr_level; + uint32_t tdr_delay; + } tdr_config; + } change_config; }; }; @@ -519,4 +535,7 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes) } bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev); + +int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable); + #endif /* __AMDGPU_MES_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 57db0c006c8f..1d6de7bced48 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -644,6 +644,14 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; break; + case MES_MISC_OP_CHANGE_CONFIG: + misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; + misc_pkt.change_config.opcode = + MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS; + misc_pkt.change_config.option.bits.li
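A note on the change_config layout added above: the anonymous union lets callers set individual policy bits while a backend can read the word back whole. A usage sketch mirroring the wrapper in this patch:

struct mes_misc_op_input op_input = {0};

op_input.op = MES_MISC_OP_CHANGE_CONFIG;
op_input.change_config.option.limit_single_process = 1;

/* Because 'option' is a union of the bitfield struct and 'all',
 * option.all now reads back 0x1, so an IP backend can either forward
 * the whole 32-bit word to firmware or copy bits individually, as
 * mes_v11_0_misc_op() appears to do for limit_single_process. */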
[pull] amdgpu drm-fixes-6.12
Hi Dave, Simona, Fixes for 6.12. The following changes since commit 42f7652d3eb527d03665b09edac47f85fb600924: Linux 6.12-rc4 (2024-10-20 15:19:38 -0700) are available in the Git repository at: https://gitlab.freedesktop.org/agd5f/linux.git tags/amd-drm-fixes-6.12-2024-10-23 for you to fetch changes up to 7c210ca5a2d72868e5a052fc533d5dcb7e070f89: drm/amdgpu: handle default profile on on devices without fullscreen 3D (2024-10-22 18:21:51 -0400) amd-drm-fixes-6.12-2024-10-23: amdgpu: - ACPI method handling fixes - SMU 14.x fixes - Display idle optimization fix - DP link layer compliance fix - SDMA 7.x fix - PSR-SU fix - SWSMU fix Alex Deucher (1): drm/amdgpu: handle default profile on on devices without fullscreen 3D Aurabindo Pillai (2): drm/amd/display: temp w/a for dGPU to enter idle optimizations drm/amd/display: temp w/a for DP Link Layer compliance Frank Min (1): drm/amdgpu: fix random data corruption for sdma 7 Kenneth Feng (3): drm/amd/pm: update the driver-fw interface file for smu v14.0.2/3 drm/amd/pm: update overdrive function on smu v14.0.2/3 drm/amd/pm: update deep sleep status on smu v14.0.2/3 Mario Limonciello (2): drm/amd: Guard against bad data for ATIF ACPI method drm/amd/display: Disable PSR-SU on Parade 08-01 TCON too drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 15 ++- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 9 +- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +- .../drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 13 ++ .../drm/amd/display/modules/power/power_helpers.c | 2 + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 11 +- .../pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0.h | 132 + drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h | 2 +- .../gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 66 --- 9 files changed, 156 insertions(+), 97 deletions(-)
Re: [PATCH] drm/amdgpu: Increase MES log buffer to dump mes scratch data
[Public] Acked-by: Alex Deucher From: amd-gfx on behalf of Liu, Shaoyun Sent: Tuesday, October 22, 2024 11:21 AM To: Liu, Shaoyun ; amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: Increase MES log buffer to dump mes scratch data [AMD Official Use Only - AMD Internal Distribution Only] ping -Original Message- From: amd-gfx On Behalf Of Liu, Shaoyun Sent: Friday, October 11, 2024 12:57 PM To: amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: Increase MES log buffer to dump mes scratch data [AMD Official Use Only - AMD Internal Distribution Only] Ping . -Original Message- From: Liu, Shaoyun Sent: Thursday, October 10, 2024 12:10 PM To: amd-gfx@lists.freedesktop.org Cc: Liu, Shaoyun Subject: [PATCH] drm/amdgpu: Increase MES log buffer to dump mes scratch data MES internal scratch data is useful for MES debugging; it can only be located in VRAM. Change the allocation type and increase the size for MES 11. Signed-off-by: shaoyunl --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 1 + drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 12 +++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 83d0f731fb65..bf584e9bcce4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -104,7 +104,7 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) return 0; r = amdgpu_bo_create_kernel(adev, adev->mes.event_log_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_DOMAIN_VRAM, &adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, &adev->mes.event_log_cpu_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 45e3508f0f8e..79f13d7e5e16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -40,6 +40,7 @@ #define AMDGPU_MES_VERSION_MASK 0x00000fff #define AMDGPU_MES_API_VERSION_MASK 0x00fff000 #define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000 +#define AMDGPU_MES_MSCRATCH_SIZE 0x8000 enum amdgpu_mes_priority_level { AMDGPU_MES_PRIORITY_LEVEL_LOW = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 03bf865fbdd4..aa2e9ef4ff12 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -913,6 +913,16 @@ static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable) uint32_t pipe, data = 0; if (enable) { + if (amdgpu_mes_log_enable) { + WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO, + lower_32_bits(adev->mes.event_log_gpu_addr + AMDGPU_MES_LOG_BUFFER_SIZE)); + WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI, + upper_32_bits(adev->mes.event_log_gpu_addr + AMDGPU_MES_LOG_BUFFER_SIZE)); + dev_info(adev->dev, "Setup CP MES MSCRATCH address : 0x%x.
0x%x\n", + RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI), + RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO)); + } + data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, @@ -1375,7 +1385,7 @@ static int mes_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init; adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini; - adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE; + adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE; r = amdgpu_mes_init(adev); if (r) -- 2.34.1
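To visualize what this patch does with the enlarged allocation (a sketch derived from the hunks above):

/*
 * Single VRAM BO at adev->mes.event_log_gpu_addr:
 *
 *   +--------------------------------------+  <- event_log_gpu_addr
 *   | MES event log                        |
 *   | (AMDGPU_MES_LOG_BUFFER_SIZE bytes)   |
 *   +--------------------------------------+  <- event_log_gpu_addr +
 *   | MES scratch dump                     |     AMDGPU_MES_LOG_BUFFER_SIZE
 *   | (AMDGPU_MES_MSCRATCH_SIZE = 0x8000)  |
 *   +--------------------------------------+
 *
 * CP_MES_MSCRATCH_LO/HI are programmed with the start of the second
 * region, which is why the WREG32 calls add AMDGPU_MES_LOG_BUFFER_SIZE
 * to the base address.
 */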
Re: [PATCH 1/2] drm/amd/display: increase max surfaces in line with planes and streams
On 23/10/2024 17:36, Mohamed, Zaeem wrote: [AMD Official Use Only - AMD Internal Distribution Only] Hi, A patch addressing this will be sent out soon. Great! Thanks for the heads up! Melissa Thanks, Zaeem -Original Message- From: Melissa Wen Sent: Tuesday, October 22, 2024 11:58 AM To: Mohamed, Zaeem ; Wentland, Harry ; Li, Sun peng (Leo) ; Siqueira, Rodrigo ; Deucher, Alexander ; Koenig, Christian ; Pan, Xinhui ; airl...@gmail.com; dan...@ffwll.ch Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org Subject: Re: [PATCH 1/2] drm/amd/display: increase max surfaces in line with planes and streams Hi, Gentle ping to land the fix for the kernel crash. If faster, I can send a new version increasing max surfaces to 4 as previously discussed. There are now two bug reports for the same issue: - https://gitlab.freedesktop.org/drm/amd/-/issues/3693 - https://gitlab.freedesktop.org/drm/amd/-/issues/3594 Best Regards, Melissa On 27/09/2024 15:20, Melissa Wen wrote: Hi Zaeem, Thanks for explaining their relationship. So IIUC, the current DM implementation of the dc_surface_updates array is wrong, since it's using MAX_SURFACES (=3) for the allocation but MAX_PLANES (=6) as the upper bound when iterating over the array, as you can see in this allocation, followed by an iteration from 0 to `plane_count`: https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c#L9861 Another question to understand what would be the right fix for the above-mentioned implementation: with the cursor overlay mode, does it mean we are using one of the overlay planes for this cursor overlay mode (one from the "max_slave_planes"), or is this cursor plane an extra plane, which means, for some drivers, one primary plane + two overlay planes + one plane for cursor overlay mode (max 4 planes)? BR, Melissa On 27/09/2024 14:40, Mohamed, Zaeem wrote: [AMD Official Use Only - AMD Internal Distribution Only] Hi Melissa, MAX_SURFACE_NUM and MAX_PLANES both represent the upper bound of planes that are supported by HW. It is best to replace MAX_SURFACE_NUM with MAX_PLANES to remove redundancy. MAX_SURFACES is used to represent the upper bound of surfaces that can be piped to a single CRTC. Keep MAX_SURFACES as is to keep stack size down, and replace MAX_SURFACE_NUM with MAX_PLANES. Thanks, Zaeem -Original Message- From: dri-devel On Behalf Of Melissa Wen Sent: Wednesday, September 25, 2024 11:37 AM To: harry.wentl...@amd.com; sunpeng...@amd.com; rodrigo.sique...@amd.com; alexander.deuc...@amd.com; christian.koe...@amd.com; xinhui@amd.com; airl...@gmail.com; dan...@ffwll.ch Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org Subject: [PATCH 1/2] drm/amd/display: increase max surfaces in line with planes and streams 091a97e542cf ("drm/amd/display: Fix warning. Set MAX_SURFACES to 3") reduced the max number of surfaces since, at that time, there was no use for more. However, HW and drivers evolve, and there are now driver versions that allow two overlay planes (max_slave_planes). Moreover, commit 3cfd03b79425 ("drm/amd/display: update max streams per surface") states that 6 is the max number of surfaces supported ASICs can have. Therefore, update MAX_SURFACES to match MAX_SURFACE_NUM, MAX_PLANES and MAX_STREAMS. It also addresses the array-index-out-of-bounds issue reported in the link.
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3594 Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 3992ad73165b..08b00b263533 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -57,7 +57,7 @@ struct dmub_notification; #define DC_VER "3.2.301" -#define MAX_SURFACES 3 +#define MAX_SURFACES 6 #define MAX_PLANES 6 #define MAX_STREAMS 6 #define MIN_VIEWPORT_SIZE 12 -- 2.45.2
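The out-of-bounds pattern Melissa describes boils down to this shape (a simplified sketch of the DM path with a hypothetical fill helper, not the verbatim source):

#define MAX_SURFACES 3	/* before this patch */
#define MAX_PLANES   6

struct dc_surface_update updates[MAX_SURFACES];

/* plane_count is bounded by MAX_PLANES (6), so a commit with more than
 * three planes -- e.g. primary + two overlays + cursor in overlay mode --
 * writes past the end of 'updates'. Raising MAX_SURFACES to match
 * MAX_PLANES removes the mismatch.
 */
for (i = 0; i < plane_count; i++)
	fill_surface_update(&updates[i], planes[i]);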
Re: [PATCH] drm/amd/pm: correct the workload setting
On Tue, Oct 22, 2024 at 11:23 PM Kenneth Feng wrote: > > Correct the workload setting in order not to mix the setting > with the end user. Update the workload mask accordingly. Might be better to actually treat the workload like a mask rather than as a discrete setting since that mirrors how the firmware actually works. The default value set at init time or whatever the user selects via sysfs should be what is reflected in sysfs, however, when KFD selects COMPUTE or the VCN code selects VIDEO, rather than clearing all of the other bits and just setting the selected profile, we should just add or remove the additional bits to the bit mask and store the whole bit mask in the driver. E.g., smu->workload_mask would be the actual mask and smu->power_profile_mode would be the value reflected in sysfs. Alex > > Signed-off-by: Kenneth Feng > --- > drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 17 ++-- > drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 4 +- > .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 39 +++-- > .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 33 --- > .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 42 --- > 5 files changed, 115 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > index accc96a03bd9..f1984736ff4a 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > @@ -1252,7 +1252,8 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block) > atomic64_set(&smu->throttle_int_counter, 0); > smu->watermarks_bitmap = 0; > smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > - smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > + smu->user_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > + smu->driver_workload_mask = 0; > > atomic_set(&smu->smu_power.power_gate.vcn_gated, 1); > atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1); > @@ -1267,10 +1268,12 @@ static int smu_sw_init(struct amdgpu_ip_block > *ip_block) > smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; > smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; > > - if (smu->is_apu) > + if (smu->is_apu) { > smu->workload_mask = 1 << > smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; > - else > + } else { > smu->workload_mask = 1 << > smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; > + smu->user_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; > + } > > smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D; > @@ -2346,11 +2349,13 @@ static int smu_switch_power_profile(void *handle, > > if (!en) { > smu->workload_mask &= ~(1 << smu->workload_prority[type]); > + smu->driver_workload_mask &= ~(1 << > smu->workload_prority[type]); > index = fls(smu->workload_mask); > index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 > : 0; > workload[0] = smu->workload_setting[index]; > } else { > smu->workload_mask |= (1 << smu->workload_prority[type]); > + smu->driver_workload_mask |= (1 << > smu->workload_prority[type]); > index = fls(smu->workload_mask); > index = index <= WORKLOAD_POLICY_MAX ? 
index - 1 : 0; > workload[0] = smu->workload_setting[index]; > @@ -3045,12 +3050,16 @@ static int smu_set_power_profile_mode(void *handle, > uint32_t param_size) > { > struct smu_context *smu = handle; > + int ret; > > if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled || > !smu->ppt_funcs->set_power_profile_mode) > return -EOPNOTSUPP; > + smu->user_profile_mode_setting = true; > + ret = smu_bump_power_profile_mode(smu, param, param_size); > + smu->user_profile_mode_setting = false; > > - return smu_bump_power_profile_mode(smu, param, param_size); > + return ret; > } > > static int smu_get_fan_control_mode(void *handle, u32 *fan_mode) > diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > index 8bb32b3f0d9c..e43b23dd3457 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > @@ -557,10 +557,11 @@ struct smu_context { > bool disable_uclk_switch; > > uint32_t workload_mask; > + uint32_t driver_workload_mask; > uint32_t workload_prority[WORKLOAD_POLICY_MAX]; > uint32_t workload_setting[WORKLOAD_POLICY_MAX]; > uint32_t power_profile_mode; > - uint32_t default_power_profile_mode; > + uint32_
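A sketch of the mask-based flow Alex suggests (field and helper names beyond those visible in the patch are hypothetical):

/* Driver-internal requests add/remove bits instead of overwriting the
 * user's selection; sysfs keeps reporting smu->power_profile_mode while
 * the firmware sees the union of user and driver requests.
 */
static void smu_profile_get(struct smu_context *smu, int profile)
{
	smu->workload_mask |= BIT(smu->workload_prority[profile]);
	smu_sync_workload_mask_to_fw(smu);	/* hypothetical helper */
}

static void smu_profile_put(struct smu_context *smu, int profile)
{
	smu->workload_mask &= ~BIT(smu->workload_prority[profile]);
	smu_sync_workload_mask_to_fw(smu);
}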
[PATCH] drm/amd/amdgpu: limit single process inside MES
This is for MES to limit only one process for the user queues Signed-off-by: Shaoyun Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 24 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 19 +++ drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 15 +++ drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 11 +++ 5 files changed, 71 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index e96984c53e72..72e38d621a29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1576,9 +1576,11 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, if (adev->enforce_isolation[i] && !partition_values[i]) { /* Going from enabled to disabled */ amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); + amdgpu_mes_set_enforce_isolation(adev, i, false); } else if (!adev->enforce_isolation[i] && partition_values[i]) { /* Going from disabled to enabled */ amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); + amdgpu_mes_set_enforce_isolation(adev, i, true); } adev->enforce_isolation[i] = partition_values[i]; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index bf584e9bcce4..dfc7d320fcbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1674,6 +1674,30 @@ bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) return is_supported; } +/* Fix me -- node_id is used to identify the correct MES instances in the future */ +int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable) +{ + struct mes_misc_op_input op_input = {0}; + int r; + + op_input.op = MES_MISC_OP_CHANGE_CONFIG; + op_input.change_config.option.limit_single_process = enable ? 
1 : 0; + + if (!adev->mes.funcs->misc_op) { + dev_err(adev->dev, "mes change config is not supported!\n"); + r = -EINVAL; + goto error; + } + + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + if (r) + dev_err(adev->dev, "failed to change_config.\n"); + +error: + return r; + +} + #if defined(CONFIG_DEBUG_FS) static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 79f13d7e5e16..91bff6443c05 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -311,6 +311,7 @@ enum mes_misc_opcode { MES_MISC_OP_WRM_REG_WAIT, MES_MISC_OP_WRM_REG_WR_WAIT, MES_MISC_OP_SET_SHADER_DEBUGGER, + MES_MISC_OP_CHANGE_CONFIG, }; struct mes_misc_op_input { @@ -349,6 +350,21 @@ struct mes_misc_op_input { uint32_t tcp_watch_cntl[4]; uint32_t trap_en; } set_shader_debugger; + + struct { + union { + struct { + uint32_t limit_single_process : 1; + uint32_t enable_hws_logging_buffer : 1; + uint32_t reserved : 30; + }; + uint32_t all; + } option; + struct { + uint32_t tdr_level; + uint32_t tdr_delay; + } tdr_config; + } change_config; }; }; @@ -519,4 +535,7 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes) } bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev); + +int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable); + #endif /* __AMDGPU_MES_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 57db0c006c8f..c621ba805433 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -644,6 +644,18 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; break; + case MES_MISC_OP_CHANGE_CONFIG: + if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) { + dev_err(adev->dev, "MES FW version must be larger than 0x63 to support limit single process feature.\n"); +
RE: [PATCH 1/2] drm/amd/display: increase max surfaces in line with planes and streams
[AMD Official Use Only - AMD Internal Distribution Only] Hi, A patch addressing this will be sent out soon. Thanks, Zaeem -Original Message- From: Melissa Wen Sent: Tuesday, October 22, 2024 11:58 AM To: Mohamed, Zaeem ; Wentland, Harry ; Li, Sun peng (Leo) ; Siqueira, Rodrigo ; Deucher, Alexander ; Koenig, Christian ; Pan, Xinhui ; airl...@gmail.com; dan...@ffwll.ch Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org Subject: Re: [PATCH 1/2] drm/amd/display: increase max surfaces in line with planes and streams Hi, Gentle ping to land the fix for the kernel crash. If faster, I can send a new version increasing max surfaces to 4 as previously discussed. There are now two bug reports for the same issue: - https://gitlab.freedesktop.org/drm/amd/-/issues/3693 - https://gitlab.freedesktop.org/drm/amd/-/issues/3594 Best Regards, Melissa On 27/09/2024 15:20, Melissa Wen wrote: > Hi Zaeem, > > Thanks for explaining their relationship. > > So IIUC, the current DM implementation of the dc_surface_updates array is > wrong, since it's using MAX_SURFACES (=3) for the allocation but > MAX_PLANES (=6) as the upper bound when iterating over the > array, as you can see in this allocation, followed by an iteration from 0 > to `plane_count`: > > https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c#L9861 > > > Another question to understand what would be the right fix for the > above-mentioned implementation: with the cursor overlay mode, does it mean > we are using one of the overlay planes for this cursor overlay mode > (one from the "max_slave_planes"), or is this cursor plane an extra > plane, which means, for some drivers, one primary plane + two overlay > planes + one plane for cursor overlay mode (max 4 planes)? > > BR, > > Melissa > > On 27/09/2024 14:40, Mohamed, Zaeem wrote: >> [AMD Official Use Only - AMD Internal Distribution Only] >> >> Hi Melissa, >> >> MAX_SURFACE_NUM and MAX_PLANES both represent the upper bound of >> planes that are supported by HW. It is best to replace >> MAX_SURFACE_NUM with MAX_PLANES to remove redundancy. MAX_SURFACES is >> used to represent the upper bound of surfaces that can be piped to a >> single CRTC. Keep MAX_SURFACES as is to keep stack size down, and >> replace MAX_SURFACE_NUM with MAX_PLANES. >> >> Thanks, >> Zaeem >> >> >> -Original Message- >> From: dri-devel On Behalf >> Of Melissa Wen >> Sent: Wednesday, September 25, 2024 11:37 AM >> To: harry.wentl...@amd.com; sunpeng...@amd.com; >> rodrigo.sique...@amd.com; alexander.deuc...@amd.com; >> christian.koe...@amd.com; xinhui@amd.com; airl...@gmail.com; >> dan...@ffwll.ch >> Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org >> Subject: [PATCH 1/2] drm/amd/display: increase max surfaces in line >> with planes and streams >> >> 091a97e542cf ("drm/amd/display: Fix warning. Set MAX_SURFACES to 3") >> reduced the max number of surfaces since, at that time, there was no >> use for more. However, HW and drivers evolve, and there are now driver >> versions that allow two overlay planes (max_slave_planes). Moreover, >> commit 3cfd03b79425 ("drm/amd/display: update max streams per >> surface") states that 6 is the max number of surfaces supported ASICs can have. >> Therefore, update MAX_SURFACES to match MAX_SURFACE_NUM, MAX_PLANES >> and MAX_STREAMS. >> >> It also addresses the array-index-out-of-bounds issue reported in the link.
>> >> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3594 >> Signed-off-by: Melissa Wen >> --- >> drivers/gpu/drm/amd/display/dc/dc.h | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/amd/display/dc/dc.h >> b/drivers/gpu/drm/amd/display/dc/dc.h >> index 3992ad73165b..08b00b263533 100644 >> --- a/drivers/gpu/drm/amd/display/dc/dc.h >> +++ b/drivers/gpu/drm/amd/display/dc/dc.h >> @@ -57,7 +57,7 @@ struct dmub_notification; >> >> #define DC_VER "3.2.301" >> >> -#define MAX_SURFACES 3 >> +#define MAX_SURFACES 6 >> #define MAX_PLANES 6 >> #define MAX_STREAMS 6 >> #define MIN_VIEWPORT_SIZE 12 >> -- >> 2.45.2 >> >
Re: [PATCH] drm/amd/amdgpu: limit single process inside MES
On Wed, Oct 23, 2024 at 2:08 PM Shaoyun Liu wrote: > > This is for MES to limit only one process for the user queues > > Signed-off-by: Shaoyun Liu > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 24 > drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 19 +++ > drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 11 +++ > drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 11 +++ > 5 files changed, 67 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > index e96984c53e72..72e38d621a29 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > @@ -1576,9 +1576,11 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct > device *dev, > if (adev->enforce_isolation[i] && !partition_values[i]) { > /* Going from enabled to disabled */ > amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); > + amdgpu_mes_set_enforce_isolation(adev, i, false); > } else if (!adev->enforce_isolation[i] && > partition_values[i]) { > /* Going from disabled to enabled */ > amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); > + amdgpu_mes_set_enforce_isolation(adev, i, true); > } > adev->enforce_isolation[i] = partition_values[i]; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > index bf584e9bcce4..29b6a2baae4d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > @@ -1674,6 +1674,30 @@ bool amdgpu_mes_suspend_resume_all_supported(struct > amdgpu_device *adev) > return is_supported; > } > > +/* Fix me -- node_id is used to identify the correct MES instances in the > future */ > +int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t > node_id, bool enable) > +{ > + struct mes_misc_op_input op_input = {0}; > + int r; > + > + op_input.op = MES_MISC_OP_CHANGE_CONFIG; > + op_input.change_config.option.limit_single_process = enable ? 1 : 0; > + > + if (!adev->mes.funcs->misc_op) { > + DRM_ERROR("mes change config is not supported!\n"); Please use dev_err() so it's clear which GPU the error is coming from in a multi-GPU system. 
> + r = -EINVAL; > + goto error; > + } > + > + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); > + if (r) > + DRM_ERROR("failed to change_config.\n"); dev_err() > + > +error: > + return r; > + > +} > + > #if defined(CONFIG_DEBUG_FS) > > static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void > *unused) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > index 79f13d7e5e16..91bff6443c05 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > @@ -311,6 +311,7 @@ enum mes_misc_opcode { > MES_MISC_OP_WRM_REG_WAIT, > MES_MISC_OP_WRM_REG_WR_WAIT, > MES_MISC_OP_SET_SHADER_DEBUGGER, > + MES_MISC_OP_CHANGE_CONFIG, > }; > > struct mes_misc_op_input { > @@ -349,6 +350,21 @@ struct mes_misc_op_input { > uint32_t tcp_watch_cntl[4]; > uint32_t trap_en; > } set_shader_debugger; > + > + struct { > + union { > + struct { > + uint32_t limit_single_process : 1; > + uint32_t enable_hws_logging_buffer : > 1; > + uint32_t reserved : 30; > + }; > + uint32_t all; > + } option; > + struct { > + uint32_t tdr_level; > + uint32_t tdr_delay; > + } tdr_config; > + } change_config; > }; > }; > > @@ -519,4 +535,7 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes > *mes) > } > > bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev); > + > +int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t > node_id, bool enable); > + > #endif /* __AMDGPU_MES_H__ */ > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > index 57db0c006c8f..1d6de7bced48 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > @@ -644,6 +644,14 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, > > sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); >
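For reference, the practical difference behind the dev_err() request (illustrative output; the exact device prefix depends on the system):

DRM_ERROR("mes change config is not supported!\n");
/* -> roughly "[drm:...] *ERROR* mes change config is not supported!"
 *    with no indication of which GPU failed */

dev_err(adev->dev, "mes change config is not supported!\n");
/* -> "amdgpu 0000:03:00.0: mes change config is not supported!"
 *    the struct device prefix identifies the GPU on multi-GPU systems */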
[PATCH 2/3] drm/amdgpu: Adjust debugfs eviction and IB access permissions
Users should not be able to run these. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index e44a44405266..2ef7bcfdb2e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2108,11 +2108,11 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_securedisplay_debugfs_init(adev); amdgpu_fw_attestation_debugfs_init(adev); - debugfs_create_file("amdgpu_evict_vram", 0444, root, adev, + debugfs_create_file("amdgpu_evict_vram", 0400, root, adev, &amdgpu_evict_vram_fops); - debugfs_create_file("amdgpu_evict_gtt", 0444, root, adev, + debugfs_create_file("amdgpu_evict_gtt", 0400, root, adev, &amdgpu_evict_gtt_fops); - debugfs_create_file("amdgpu_test_ib", 0444, root, adev, + debugfs_create_file("amdgpu_test_ib", 0400, root, adev, &amdgpu_debugfs_test_ib_fops); debugfs_create_file("amdgpu_vm_info", 0444, root, adev, &amdgpu_debugfs_vm_info_fops); -- 2.46.2
[PATCH 3/3] drm/amdgpu: add missing size check in amdgpu_debugfs_gprwave_read()
Avoid a possible buffer overflow if size is larger than 4K. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 2ef7bcfdb2e2..3a118645b4bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -402,7 +402,7 @@ static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, siz int r; uint32_t *data, x; - if (size & 0x3 || *pos & 0x3) + if (size > 4096 || size & 0x3 || *pos & 0x3) return -EINVAL; r = pm_runtime_get_sync(adev_to_drm(adev)->dev); -- 2.46.2
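For context on the 4096 bound (a sketch; assuming the handler stages register data in a fixed page-sized buffer, as the mainline function does with a 1024-entry u32 array):

data = kcalloc(1024, sizeof(*data), GFP_KERNEL);	/* 1024 * 4 = 4096 bytes */

/* Without the added check, a read with size > 4096 would let the copy
 * loop run past this staging buffer; rejecting oversized (and
 * unaligned) requests up front closes the overflow:
 */
if (size > 4096 || size & 0x3 || *pos & 0x3)
	return -EINVAL;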
[PATCH 1/3] drm/amdgpu: Adjust debugfs register access permissions
Regular users shouldn't have read access. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 6e6092916d4e..e44a44405266 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1654,7 +1654,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { ent = debugfs_create_file(debugfs_regs_names[i], - S_IFREG | 0444, root, + S_IFREG | 0400, root, adev, debugfs_regs[i]); if (!i && !IS_ERR_OR_NULL(ent)) i_size_write(ent->d_inode, adev->rmmio_size); -- 2.46.2
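For reference, the octal modes used across these debugfs patches (standard permission semantics):

/* 0444 = S_IRUSR | S_IRGRP | S_IROTH: readable by everyone */
/* 0400 = S_IRUSR: readable only by the owner (root) */
debugfs_create_file("amdgpu_evict_vram", 0400, root, adev,
		    &amdgpu_evict_vram_fops);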
RE: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete
[Public] > From: Lazar, Lijo > Sent: Wednesday, October 23, 2024 6:55 PM > To: Liang, Prike ; amd-gfx@lists.freedesktop.org > Cc: Deucher, Alexander > Subject: Re: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete > > > > On 10/14/2024 1:19 PM, Prike Liang wrote: > > To check the status of S3 suspend completion, use the PM core > > pm_suspend_global_flags bit(1) to detect S3 abort events. Therefore, > > clean up the AMDGPU driver's private flag suspend_complete. > > > > Signed-off-by: Prike Liang > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -- > > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 -- > > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- > > drivers/gpu/drm/amd/amdgpu/soc15.c | 7 ++- > > drivers/gpu/drm/amd/amdgpu/soc21.c | 2 +- > > 5 files changed, 5 insertions(+), 12 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > index 48c9b9b06905..9b35763ae0a7 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > @@ -,8 +,6 @@ struct amdgpu_device { > > boolin_s3; > > boolin_s4; > > boolin_s0ix; > > - /* indicate amdgpu suspension status */ > > - boolsuspend_complete; > > > > enum pp_mp1_state mp1_state; > > struct amdgpu_doorbell_index doorbell_index; diff --git > > a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > index 680e44fdee6e..78972151b970 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > @@ -2501,7 +2501,6 @@ static int amdgpu_pmops_suspend(struct device *dev) > > struct drm_device *drm_dev = dev_get_drvdata(dev); > > struct amdgpu_device *adev = drm_to_adev(drm_dev); > > > > - adev->suspend_complete = false; > > if (amdgpu_acpi_is_s0ix_active(adev)) > > adev->in_s0ix = true; > > else if (amdgpu_acpi_is_s3_active(adev)) @@ -2516,7 +2515,6 @@ > > static int amdgpu_pmops_suspend_noirq(struct device *dev) > > struct drm_device *drm_dev = dev_get_drvdata(dev); > > struct amdgpu_device *adev = drm_to_adev(drm_dev); > > > > - adev->suspend_complete = true; > > if (amdgpu_acpi_should_gpu_reset(adev)) > > return amdgpu_asic_reset(adev); > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > > index be320d753507..ba8e66744376 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > > @@ -3276,8 +3276,8 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device > *adev) > > * confirmed that the APU gfx10/gfx11 needn't such update. > > */ > > if (adev->flags & AMD_IS_APU && > > - adev->in_s3 && !adev->suspend_complete) { > > - DRM_INFO(" Will skip the CSB packet resubmit\n"); > > + adev->in_s3 && !pm_resume_via_firmware()) { > > + DRM_INFO("Will skip the CSB packet resubmit\n"); > > return 0; > > } > > r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); > > diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c > > b/drivers/gpu/drm/amd/amdgpu/soc15.c > > index 12ff6cf568dc..d9d11131a744 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/soc15.c > > +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c > > @@ -584,13 +584,10 @@ static bool soc15_need_reset_on_resume(struct > amdgpu_device *adev) > > *performing pm core test. 
> > */ > > if (adev->flags & AMD_IS_APU && adev->in_s3 && > > - !pm_resume_via_firmware()) { > > - adev->suspend_complete = false; > > + !pm_resume_via_firmware()) > > return true; > > - } else { > > - adev->suspend_complete = true; > > + else > > return false; > > - } > > } > > > > static int soc15_asic_reset(struct amdgpu_device *adev) diff --git > > a/drivers/gpu/drm/amd/amdgpu/soc21.c > > b/drivers/gpu/drm/amd/amdgpu/soc21.c > > index c4b950e75133..7a47a21ef00f 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/soc21.c > > +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c > > @@ -904,7 +904,7 @@ static bool soc21_need_reset_on_resume(struct > amdgpu_device *adev) > > * 2) S3 suspend got aborted and TOS is active. > > */ > > if (!(adev->flags & AMD_IS_APU) && adev->in_s3 && > > - !adev->suspend_complete) { > > + !pm_resume_via_firmware()) { > > Looks like this will cover only ACPI based systems. Not sure if that > assumption is > valid for dGPU cases. > > Thanks, > Lijo Yes, the pm_set_resume_via_firmware() function is only called during the ACPI_STATE_S3 suspend process. However, ACPI-enabled systems are popular in the desktop world. If there are concerns about ACPI configuration, one option could be to check if the dGPU needs a re
Re: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete
On 10/24/2024 8:24 AM, Liang, Prike wrote: > [Public] > >> From: Lazar, Lijo >> Sent: Wednesday, October 23, 2024 6:55 PM >> To: Liang, Prike ; amd-gfx@lists.freedesktop.org >> Cc: Deucher, Alexander >> Subject: Re: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete >> >> >> >> On 10/14/2024 1:19 PM, Prike Liang wrote: >>> To check the status of S3 suspend completion, use the PM core >>> pm_suspend_global_flags bit(1) to detect S3 abort events. Therefore, >>> clean up the AMDGPU driver's private flag suspend_complete. >>> >>> Signed-off-by: Prike Liang >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 -- >>> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- >>> drivers/gpu/drm/amd/amdgpu/soc15.c | 7 ++- >>> drivers/gpu/drm/amd/amdgpu/soc21.c | 2 +- >>> 5 files changed, 5 insertions(+), 12 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> index 48c9b9b06905..9b35763ae0a7 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> @@ -,8 +,6 @@ struct amdgpu_device { >>> boolin_s3; >>> boolin_s4; >>> boolin_s0ix; >>> - /* indicate amdgpu suspension status */ >>> - boolsuspend_complete; >>> >>> enum pp_mp1_state mp1_state; >>> struct amdgpu_doorbell_index doorbell_index; diff --git >>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>> index 680e44fdee6e..78972151b970 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>> @@ -2501,7 +2501,6 @@ static int amdgpu_pmops_suspend(struct device *dev) >>> struct drm_device *drm_dev = dev_get_drvdata(dev); >>> struct amdgpu_device *adev = drm_to_adev(drm_dev); >>> >>> - adev->suspend_complete = false; >>> if (amdgpu_acpi_is_s0ix_active(adev)) >>> adev->in_s0ix = true; >>> else if (amdgpu_acpi_is_s3_active(adev)) @@ -2516,7 +2515,6 @@ >>> static int amdgpu_pmops_suspend_noirq(struct device *dev) >>> struct drm_device *drm_dev = dev_get_drvdata(dev); >>> struct amdgpu_device *adev = drm_to_adev(drm_dev); >>> >>> - adev->suspend_complete = true; >>> if (amdgpu_acpi_should_gpu_reset(adev)) >>> return amdgpu_asic_reset(adev); >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >>> index be320d753507..ba8e66744376 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >>> @@ -3276,8 +3276,8 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device >> *adev) >>> * confirmed that the APU gfx10/gfx11 needn't such update. >>> */ >>> if (adev->flags & AMD_IS_APU && >>> - adev->in_s3 && !adev->suspend_complete) { >>> - DRM_INFO(" Will skip the CSB packet resubmit\n"); >>> + adev->in_s3 && !pm_resume_via_firmware()) { >>> + DRM_INFO("Will skip the CSB packet resubmit\n"); >>> return 0; >>> } >>> r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); >>> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c >>> b/drivers/gpu/drm/amd/amdgpu/soc15.c >>> index 12ff6cf568dc..d9d11131a744 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c >>> @@ -584,13 +584,10 @@ static bool soc15_need_reset_on_resume(struct >> amdgpu_device *adev) >>> *performing pm core test. 
>>> */ >>> if (adev->flags & AMD_IS_APU && adev->in_s3 && >>> - !pm_resume_via_firmware()) { >>> - adev->suspend_complete = false; >>> + !pm_resume_via_firmware()) >>> return true; >>> - } else { >>> - adev->suspend_complete = true; >>> + else >>> return false; >>> - } >>> } >>> >>> static int soc15_asic_reset(struct amdgpu_device *adev) diff --git >>> a/drivers/gpu/drm/amd/amdgpu/soc21.c >>> b/drivers/gpu/drm/amd/amdgpu/soc21.c >>> index c4b950e75133..7a47a21ef00f 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/soc21.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c >>> @@ -904,7 +904,7 @@ static bool soc21_need_reset_on_resume(struct >> amdgpu_device *adev) >>> * 2) S3 suspend got aborted and TOS is active. >>> */ >>> if (!(adev->flags & AMD_IS_APU) && adev->in_s3 && >>> - !adev->suspend_complete) { >>> + !pm_resume_via_firmware()) { >> >> Looks like this will cover only ACPI based systems. Not sure if that >> assumption is >> valid for dGPU cases. >> >> Thanks, >> Lijo > > Yes, the pm_set_resume_via_firmware() function is only called during the > ACPI_STATE_S3 suspend process. However, ACPI-enabled systems are popular in > the desktop world. If there are concerns a
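For readers unfamiliar with the PM-core flag under discussion, this is roughly the helper being relied on (paraphrased from include/linux/suspend.h in recent kernels; verify against your tree):

/* pm_suspend_global_flags bit(1) is PM_SUSPEND_FLAG_FW_RESUME; the ACPI
 * S3 path sets it via pm_set_resume_via_firmware() only when the
 * platform firmware actually carried out the S3 entry, so a false
 * return after adev->in_s3 was set indicates an aborted suspend.
 */
static inline bool pm_resume_via_firmware(void)
{
	return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_FW_RESUME);
}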
[PATCH] drm/amdgpu: skip pci_restore_state under sriov during device init
During device init under SR-IOV, pci_restore_state() happens after full access is released, and it can race with MMIO protection being enabled from the host side. Since MSI-X is toggled during pci_restore_state(), if MMIO protection happens during this window, the guest-side MSI-X will not be properly programmed, leading to missing interrupts. So skip pci_restore_state() during device init. Signed-off-by: Victor Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6c0ff1c2ae4c..52803cd91ef5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4524,7 +4524,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, dev_err(adev->dev, "amdgpu_pmu_init failed\n"); /* Have stored pci confspace at hand for restore in sudden PCI error */ - if (amdgpu_device_cache_pci_state(adev->pdev)) + if (!amdgpu_sriov_vf(adev) && amdgpu_device_cache_pci_state(adev->pdev)) pci_restore_state(pdev); /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ -- 2.34.1
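The race described above, as a timeline (reconstructed from the commit message; no new facts):

/*
 * Guest driver init (SR-IOV)               Host
 * --------------------------               ----
 * amdgpu_device_init()
 *   ...
 *   full-access window ends  ------------> may enable MMIO protection
 *   pci_restore_state()                    at any point from here on
 *     toggles the MSI-X enable bit  <-X--  write lost if protection is
 *                                          already active: MSI-X left
 *                                          unprogrammed, interrupts missed
 */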
RE: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete
[AMD Official Use Only - AMD Internal Distribution Only]

> From: Lazar, Lijo
> Sent: Thursday, October 24, 2024 11:39 AM
> To: Liang, Prike ; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander
> Subject: Re: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete
>
> On 10/24/2024 8:24 AM, Liang, Prike wrote:
> > [Public]
> >
> >> From: Lazar, Lijo
> >> Sent: Wednesday, October 23, 2024 6:55 PM
> >> To: Liang, Prike ; amd-gfx@lists.freedesktop.org
> >> Cc: Deucher, Alexander
> >> Subject: Re: [PATCH v2 2/2] drm/amdgpu: clean up the suspend_complete
> >>
> >> On 10/14/2024 1:19 PM, Prike Liang wrote:
> >>> To check the status of S3 suspend completion, use the PM core
> >>> pm_suspend_global_flags bit(1) to detect S3 abort events. Therefore,
> >>> clean up the AMDGPU driver's private flag suspend_complete.
> >>>
> >>> Signed-off-by: Prike Liang
> >>> ---
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 2 --
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 --
> >>>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 4 ++--
> >>>  drivers/gpu/drm/amd/amdgpu/soc15.c      | 7 ++-----
> >>>  drivers/gpu/drm/amd/amdgpu/soc21.c      | 2 +-
> >>>  5 files changed, 5 insertions(+), 12 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> index 48c9b9b06905..9b35763ae0a7 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> @@ -,8 +,6 @@ struct amdgpu_device {
> >>>  	bool				in_s3;
> >>>  	bool				in_s4;
> >>>  	bool				in_s0ix;
> >>> -	/* indicate amdgpu suspension status */
> >>> -	bool				suspend_complete;
> >>>
> >>>  	enum pp_mp1_state		mp1_state;
> >>>  	struct amdgpu_doorbell_index doorbell_index;
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>> index 680e44fdee6e..78972151b970 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>> @@ -2501,7 +2501,6 @@ static int amdgpu_pmops_suspend(struct device *dev)
> >>>  	struct drm_device *drm_dev = dev_get_drvdata(dev);
> >>>  	struct amdgpu_device *adev = drm_to_adev(drm_dev);
> >>>
> >>> -	adev->suspend_complete = false;
> >>>  	if (amdgpu_acpi_is_s0ix_active(adev))
> >>>  		adev->in_s0ix = true;
> >>>  	else if (amdgpu_acpi_is_s3_active(adev))
> >>> @@ -2516,7 +2515,6 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev)
> >>>  	struct drm_device *drm_dev = dev_get_drvdata(dev);
> >>>  	struct amdgpu_device *adev = drm_to_adev(drm_dev);
> >>>
> >>> -	adev->suspend_complete = true;
> >>>  	if (amdgpu_acpi_should_gpu_reset(adev))
> >>>  		return amdgpu_asic_reset(adev);
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> >>> index be320d753507..ba8e66744376 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> >>> @@ -3276,8 +3276,8 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
> >>>  	 * confirmed that the APU gfx10/gfx11 needn't such update.
> >>>  	 */
> >>>  	if (adev->flags & AMD_IS_APU &&
> >>> -	    adev->in_s3 && !adev->suspend_complete) {
> >>> -		DRM_INFO(" Will skip the CSB packet resubmit\n");
> >>> +	    adev->in_s3 && !pm_resume_via_firmware()) {
> >>> +		DRM_INFO("Will skip the CSB packet resubmit\n");
> >>>  		return 0;
> >>>  	}
> >>>  	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
> >>> index 12ff6cf568dc..d9d11131a744 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> >>> @@ -584,13 +584,10 @@ static bool soc15_need_reset_on_resume(struct amdgpu_device *adev)
> >>>  	 * performing pm core test.
> >>>  	 */
> >>>  	if (adev->flags & AMD_IS_APU && adev->in_s3 &&
> >>> -	    !pm_resume_via_firmware()) {
> >>> -		adev->suspend_complete = false;
> >>> +	    !pm_resume_via_firmware())
> >>>  		return true;
> >>> -	} else {
> >>> -		adev->suspend_complete = true;
> >>> +	else
> >>>  		return false;
> >>> -	}
> >>>  }
> >>>
> >>>  static int soc15_asic_reset(struct amdgpu_device *adev)
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
> >>> index c4b950e75133..7a47a21ef00f 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/soc21.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
> >>> @@ -904,7 +904,7 @@ static bool soc21_need_reset_on_resume(struct amdgpu_device *adev)
> >>>  	 * 2) S3 suspend got aborted and TOS is acti