RE: [PATCH 2/2] drm/amdgpu: Add support for CPERs on virtualization

2025-02-27 Thread Zhang, Hawking
[AMD Official Use Only - AMD Internal Distribution Only]


+ @Liu, Xiang(Dean)/@Zhou1, 
Tao for the code review

+   if (amdgpu_sriov_vf(adev))
+   debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+&amdgpu_debugfs_virt_ring_fops,
+ring->ring_size + 12);
+   else
+   debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+&amdgpu_debugfs_ring_fops,
+ring->ring_size + 12);

Hi Tony,

Is there any reason the VF requires a separate file system node? Is it because 
the VF has its own CPER ring? If so, can you please check if the VF can reuse 
the CPER created for bare-metal?

Regards,
Hawking

-Original Message-
From: Yi, Tony 
Sent: Thursday, February 27, 2025 23:12
To: Yi, Tony ; Skvortsov, Victor ; 
amd-gfx@lists.freedesktop.org; Zhang, Hawking ; Luo, 
Zhigang 
Cc: Yi, Tony 
Subject: [PATCH 2/2] drm/amdgpu: Add support for CPERs on virtualization

Add support for CPERs on VFs.

VFs do not receive PMFW messages directly; as such, they need to query them 
from the host. To avoid hitting host event guard, CPER queries need to be rate 
limited. CPER queries share the same RAS telemetry buffer as error count query, 
so a mutex protecting the shared buffer was added as well.

For readability, the amdgpu_detect_virtualization was refactored into multiple 
individual functions.

Signed-off-by: Tony Yi mailto:tony...@amd.com>>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   |  31 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 138 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   |  18 ++-
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c  |  14 +++
 5 files changed, 195 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5e1d8f0039d0..198d29faa754 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3099,7 +3099,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)

amdgpu_fru_get_product_info(adev);

-   r = amdgpu_cper_init(adev);
+   if (!amdgpu_sriov_vf(adev) || amdgpu_sriov_ras_cper_en(adev))
+   r = amdgpu_cper_init(adev);

 init_failed:

@@ -4335,10 +4336,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 * for throttling interrupt) = 60 seconds.
 */
ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
-   ratelimit_state_init(&adev->virt.ras_telemetry_rs, 5 * HZ, 1);

ratelimit_set_flags(&adev->throttling_logging_rs, 
RATELIMIT_MSG_ON_RELEASE);
-   ratelimit_set_flags(&adev->virt.ras_telemetry_rs, 
RATELIMIT_MSG_ON_RELEASE);

/* Registers mapping */
/* TODO: block userspace mapping of io register */ @@ -4370,7 +4369,7 
@@ int amdgpu_device_init(struct amdgpu_device *adev,
return -ENOMEM;

/* detect hw virtualization here */
-   amdgpu_detect_virtualization(adev);
+   amdgpu_virt_init(adev);

amdgpu_device_get_pcie_info(adev);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 81a7d4faac9c..d55c8b7fdb59 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -578,12 +578,32 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, 
char __user *buf,
return result;
 }

+static ssize_t amdgpu_debugfs_virt_ring_read(struct file *f, char __user *buf,
+   size_t size, loff_t *pos)
+{
+   struct amdgpu_ring *ring = file_inode(f)->i_private;
+
+   if (*pos & 3 || size & 3)
+   return -EINVAL;
+
+   if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+   amdgpu_virt_req_ras_cper_dump(ring->adev, false);
+
+   return amdgpu_debugfs_ring_read(f, buf, size, pos); }
+
 static const struct file_operations amdgpu_debugfs_ring_fops = {
.owner = THIS_MODULE,
.read = amdgpu_debugfs_ring_read,
.llseek = default_llseek
 };

+static const struct file_operations amdgpu_debugfs_virt_ring_fops = {
+   .owner = THIS_MODULE,
+   .read = amdgpu_debugfs_virt_ring_read,
+   .llseek = default_llseek
+};
+
 static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
   size_t size, loff_t *pos)
 {
@@ -671,9 +691,14 @@ void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
char name[32];

sprintf(name, "amdgpu_ring_%s", ring->name);
-   debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
-&amdgpu_debugfs_ring_fops,
-ring->ring_size + 12);
+   if (amdgpu_sriov_vf(adev))
+   deb

RE: [PATCH] drm/amdgpu: Free CPER entry after committing to ring

2025-02-27 Thread Zhang, Hawking
[AMD Official Use Only - AMD Internal Distribution Only]

Reviewed-by: Hawking Zhang 

Regards,
Hawking
-Original Message-
From: Liu, Xiang(Dean) 
Sent: Friday, February 28, 2025 11:12
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Zhou1, Tao ; 
Chai, Thomas ; Liu, Xiang(Dean) 
Subject: [PATCH] drm/amdgpu: Free CPER entry after committing to ring

Free CPER entry when it's committed to CPER ring to avoid memory leak.

Signed-off-by: Xiang Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
index 15cd0a007b71..0415ed222342 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
@@ -301,6 +301,7 @@ int amdgpu_cper_generate_ue_record(struct amdgpu_device 
*adev,
return ret;

amdgpu_cper_ring_write(ring, fatal, fatal->record_length);
+   kfree(fatal);

return 0;
 }
@@ -323,6 +324,7 @@ int amdgpu_cper_generate_bp_threshold_record(struct 
amdgpu_device *adev)
return ret;

amdgpu_cper_ring_write(ring, bp_threshold, bp_threshold->record_length);
+   kfree(bp_threshold);

return 0;
 }
@@ -399,6 +401,7 @@ int amdgpu_cper_generate_ce_records(struct amdgpu_device 
*adev,
}

amdgpu_cper_ring_write(ring, corrected, corrected->record_length);
+   kfree(corrected);

return 0;
 }
--
2.34.1



Re: [PATCH 1/2] drm/amdgpu: Use the right struct for VCN v5.0.1

2025-02-27 Thread Lazar, Lijo


On 2/26/2025 1:01 PM, Lijo Lazar wrote:
> VCN IP versions >= 5.0 uses VCN5 fw shared struct.
> 
> Signed-off-by: Lijo Lazar 
> ---
>  drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c 
> b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
> index d29e8d685194..7ef83c9346e5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
> @@ -153,7 +153,7 @@ static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block 
> *ip_block)
>  
>   if (drm_dev_enter(adev_to_drm(adev), &idx)) {
>   for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
> - volatile struct amdgpu_vcn4_fw_shared *fw_shared;
> + volatile struct amdgpu_vcn5_fw_shared *fw_shared;
>  
>   fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
>   fw_shared->present_flag_0 = 0;
> @@ -341,7 +341,7 @@ static void vcn_v5_0_1_mc_resume(struct amdgpu_vcn_inst 
> *vinst)
>   upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
>   WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
>   WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0,
> - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
> + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)));
>  }
>  
>  /**
> @@ -451,7 +451,7 @@ static void vcn_v5_0_1_mc_resume_dpg_mode(struct 
> amdgpu_vcn_inst *vinst,
>   VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
>   WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
>   VCN, 0, regUVD_VCPU_NONCACHE_SIZE0),
> - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, 
> indirect);
> + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)), 0, 
> indirect);
>  
>   /* VCN global tiling registers */
>   WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
> @@ -493,7 +493,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct 
> amdgpu_vcn_inst *vinst,
>  {
>   struct amdgpu_device *adev = vinst->adev;
>   int inst_idx = vinst->inst;
> - volatile struct amdgpu_vcn4_fw_shared *fw_shared =
> + volatile struct amdgpu_vcn5_fw_shared *fw_shared =
>   adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
>   struct amdgpu_ring *ring;
>   int vcn_inst;
> @@ -602,7 +602,7 @@ static int vcn_v5_0_1_start(struct amdgpu_vcn_inst *vinst)
>  {
>   struct amdgpu_device *adev = vinst->adev;
>   int i = vinst->inst;
> - volatile struct amdgpu_vcn4_fw_shared *fw_shared;
> + volatile struct amdgpu_vcn5_fw_shared *fw_shared;
>   struct amdgpu_ring *ring;
>   uint32_t tmp;
>   int j, k, r, vcn_inst;
> @@ -780,7 +780,7 @@ static int vcn_v5_0_1_stop(struct amdgpu_vcn_inst *vinst)
>  {
>   struct amdgpu_device *adev = vinst->adev;
>   int i = vinst->inst;
> - volatile struct amdgpu_vcn4_fw_shared *fw_shared;
> + volatile struct amdgpu_vcn5_fw_shared *fw_shared;
>   uint32_t tmp;
>   int r = 0, vcn_inst;
>  



[PATCH] drm/amdgpu: Free CPER entry after committing to ring

2025-02-27 Thread Xiang Liu
Free CPER entry when it's committed to CPER ring to avoid memory leak.

Signed-off-by: Xiang Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
index 15cd0a007b71..0415ed222342 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
@@ -301,6 +301,7 @@ int amdgpu_cper_generate_ue_record(struct amdgpu_device 
*adev,
return ret;
 
amdgpu_cper_ring_write(ring, fatal, fatal->record_length);
+   kfree(fatal);
 
return 0;
 }
@@ -323,6 +324,7 @@ int amdgpu_cper_generate_bp_threshold_record(struct 
amdgpu_device *adev)
return ret;
 
amdgpu_cper_ring_write(ring, bp_threshold, bp_threshold->record_length);
+   kfree(bp_threshold);
 
return 0;
 }
@@ -399,6 +401,7 @@ int amdgpu_cper_generate_ce_records(struct amdgpu_device 
*adev,
}
 
amdgpu_cper_ring_write(ring, corrected, corrected->record_length);
+   kfree(corrected);
 
return 0;
 }
-- 
2.34.1



RE: [PATCH 1/2] drm/amdgpu: Use the right struct for VCN v5.0.1

2025-02-27 Thread Zhang, Hawking
[AMD Official Use Only - AMD Internal Distribution Only]

The series is

Reviewed-by: Hawking Zhang 

Regards,
Hawking
-Original Message-
From: Lazar, Lijo 
Sent: Wednesday, February 26, 2025 15:32
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Deucher, Alexander 
; Liu, Leo ; Jiang, Sonny 

Subject: [PATCH 1/2] drm/amdgpu: Use the right struct for VCN v5.0.1

VCN IP versions >= 5.0 uses VCN5 fw shared struct.

Signed-off-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
index d29e8d685194..7ef83c9346e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
@@ -153,7 +153,7 @@ static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block 
*ip_block)

if (drm_dev_enter(adev_to_drm(adev), &idx)) {
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
-   volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+   volatile struct amdgpu_vcn5_fw_shared *fw_shared;

fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
fw_shared->present_flag_0 = 0;
@@ -341,7 +341,7 @@ static void vcn_v5_0_1_mc_resume(struct amdgpu_vcn_inst 
*vinst)
upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0,
-   AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+   AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)));
 }

 /**
@@ -451,7 +451,7 @@ static void vcn_v5_0_1_mc_resume_dpg_mode(struct 
amdgpu_vcn_inst *vinst,
VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
VCN, 0, regUVD_VCPU_NONCACHE_SIZE0),
-   AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, 
indirect);
+   AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)), 0,
+indirect);

/* VCN global tiling registers */
WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( @@ -493,7 +493,7 
@@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst,  {
struct amdgpu_device *adev = vinst->adev;
int inst_idx = vinst->inst;
-   volatile struct amdgpu_vcn4_fw_shared *fw_shared =
+   volatile struct amdgpu_vcn5_fw_shared *fw_shared =
adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
int vcn_inst;
@@ -602,7 +602,7 @@ static int vcn_v5_0_1_start(struct amdgpu_vcn_inst *vinst)  
{
struct amdgpu_device *adev = vinst->adev;
int i = vinst->inst;
-   volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+   volatile struct amdgpu_vcn5_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t tmp;
int j, k, r, vcn_inst;
@@ -780,7 +780,7 @@ static int vcn_v5_0_1_stop(struct amdgpu_vcn_inst *vinst)  {
struct amdgpu_device *adev = vinst->adev;
int i = vinst->inst;
-   volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+   volatile struct amdgpu_vcn5_fw_shared *fw_shared;
uint32_t tmp;
int r = 0, vcn_inst;

--
2.25.1



[PATCH] drm/amdkfd: remove unused debug gws support status variable

2025-02-27 Thread Jonathan Kim
Remove unused declaration of gws_debug_workaround.

Signed-off-by: Jonathan Kim 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 59619f794b6b..43950f3e6672 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -289,7 +289,6 @@ struct kfd_node {
 
/* Global GWS resource shared between processes */
void *gws;
-   bool gws_debug_workaround;
 
/* Clients watching SMI events */
struct list_head smi_clients;
-- 
2.34.1



Re: [PATCH 2/6] drm/amdgpu: add dce_v6_0_soft_reset() to DCE6

2025-02-27 Thread Alexandre Demers
On Thu, Feb 27, 2025 at 2:01 PM Alex Deucher  wrote:
>
> On Thu, Feb 27, 2025 at 1:52 PM Alexandre Demers
>  wrote:
> >
> > On Thu, Feb 27, 2025 at 9:23 AM Alex Deucher  wrote:
> > >
> > > On Thu, Feb 27, 2025 at 12:49 AM Alexandre Demers
> > >  wrote:
> > > >
> > > > DCE6 was missing soft reset, but it was easily identifiable under 
> > > > radeon.
> > > > This should be it, pretty much as it is done under DCE8 and DCE10.
> > > >
> > > > Signed-off-by: Alexandre Demers 
> > > > ---
> > > >  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 62 ---
> > > >  1 file changed, 57 insertions(+), 5 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c 
> > > > b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> > > > index bd763fde1c50..254cb73324c6 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> > > > @@ -371,27 +371,58 @@ static u32 dce_v6_0_hpd_get_gpio_reg(struct 
> > > > amdgpu_device *adev)
> > > > return mmDC_GPIO_HPD_A;
> > > >  }
> > > >
> > > > +static bool dce_v6_0_is_display_hung(struct amdgpu_device *adev)
> > > > +{
> > > > +   u32 crtc_hung = 0;
> > > > +   u32 crtc_status[6];
> > > > +   u32 i, j, tmp;
> > > > +
> > > > +   for (i = 0; i < adev->mode_info.num_crtc; i++) {
> > > > +   if (RREG32(mmCRTC_CONTROL + crtc_offsets[i]) & 
> > > > CRTC_CONTROL__CRTC_MASTER_EN_MASK) {
> > > > +   crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT 
> > > > + crtc_offsets[i]);
> > > > +   crtc_hung |= (1 << i);
> > > > +   }
> > > > +   }
> > > > +
> > > > +   for (j = 0; j < 10; j++) {
> > > > +   for (i = 0; i < adev->mode_info.num_crtc; i++) {
> > > > +   if (crtc_hung & (1 << i)) {
> > > > +   tmp = RREG32(mmCRTC_STATUS_HV_COUNT + 
> > > > crtc_offsets[i]);
> > > > +   if (tmp != crtc_status[i])
> > > > +   crtc_hung &= ~(1 << i);
> > > > +   }
> > > > +   }
> > > > +   if (crtc_hung == 0)
> > > > +   return false;
> > > > +   udelay(100);
> > > > +   }
> > > > +
> > > > +   return true;
> > > > +}
> > > > +
> > > >  static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev,
> > > >   bool render)
> > > >  {
> > > > if (!render)
> > > > WREG32(mmVGA_RENDER_CONTROL,
> > > > RREG32(mmVGA_RENDER_CONTROL) & 
> > > > VGA_VSTATUS_CNTL);
> > > > -
> > > >  }
> > > >
> > > >  static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev)
> > > >  {
> > > > +   int num_crtc = 0;
> > > > +
> > > > switch (adev->asic_type) {
> > > > case CHIP_TAHITI:
> > > > case CHIP_PITCAIRN:
> > > > case CHIP_VERDE:
> > > > -   return 6;
> > > > +   num_crtc = 6;
> > > > case CHIP_OLAND:
> > > > -   return 2;
> > > > +   num_crtc = 2;
> > > > default:
> > > > -   return 0;
> > > > +   num_crtc = 0;
> > > > }
> > > > +   return num_crtc;
> > >
> > > Any particular reason for this change?  It just adds an extra variable.
> > >
> > > Alex
> >
> > Just for uniformisation with DCE8 and DCE10. We could also remove the
> > variable and use returns everywhere.
> >
> > Any preferences?
>
> ah, ok. I think the direct returns are cleaner.
>
> Alex

Ok. Should I submit a new version or should this patch go through and
be changed everywhere in a different patch?

Alexandre

>
> > Alexandre
> >
> > >
> > > >  }
> > > >
> > > >  void dce_v6_0_disable_dce(struct amdgpu_device *adev)
> > > > @@ -2846,7 +2877,28 @@ static bool dce_v6_0_is_idle(void *handle)
> > > >
> > > >  static int dce_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
> > > >  {
> > > > -   DRM_INFO(": dce_v6_0_soft_reset --- no impl!!\n");
> > > > +   u32 srbm_soft_reset = 0, tmp;
> > > > +   struct amdgpu_device *adev = ip_block->adev;
> > > > +
> > > > +   if (dce_v6_0_is_display_hung(adev))
> > > > +   srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
> > > > +
> > > > +   if (srbm_soft_reset) {
> > > > +   tmp = RREG32(mmSRBM_SOFT_RESET);
> > > > +   tmp |= srbm_soft_reset;
> > > > +   dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
> > > > +   WREG32(mmSRBM_SOFT_RESET, tmp);
> > > > +   tmp = RREG32(mmSRBM_SOFT_RESET);
> > > > +
> > > > +   udelay(50);
> > > > +
> > > > +   tmp &= ~srbm_soft_reset;
> > > > +   WREG32(mmSRBM_SOFT_RESET, tmp);
> > > > +   tmp = RREG32(mmSRBM_SOFT_RESET);
> > > > +
> > > > +   /* Wait a little for things to settle down */
> > > > +   udelay(50);
> >

Re: [PATCH 2/6] drm/amdgpu: add dce_v6_0_soft_reset() to DCE6

2025-02-27 Thread Alexandre Demers
On Thu, Feb 27, 2025 at 2:05 PM Alex Deucher  wrote:
>
> On Thu, Feb 27, 2025 at 2:01 PM Alex Deucher  wrote:
> >
> > On Thu, Feb 27, 2025 at 1:52 PM Alexandre Demers
> >  wrote:
> > >
> > > On Thu, Feb 27, 2025 at 9:23 AM Alex Deucher  
> > > wrote:
> > > >
> > > > On Thu, Feb 27, 2025 at 12:49 AM Alexandre Demers
> > > >  wrote:
> > > > >
> > > > > DCE6 was missing soft reset, but it was easily identifiable under 
> > > > > radeon.
> > > > > This should be it, pretty much as it is done under DCE8 and DCE10.
> > > > >
> > > > > Signed-off-by: Alexandre Demers 
> > > > > ---
> > > > >  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 62 
> > > > > ---
> > > > >  1 file changed, 57 insertions(+), 5 deletions(-)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c 
> > > > > b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> > > > > index bd763fde1c50..254cb73324c6 100644
> > > > > --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> > > > > +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> > > > > @@ -371,27 +371,58 @@ static u32 dce_v6_0_hpd_get_gpio_reg(struct 
> > > > > amdgpu_device *adev)
> > > > > return mmDC_GPIO_HPD_A;
> > > > >  }
> > > > >
> > > > > +static bool dce_v6_0_is_display_hung(struct amdgpu_device *adev)
> > > > > +{
> > > > > +   u32 crtc_hung = 0;
> > > > > +   u32 crtc_status[6];
> > > > > +   u32 i, j, tmp;
> > > > > +
> > > > > +   for (i = 0; i < adev->mode_info.num_crtc; i++) {
> > > > > +   if (RREG32(mmCRTC_CONTROL + crtc_offsets[i]) & 
> > > > > CRTC_CONTROL__CRTC_MASTER_EN_MASK) {
> > > > > +   crtc_status[i] = 
> > > > > RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
> > > > > +   crtc_hung |= (1 << i);
> > > > > +   }
> > > > > +   }
> > > > > +
> > > > > +   for (j = 0; j < 10; j++) {
> > > > > +   for (i = 0; i < adev->mode_info.num_crtc; i++) {
> > > > > +   if (crtc_hung & (1 << i)) {
> > > > > +   tmp = RREG32(mmCRTC_STATUS_HV_COUNT + 
> > > > > crtc_offsets[i]);
> > > > > +   if (tmp != crtc_status[i])
> > > > > +   crtc_hung &= ~(1 << i);
> > > > > +   }
> > > > > +   }
> > > > > +   if (crtc_hung == 0)
> > > > > +   return false;
> > > > > +   udelay(100);
> > > > > +   }
> > > > > +
> > > > > +   return true;
> > > > > +}
> > > > > +
> > > > >  static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev,
> > > > >   bool render)
> > > > >  {
> > > > > if (!render)
> > > > > WREG32(mmVGA_RENDER_CONTROL,
> > > > > RREG32(mmVGA_RENDER_CONTROL) & 
> > > > > VGA_VSTATUS_CNTL);
> > > > > -
> > > > >  }
> > > > >
> > > > >  static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev)
> > > > >  {
> > > > > +   int num_crtc = 0;
> > > > > +
> > > > > switch (adev->asic_type) {
> > > > > case CHIP_TAHITI:
> > > > > case CHIP_PITCAIRN:
> > > > > case CHIP_VERDE:
> > > > > -   return 6;
> > > > > +   num_crtc = 6;
> > > > > case CHIP_OLAND:
> > > > > -   return 2;
> > > > > +   num_crtc = 2;
> > > > > default:
> > > > > -   return 0;
> > > > > +   num_crtc = 0;
> > > > > }
> > > > > +   return num_crtc;
> > > >
> > > > Any particular reason for this change?  It just adds an extra variable.
> > > >
> > > > Alex
> > >
> > > Just for uniformisation with DCE8 and DCE10. We could also remove the
> > > variable and use returns everywhere.
> > >
> > > Any preferences?
> >
> > ah, ok. I think the direct returns are cleaner.
>
> I would maybe split up your patches into maybe 3 logical patch sets:
> one to fix spelling typos and comments, one to make the DCE code more
> uniform across versions, and another to add new DCE6 functionality.
>
> Alex
>
Ok, I'll split them and send new patch sets. Should they be identified as V2?

Alexandre
> >
> > Alex
> >
> > > Alexandre
> > >
> > > >
> > > > >  }
> > > > >
> > > > >  void dce_v6_0_disable_dce(struct amdgpu_device *adev)
> > > > > @@ -2846,7 +2877,28 @@ static bool dce_v6_0_is_idle(void *handle)
> > > > >
> > > > >  static int dce_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
> > > > >  {
> > > > > -   DRM_INFO(": dce_v6_0_soft_reset --- no impl!!\n");
> > > > > +   u32 srbm_soft_reset = 0, tmp;
> > > > > +   struct amdgpu_device *adev = ip_block->adev;
> > > > > +
> > > > > +   if (dce_v6_0_is_display_hung(adev))
> > > > > +   srbm_soft_reset |= 
> > > > > SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
> > > > > +
> > > > > +   if (srbm_soft_reset) {
> > > > > +   tmp = RREG32(mmSRBM_SOFT_RESET);
> > > > > +   tmp |= srbm_soft_reset;
> > > > > 

Re: [PATCH v2] drm/amdkfd: Fix instruction hazard in gfx12 trap handler

2025-02-27 Thread Alex Deucher
On Fri, Feb 7, 2025 at 6:57 PM Jay Cornwall  wrote:
>
> VALU instructions with SGPR source need wait states to avoid hazard
> with SALU using different SGPR.
>
> v2: Eliminate some hazards to reduce code explosion
>
> Signed-off-by: Jay Cornwall 
> Cc: Lancelot Six 

Acked-by: Alex Deucher 

> ---
>  .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h| 677 ++
>  .../amd/amdkfd/cwsr_trap_handler_gfx12.asm|  82 ++-
>  2 files changed, 404 insertions(+), 355 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h 
> b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> index 651660958e5b..0320163b6e74 100644
> --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> @@ -3644,7 +3644,7 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
>  };
>
>  static const uint32_t cwsr_trap_gfx12_hex[] = {
> -   0xbfa1, 0xbfa0024b,
> +   0xbfa1, 0xbfa002a2,
> 0xb0804009, 0xb8f8f804,
> 0x9178ff78, 0x8c00,
> 0xb8fbf811, 0x8b6eff78,
> @@ -3718,7 +3718,15 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
> 0x00011677, 0xd761,
> 0x00011a79, 0xd761,
> 0x00011c7e, 0xd761,
> -   0x00011e7f, 0xbefe00ff,
> +   0x00011e7f, 0xd850,
> +   0x, 0xd850,
> +   0x, 0xd850,
> +   0x, 0xd850,
> +   0x, 0xd850,
> +   0x, 0xd850,
> +   0x, 0xd850,
> +   0x, 0xd850,
> +   0x, 0xbefe00ff,
> 0x3fff, 0xbeff0080,
> 0xee0a407a, 0x000c,
> 0x4000, 0xd760007a,
> @@ -3755,38 +3763,46 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
> 0x0200, 0xbef600ff,
> 0x0100, 0x7e000280,
> 0x7e020280, 0x7e040280,
> -   0xbefd0080, 0xbe804ec2,
> -   0xbf94fffe, 0xb8faf804,
> -   0x8b7a847a, 0x91788478,
> -   0x8c787a78, 0xd7610002,
> -   0xfa71, 0x807d817d,
> -   0xd7610002, 0xfa6c,
> -   0x807d817d, 0x917aff6d,
> -   0x8000, 0xd7610002,
> -   0xfa7a, 0x807d817d,
> -   0xd7610002, 0xfa6e,
> -   0x807d817d, 0xd7610002,
> -   0xfa6f, 0x807d817d,
> -   0xd7610002, 0xfa78,
> -   0x807d817d, 0xb8faf811,
> -   0xd7610002, 0xfa7a,
> -   0x807d817d, 0xd7610002,
> -   0xfa7b, 0x807d817d,
> -   0xb8f1f801, 0xd7610002,
> -   0xfa71, 0x807d817d,
> -   0xb8f1f814, 0xd7610002,
> -   0xfa71, 0x807d817d,
> -   0xb8f1f815, 0xd7610002,
> -   0xfa71, 0x807d817d,
> -   0xb8f1f812, 0xd7610002,
> -   0xfa71, 0x807d817d,
> -   0xb8f1f813, 0xd7610002,
> -   0xfa71, 0x807d817d,
> +   0xbe804ec2, 0xbf94fffe,
> +   0xb8faf804, 0x8b7a847a,
> +   0x91788478, 0x8c787a78,
> +   0x917aff6d, 0x8000,
> +   0xd7610002, 0x00010071,
> +   0xd7610002, 0x0001026c,
> +   0xd7610002, 0x0001047a,
> +   0xd7610002, 0x0001066e,
> +   0xd7610002, 0x0001086f,
> +   0xd7610002, 0x00010a78,
> +   0xd7610002, 0x00010e7b,
> +   0xd850, 0x,
> +   0xd850, 0x,
> +   0xd850, 0x,
> +   0xd850, 0x,
> +   0xd850, 0x,
> +   0xd850, 0x,
> +   0xd850, 0x,
> +   0xd850, 0x,
> +   0xb8faf811, 0xd7610002,
> +   0x00010c7a, 0xb8faf801,
> +   0xd7610002, 0x0001107a,
> +   0xb8faf814, 0xd7610002,
> +   0x0001127a, 0xb8faf815,
> +   0xd7610002, 0x0001147a,
> +   0xb8faf812, 0xd7610002,
> +   0x0001167a, 0xb8faf813,
> +   0xd7610002, 0x0001187a,
> 0xb8faf802, 0xd7610002,
> -   0xfa7a, 0x807d817d,
> -   0xbefa50c1, 0xbfc7,
> -   0xd7610002, 0xfa7a,
> -   0x807d817d, 0xbefe00ff,
> +   0x00011a7a, 0xbefa50c1,
> +   0xbfc7, 0xd7610002,
> +   0x00011c7a, 0xd850,
> +   0x, 0xd850,
> +   0x, 0xd850,
> +   0x, 0xd850,
> +   0x, 0xd850,
> +   0x, 0xd850,
> +   0x, 0xd850,
> +   0x, 0xd850,
> +   0x, 0xbefe00ff,
> 0x, 0xbeff0080,
> 0xc4068070, 0x008ce802,
> 0x, 0xbefe00c1,
> @@ -3801,331 +3817,358 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
> 0xbe824102, 0xbe844104,
> 0xbe864106, 0xbe884108,
> 0xbe8a410a, 0xbe8c410c,
> -   0xbe8e410e, 0xd7610002,
> -   0xf200, 0x80798179,
> -   0xd7610002, 0xf201,
> -   0x80798179, 0xd7610002,
> -   0xf202, 0x80798179,
> -   0xd7610002, 0xf203,
> -   0x80798179, 0xd7610002,
> -   0xf204, 0x80798179,
> -   0xd7610002, 0xf205,
> -   0x80798179, 0xd7610002,
> -   0xf206, 0x80798179,
> -   0xd7610002, 0xf207,
> -   0x80798179, 0xd7610002,
> -   

[PATCH] drm/amdkfd: remove unused debug gws support status variable

2025-02-27 Thread Jonathan Kim
Remove unused declaration of gws_debug_workaround.

Signed-off-by: Jonathan Kim 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 59619f794b6b..43950f3e6672 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -289,7 +289,6 @@ struct kfd_node {
 
/* Global GWS resource shared between processes */
void *gws;
-   bool gws_debug_workaround;
 
/* Clients watching SMI events */
struct list_head smi_clients;
-- 
2.34.1



RE: [PATCH] amdkfd: initialize svm lists at where they are defined

2025-02-27 Thread Zhu, Lingshan
[Public]

Ping

-Original Message-
From: Zhu, Lingshan 
Sent: Friday, February 21, 2025 5:24 PM
To: Kuehling, Felix ; Deucher, Alexander 

Cc: Huang, Ray ; amd-gfx@lists.freedesktop.org; Zhu, 
Lingshan 
Subject: [PATCH] amdkfd: initialize svm lists at where they are defined

This commit initialized svm lists at where they are defined. This is defensive 
programing for security and consistency.

Initalizing variables ensures that their states are always valid, avoiding 
issues caused by uninitialized variables that could lead to unpredictable 
behaviros.

And we should not assume the callee would always initialize them

Signed-off-by: Zhu Lingshan 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index bd3e20d981e0..cbc997449379 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2130,11 +2130,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, 
uint64_t size,

pr_debug("svms 0x%p [0x%llx 0x%lx]\n", &p->svms, start, last);

-   INIT_LIST_HEAD(update_list);
-   INIT_LIST_HEAD(insert_list);
-   INIT_LIST_HEAD(remove_list);
INIT_LIST_HEAD(&new_list);
-   INIT_LIST_HEAD(remap_list);

node = interval_tree_iter_first(&svms->objects, start, last);
while (node) {
@@ -3635,6 +3631,11 @@ svm_range_set_attr(struct kfd_process *p, struct 
mm_struct *mm,
if (r)
return r;

+   INIT_LIST_HEAD(&update_list);
+   INIT_LIST_HEAD(&insert_list);
+   INIT_LIST_HEAD(&remove_list);
+   INIT_LIST_HEAD(&remap_list);
+
svms = &p->svms;

mutex_lock(&process_info->lock);
--
2.47.1



[PATCH 0/4] drm/amd/display: move from kzalloc(size * nr, ...) to kcalloc(nr, size, ...)

2025-02-27 Thread Ethan Carter Edwards
We are trying to get rid of all multiplications from allocation
functions to prevent integer overflows. Here the multiplications are
probably safe, but using kcalloc() is more appropriate and improves
readability. It is also safer. This series contains a few patches
with these fixes.

Part of the Kernel Self Protection Project efforts. Links below have
more details.

Link: https://github.com/KSPP/linux/issues/162
Link: 
https://www.kernel.org/doc/html/next/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments

Signed-off-by: Ethan Carter Edwards 
---
Ethan Carter Edwards (4):
  drm/amd/display: change kzalloc to kcalloc in dcn30_validate_bandwidth()
  drm/amd/display: change kzalloc to kcalloc in dcn31_validate_bandwidth()
  drm/amd/display: change kzalloc to kcalloc in dcn314_validate_bandwidth()
  drm/amd/display: change kzalloc to kcalloc in dml1_validate()

 drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c   | 3 ++-
 drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c   | 3 ++-
 drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c | 3 ++-
 drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c   | 3 ++-
 4 files changed, 8 insertions(+), 4 deletions(-)
---
base-commit: be5c7bbb3a64baf884481a1ba0c2f8fb2f93f7c3
change-id: 20250227-amd-display-a8342c55a9a0

Best regards,
-- 
Ethan Carter Edwards 




[PATCH 2/4] drm/amd/display: change kzalloc to kcalloc in dcn31_validate_bandwidth()

2025-02-27 Thread Ethan Carter Edwards
We are trying to get rid of all multiplications from allocation
functions to prevent integer overflows. Here the multiplication is
probably safe, but using kcalloc() is more appropriate and improves
readability. This patch has no effect on runtime behavior.

Signed-off-by: Ethan Carter Edwards 
---
 drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c 
b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
index 
3c42ba8566cf8b79106de25c9e0aad4a70898ea6..dbfef85f8f65a6f76c86cbb6db59d608a74b
 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
@@ -1768,7 +1768,8 @@ bool dcn31_validate_bandwidth(struct dc *dc,
 
int vlevel = 0;
int pipe_cnt = 0;
-   display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * 
sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
+   display_e2e_pipe_params_st *pipes = kcalloc(dc->res_pool->pipe_count,
+   sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
DC_LOGGER_INIT(dc->ctx->logger);
 
BW_VAL_TRACE_COUNT();

-- 
2.48.1




[PATCH 1/4] drm/amd/display: change kzalloc to kcalloc in dcn30_validate_bandwidth()

2025-02-27 Thread Ethan Carter Edwards
We are trying to get rid of all multiplications from allocation
functions to prevent integer overflows. Here the multiplication is
probably safe, but using kcalloc() is more appropriate and improves
readability. This patch has no effect on runtime behavior.

Signed-off-by: Ethan Carter Edwards 
---
 drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c 
b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
index 
13202ce30d668e0c9e66632a9016a1597e2705b2..f01ced0150726b2efbd123de0084101cfc763ff5
 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
@@ -2047,7 +2047,8 @@ bool dcn30_validate_bandwidth(struct dc *dc,
 
int vlevel = 0;
int pipe_cnt = 0;
-   display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * 
sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
+   display_e2e_pipe_params_st *pipes = kcalloc(dc->res_pool->pipe_count,
+   sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
DC_LOGGER_INIT(dc->ctx->logger);
 
BW_VAL_TRACE_COUNT();

-- 
2.48.1




[PATCH 4/4] drm/amd/display: change kzalloc to kcalloc in dml1_validate()

2025-02-27 Thread Ethan Carter Edwards
We are trying to get rid of all multiplications from allocation
functions to prevent integer overflows. Here the multiplication is
probably safe, but using kcalloc() is more appropriate and improves
readability. This patch has no effect on runtime behavior.

Signed-off-by: Ethan Carter Edwards 
---
 drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
index 
664302876019072a77b9330229f7fe8545787396..2a59cc61ed8c918a4b5beb1d90bf3a8f77fcdeb9
 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
@@ -1749,7 +1749,8 @@ static bool dml1_validate(struct dc *dc, struct dc_state 
*context, bool fast_val
 
int vlevel = 0;
int pipe_cnt = 0;
-   display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * 
sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
+   display_e2e_pipe_params_st *pipes = kcalloc(dc->res_pool->pipe_count,
+   sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
 
/* To handle Freesync properly, setting FreeSync DML parameters
 * to its default state for the first stage of validation

-- 
2.48.1




[PATCH 3/4] drm/amd/display: change kzalloc to kcalloc in dcn314_validate_bandwidth()

2025-02-27 Thread Ethan Carter Edwards
We are trying to get rid of all multiplications from allocation
functions to prevent integer overflows. Here the multiplication is
probably safe, but using kcalloc() is more appropriate and improves
readability. This patch has no effect on runtime behavior.

Signed-off-by: Ethan Carter Edwards 
---
 drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c 
b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
index 
e3ba105034f83434c3e77d343ee267069d34d926..26becc4cb80408cb2778f6af62c7a1c497f06505
 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
@@ -1704,7 +1704,8 @@ bool dcn314_validate_bandwidth(struct dc *dc,
 
int vlevel = 0;
int pipe_cnt = 0;
-   display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * 
sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
+   display_e2e_pipe_params_st *pipes = kcalloc(dc->res_pool->pipe_count,
+   sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
DC_LOGGER_INIT(dc->ctx->logger);
 
BW_VAL_TRACE_COUNT();

-- 
2.48.1




Re: [PATCH 3/3] drm/amdgpu: fix typos in SI

2025-02-27 Thread Alex Deucher
Applied this series with some minor changes.

Thanks!

Alex

On Thu, Feb 27, 2025 at 12:14 AM Alexandre Demers
 wrote:
>
> Fix typos
>
> Signed-off-by: Alexandre Demers 
> ---
>  drivers/gpu/drm/amd/amdgpu/si.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
> index d1c06d0d6a2d..68f6f4ec8a47 100644
> --- a/drivers/gpu/drm/amd/amdgpu/si.c
> +++ b/drivers/gpu/drm/amd/amdgpu/si.c
> @@ -919,7 +919,7 @@ static const u32 hainan_mgcg_cgcg_init[] =
>
>  /* XXX: update when we support VCE */
>  #if 0
> -/* tahiti, pitcarin, verde */
> +/* tahiti, pitcairn, verde */
>  static const struct amdgpu_video_codec_info 
> tahiti_video_codecs_encode_array[] =
>  {
> {
> @@ -950,7 +950,7 @@ static const struct amdgpu_video_codecs 
> hainan_video_codecs_encode =
> .codec_array = NULL,
>  };
>
> -/* tahiti, pitcarin, verde, oland */
> +/* tahiti, pitcairn, verde, oland */
>  static const struct amdgpu_video_codec_info 
> tahiti_video_codecs_decode_array[] =
>  {
> {
> @@ -1898,7 +1898,7 @@ static int si_vce_send_vcepll_ctlreq(struct 
> amdgpu_device *adev)
> WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
>
> if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) {
> -   DRM_ERROR("Timeout setting UVD clocks!\n");
> +   DRM_ERROR("Timeout setting VCE clocks!\n");
> return -ETIMEDOUT;
> }
>
> --
> 2.48.1
>


Re: [V7 07/45] drm/colorop: Add 1D Curve subtype

2025-02-27 Thread Alex Hung




On 2/25/25 03:13, Louis Chauvet wrote:



Le 20/12/2024 à 05:33, Alex Hung a écrit :

From: Harry Wentland 

Add a new drm_colorop with DRM_COLOROP_1D_CURVE with two subtypes:
DRM_COLOROP_1D_CURVE_SRGB_EOTF and DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF.

Signed-off-by: Harry Wentland 
Co-developed-by: Alex Hung 
Signed-off-by: Alex Hung 
---
v5:
  - Add drm_get_colorop_curve_1d_type_name in header
  - Add drm_colorop_init
  - Set default curve
  - Add kernel docs

v4:
  - Use drm_colorop_curve_1d_type_enum_list to get name (Pekka)
  - Create separate init function for 1D curve
  - Pass supported TFs into 1D curve init function

  drivers/gpu/drm/drm_atomic_uapi.c |  18 ++--
  drivers/gpu/drm/drm_colorop.c | 134 ++
  include/drm/drm_colorop.h |  60 +
  3 files changed, 207 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/ 
drm_atomic_uapi.c

index 59fc25b59100..9a5dbf0a1306 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -648,11 +648,17 @@ static int 
drm_atomic_colorop_set_property(struct drm_colorop *colorop,

  struct drm_colorop_state *state, struct drm_file *file_priv,
  struct drm_property *property, uint64_t val)
  {
-    drm_dbg_atomic(colorop->dev,
-    "[COLOROP:%d] unknown property [PROP:%d:%s]]\n",
-    colorop->base.id,
-    property->base.id, property->name);
-    return -EINVAL;
+    if (property == colorop->curve_1d_type_property) {
+    state->curve_1d_type = val;
+    } else {
+    drm_dbg_atomic(colorop->dev,
+   "[COLOROP:%d:%d] unknown property [PROP:%d:%s]]\n",
+   colorop->base.id, colorop->type,
+   property->base.id, property->name);
+    return -EINVAL;
+    }
+
+    return 0;
  }
  static int
@@ -662,6 +668,8 @@ drm_atomic_colorop_get_property(struct drm_colorop 
*colorop,

  {
  if (property == colorop->type_property) {
  *val = colorop->type;
+    } else if (property == colorop->curve_1d_type_property) {
+    *val = state->curve_1d_type;
  } else {
  return -EINVAL;
  }
diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/ 
drm_colorop.c

index 1459a28c7e7b..a42de0aa48e1 100644
--- a/drivers/gpu/drm/drm_colorop.c
+++ b/drivers/gpu/drm/drm_colorop.c
@@ -31,6 +31,123 @@
  #include "drm_crtc_internal.h"
+static const struct drm_prop_enum_list drm_colorop_type_enum_list[] = {
+    { DRM_COLOROP_1D_CURVE, "1D Curve" },
+};
+
+static const char * const colorop_curve_1d_type_names[] = {
+    [DRM_COLOROP_1D_CURVE_SRGB_EOTF] = "sRGB EOTF",
+    [DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF] = "sRGB Inverse EOTF",
+};
+
+
+/* Init Helpers */
+
+static int drm_colorop_init(struct drm_device *dev, struct 
drm_colorop *colorop,

+    struct drm_plane *plane, enum drm_colorop_type type)
+{
+    struct drm_mode_config *config = &dev->mode_config;
+    struct drm_property *prop;
+    int ret = 0;
+
+    ret = drm_mode_object_add(dev, &colorop->base, 
DRM_MODE_OBJECT_COLOROP);

+    if (ret)
+    return ret;
+
+    colorop->base.properties = &colorop->properties;
+    colorop->dev = dev;
+    colorop->type = type;
+    colorop->plane = plane;
+
+    list_add_tail(&colorop->head, &config->colorop_list);
+    colorop->index = config->num_colorop++;
+
+    /* add properties */
+
+    /* type */
+    prop = drm_property_create_enum(dev,
+    DRM_MODE_PROP_IMMUTABLE,
+    "TYPE", drm_colorop_type_enum_list,
+    ARRAY_SIZE(drm_colorop_type_enum_list));


I think this function belongs to the previous patch "Add TYPE property".


This function is only called by the first colorop. Some pieces of the 
code in this function are introduced with the first colorop (1D curve) 
so it makes sense to include it here.





+
+    if (!prop)
+    return -ENOMEM;
+
+    colorop->type_property = prop;
+
+    drm_object_attach_property(&colorop->base,
+   colorop->type_property,
+   colorop->type);
+
+    return ret;
+}
+
+/**
+ * drm_colorop_curve_1d_init - Initialize a DRM_COLOROP_1D_CURVE
+ *
+ * @dev: DRM device
+ * @colorop: The drm_colorop object to initialize
+ * @plane: The associated drm_plane
+ * @supported_tfs: A bitfield of supported drm_colorop_curve_1d_init 
enum values,
+ * created using BIT(curve_type) and combined with 
the OR '|'

+ * operator.
+ * @return zero on success, -E value on failure
+ */
+int drm_colorop_curve_1d_init(struct drm_device *dev, struct 
drm_colorop *colorop,

+  struct drm_plane *plane, u64 supported_tfs)
+{
+    struct drm_prop_enum_list enum_list[DRM_COLOROP_1D_CURVE_COUNT];
+    int i, len;
+
+    struct drm_property *prop;
+    int ret;
+
+    if (!supported_tfs) {
+    drm_err(dev,
+    "No supported TFs for new 1D curve colorop on [PLANE:%d: 
%s]\n",

Re: [PATCH 2/6] drm/amdgpu: add dce_v6_0_soft_reset() to DCE6

2025-02-27 Thread Alex Deucher
On Thu, Feb 27, 2025 at 1:52 PM Alexandre Demers
 wrote:
>
> On Thu, Feb 27, 2025 at 9:23 AM Alex Deucher  wrote:
> >
> > On Thu, Feb 27, 2025 at 12:49 AM Alexandre Demers
> >  wrote:
> > >
> > > DCE6 was missing soft reset, but it was easily identifiable under radeon.
> > > This should be it, pretty much as it is done under DCE8 and DCE10.
> > >
> > > Signed-off-by: Alexandre Demers 
> > > ---
> > >  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 62 ---
> > >  1 file changed, 57 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c 
> > > b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> > > index bd763fde1c50..254cb73324c6 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> > > @@ -371,27 +371,58 @@ static u32 dce_v6_0_hpd_get_gpio_reg(struct 
> > > amdgpu_device *adev)
> > > return mmDC_GPIO_HPD_A;
> > >  }
> > >
> > > +static bool dce_v6_0_is_display_hung(struct amdgpu_device *adev)
> > > +{
> > > +   u32 crtc_hung = 0;
> > > +   u32 crtc_status[6];
> > > +   u32 i, j, tmp;
> > > +
> > > +   for (i = 0; i < adev->mode_info.num_crtc; i++) {
> > > +   if (RREG32(mmCRTC_CONTROL + crtc_offsets[i]) & 
> > > CRTC_CONTROL__CRTC_MASTER_EN_MASK) {
> > > +   crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + 
> > > crtc_offsets[i]);
> > > +   crtc_hung |= (1 << i);
> > > +   }
> > > +   }
> > > +
> > > +   for (j = 0; j < 10; j++) {
> > > +   for (i = 0; i < adev->mode_info.num_crtc; i++) {
> > > +   if (crtc_hung & (1 << i)) {
> > > +   tmp = RREG32(mmCRTC_STATUS_HV_COUNT + 
> > > crtc_offsets[i]);
> > > +   if (tmp != crtc_status[i])
> > > +   crtc_hung &= ~(1 << i);
> > > +   }
> > > +   }
> > > +   if (crtc_hung == 0)
> > > +   return false;
> > > +   udelay(100);
> > > +   }
> > > +
> > > +   return true;
> > > +}
> > > +
> > >  static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev,
> > >   bool render)
> > >  {
> > > if (!render)
> > > WREG32(mmVGA_RENDER_CONTROL,
> > > RREG32(mmVGA_RENDER_CONTROL) & VGA_VSTATUS_CNTL);
> > > -
> > >  }
> > >
> > >  static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev)
> > >  {
> > > +   int num_crtc = 0;
> > > +
> > > switch (adev->asic_type) {
> > > case CHIP_TAHITI:
> > > case CHIP_PITCAIRN:
> > > case CHIP_VERDE:
> > > -   return 6;
> > > +   num_crtc = 6;
> > > case CHIP_OLAND:
> > > -   return 2;
> > > +   num_crtc = 2;
> > > default:
> > > -   return 0;
> > > +   num_crtc = 0;
> > > }
> > > +   return num_crtc;
> >
> > Any particular reason for this change?  It just adds an extra variable.
> >
> > Alex
>
> Just for uniformisation with DCE8 and DCE10. We could also remove the
> variable and use returns everywhere.
>
> Any preferences?

ah, ok. I think the direct returns are cleaner.

Alex

> Alexandre
>
> >
> > >  }
> > >
> > >  void dce_v6_0_disable_dce(struct amdgpu_device *adev)
> > > @@ -2846,7 +2877,28 @@ static bool dce_v6_0_is_idle(void *handle)
> > >
> > >  static int dce_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
> > >  {
> > > -   DRM_INFO(": dce_v6_0_soft_reset --- no impl!!\n");
> > > +   u32 srbm_soft_reset = 0, tmp;
> > > +   struct amdgpu_device *adev = ip_block->adev;
> > > +
> > > +   if (dce_v6_0_is_display_hung(adev))
> > > +   srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
> > > +
> > > +   if (srbm_soft_reset) {
> > > +   tmp = RREG32(mmSRBM_SOFT_RESET);
> > > +   tmp |= srbm_soft_reset;
> > > +   dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
> > > +   WREG32(mmSRBM_SOFT_RESET, tmp);
> > > +   tmp = RREG32(mmSRBM_SOFT_RESET);
> > > +
> > > +   udelay(50);
> > > +
> > > +   tmp &= ~srbm_soft_reset;
> > > +   WREG32(mmSRBM_SOFT_RESET, tmp);
> > > +   tmp = RREG32(mmSRBM_SOFT_RESET);
> > > +
> > > +   /* Wait a little for things to settle down */
> > > +   udelay(50);
> > > +   }
> > > return 0;
> > >  }
> > >
> > > --
> > > 2.48.1
> > >


Re: [PATCH 2/6] drm/amdgpu: add dce_v6_0_soft_reset() to DCE6

2025-02-27 Thread Alexandre Demers
On Thu, Feb 27, 2025 at 9:23 AM Alex Deucher  wrote:
>
> On Thu, Feb 27, 2025 at 12:49 AM Alexandre Demers
>  wrote:
> >
> > DCE6 was missing soft reset, but it was easily identifiable under radeon.
> > This should be it, pretty much as it is done under DCE8 and DCE10.
> >
> > Signed-off-by: Alexandre Demers 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 62 ---
> >  1 file changed, 57 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c 
> > b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> > index bd763fde1c50..254cb73324c6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> > @@ -371,27 +371,58 @@ static u32 dce_v6_0_hpd_get_gpio_reg(struct 
> > amdgpu_device *adev)
> > return mmDC_GPIO_HPD_A;
> >  }
> >
> > +static bool dce_v6_0_is_display_hung(struct amdgpu_device *adev)
> > +{
> > +   u32 crtc_hung = 0;
> > +   u32 crtc_status[6];
> > +   u32 i, j, tmp;
> > +
> > +   for (i = 0; i < adev->mode_info.num_crtc; i++) {
> > +   if (RREG32(mmCRTC_CONTROL + crtc_offsets[i]) & 
> > CRTC_CONTROL__CRTC_MASTER_EN_MASK) {
> > +   crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + 
> > crtc_offsets[i]);
> > +   crtc_hung |= (1 << i);
> > +   }
> > +   }
> > +
> > +   for (j = 0; j < 10; j++) {
> > +   for (i = 0; i < adev->mode_info.num_crtc; i++) {
> > +   if (crtc_hung & (1 << i)) {
> > +   tmp = RREG32(mmCRTC_STATUS_HV_COUNT + 
> > crtc_offsets[i]);
> > +   if (tmp != crtc_status[i])
> > +   crtc_hung &= ~(1 << i);
> > +   }
> > +   }
> > +   if (crtc_hung == 0)
> > +   return false;
> > +   udelay(100);
> > +   }
> > +
> > +   return true;
> > +}
> > +
> >  static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev,
> >   bool render)
> >  {
> > if (!render)
> > WREG32(mmVGA_RENDER_CONTROL,
> > RREG32(mmVGA_RENDER_CONTROL) & VGA_VSTATUS_CNTL);
> > -
> >  }
> >
> >  static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev)
> >  {
> > +   int num_crtc = 0;
> > +
> > switch (adev->asic_type) {
> > case CHIP_TAHITI:
> > case CHIP_PITCAIRN:
> > case CHIP_VERDE:
> > -   return 6;
> > +   num_crtc = 6;
> > case CHIP_OLAND:
> > -   return 2;
> > +   num_crtc = 2;
> > default:
> > -   return 0;
> > +   num_crtc = 0;
> > }
> > +   return num_crtc;
>
> Any particular reason for this change?  It just adds an extra variable.
>
> Alex

Just for uniformisation with DCE8 and DCE10. We could also remove the
variable and use returns everywhere.

Any preferences?
Alexandre

>
> >  }
> >
> >  void dce_v6_0_disable_dce(struct amdgpu_device *adev)
> > @@ -2846,7 +2877,28 @@ static bool dce_v6_0_is_idle(void *handle)
> >
> >  static int dce_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
> >  {
> > -   DRM_INFO(": dce_v6_0_soft_reset --- no impl!!\n");
> > +   u32 srbm_soft_reset = 0, tmp;
> > +   struct amdgpu_device *adev = ip_block->adev;
> > +
> > +   if (dce_v6_0_is_display_hung(adev))
> > +   srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
> > +
> > +   if (srbm_soft_reset) {
> > +   tmp = RREG32(mmSRBM_SOFT_RESET);
> > +   tmp |= srbm_soft_reset;
> > +   dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
> > +   WREG32(mmSRBM_SOFT_RESET, tmp);
> > +   tmp = RREG32(mmSRBM_SOFT_RESET);
> > +
> > +   udelay(50);
> > +
> > +   tmp &= ~srbm_soft_reset;
> > +   WREG32(mmSRBM_SOFT_RESET, tmp);
> > +   tmp = RREG32(mmSRBM_SOFT_RESET);
> > +
> > +   /* Wait a little for things to settle down */
> > +   udelay(50);
> > +   }
> > return 0;
> >  }
> >
> > --
> > 2.48.1
> >


Re: [PATCH 2/6] drm/amdgpu: add dce_v6_0_soft_reset() to DCE6

2025-02-27 Thread Alex Deucher
On Thu, Feb 27, 2025 at 2:01 PM Alex Deucher  wrote:
>
> On Thu, Feb 27, 2025 at 1:52 PM Alexandre Demers
>  wrote:
> >
> > On Thu, Feb 27, 2025 at 9:23 AM Alex Deucher  wrote:
> > >
> > > On Thu, Feb 27, 2025 at 12:49 AM Alexandre Demers
> > >  wrote:
> > > >
> > > > DCE6 was missing soft reset, but it was easily identifiable under 
> > > > radeon.
> > > > This should be it, pretty much as it is done under DCE8 and DCE10.
> > > >
> > > > Signed-off-by: Alexandre Demers 
> > > > ---
> > > >  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 62 ---
> > > >  1 file changed, 57 insertions(+), 5 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c 
> > > > b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> > > > index bd763fde1c50..254cb73324c6 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> > > > @@ -371,27 +371,58 @@ static u32 dce_v6_0_hpd_get_gpio_reg(struct 
> > > > amdgpu_device *adev)
> > > > return mmDC_GPIO_HPD_A;
> > > >  }
> > > >
> > > > +static bool dce_v6_0_is_display_hung(struct amdgpu_device *adev)
> > > > +{
> > > > +   u32 crtc_hung = 0;
> > > > +   u32 crtc_status[6];
> > > > +   u32 i, j, tmp;
> > > > +
> > > > +   for (i = 0; i < adev->mode_info.num_crtc; i++) {
> > > > +   if (RREG32(mmCRTC_CONTROL + crtc_offsets[i]) & 
> > > > CRTC_CONTROL__CRTC_MASTER_EN_MASK) {
> > > > +   crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT 
> > > > + crtc_offsets[i]);
> > > > +   crtc_hung |= (1 << i);
> > > > +   }
> > > > +   }
> > > > +
> > > > +   for (j = 0; j < 10; j++) {
> > > > +   for (i = 0; i < adev->mode_info.num_crtc; i++) {
> > > > +   if (crtc_hung & (1 << i)) {
> > > > +   tmp = RREG32(mmCRTC_STATUS_HV_COUNT + 
> > > > crtc_offsets[i]);
> > > > +   if (tmp != crtc_status[i])
> > > > +   crtc_hung &= ~(1 << i);
> > > > +   }
> > > > +   }
> > > > +   if (crtc_hung == 0)
> > > > +   return false;
> > > > +   udelay(100);
> > > > +   }
> > > > +
> > > > +   return true;
> > > > +}
> > > > +
> > > >  static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev,
> > > >   bool render)
> > > >  {
> > > > if (!render)
> > > > WREG32(mmVGA_RENDER_CONTROL,
> > > > RREG32(mmVGA_RENDER_CONTROL) & 
> > > > VGA_VSTATUS_CNTL);
> > > > -
> > > >  }
> > > >
> > > >  static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev)
> > > >  {
> > > > +   int num_crtc = 0;
> > > > +
> > > > switch (adev->asic_type) {
> > > > case CHIP_TAHITI:
> > > > case CHIP_PITCAIRN:
> > > > case CHIP_VERDE:
> > > > -   return 6;
> > > > +   num_crtc = 6;
> > > > case CHIP_OLAND:
> > > > -   return 2;
> > > > +   num_crtc = 2;
> > > > default:
> > > > -   return 0;
> > > > +   num_crtc = 0;
> > > > }
> > > > +   return num_crtc;
> > >
> > > Any particular reason for this change?  It just adds an extra variable.
> > >
> > > Alex
> >
> > Just for uniformisation with DCE8 and DCE10. We could also remove the
> > variable and use returns everywhere.
> >
> > Any preferences?
>
> ah, ok. I think the direct returns are cleaner.

I would maybe split up your patches into maybe 3 logical patch sets:
one to fix spelling typos and comments, one to make the DCE code more
uniform across versions, and another to add new DCE6 functionality.

Alex

>
> Alex
>
> > Alexandre
> >
> > >
> > > >  }
> > > >
> > > >  void dce_v6_0_disable_dce(struct amdgpu_device *adev)
> > > > @@ -2846,7 +2877,28 @@ static bool dce_v6_0_is_idle(void *handle)
> > > >
> > > >  static int dce_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
> > > >  {
> > > > -   DRM_INFO(": dce_v6_0_soft_reset --- no impl!!\n");
> > > > +   u32 srbm_soft_reset = 0, tmp;
> > > > +   struct amdgpu_device *adev = ip_block->adev;
> > > > +
> > > > +   if (dce_v6_0_is_display_hung(adev))
> > > > +   srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
> > > > +
> > > > +   if (srbm_soft_reset) {
> > > > +   tmp = RREG32(mmSRBM_SOFT_RESET);
> > > > +   tmp |= srbm_soft_reset;
> > > > +   dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
> > > > +   WREG32(mmSRBM_SOFT_RESET, tmp);
> > > > +   tmp = RREG32(mmSRBM_SOFT_RESET);
> > > > +
> > > > +   udelay(50);
> > > > +
> > > > +   tmp &= ~srbm_soft_reset;
> > > > +   WREG32(mmSRBM_SOFT_RESET, tmp);
> > > > +   tmp = RREG32(mmSRBM_SOFT_RESET);
> > > > +
> > > > +

RE: [PATCH 1/2] drm/amdkfd: Add pm_config_dequeue_wait_counts API

2025-02-27 Thread Kim, Jonathan
[Public]

Overall lgtm.
A comment and nitpick below.

> -Original Message-
> From: amd-gfx  On Behalf Of Harish
> Kasiviswanathan
> Sent: Wednesday, February 26, 2025 2:23 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kasiviswanathan, Harish 
> Subject: [PATCH 1/2] drm/amdkfd: Add pm_config_dequeue_wait_counts API
>
> Update pm_update_grace_period() to more cleaner
> pm_config_dequeue_wait_counts(). Previously, grace_period variable was
> overloaded as a variable and a macro, making it inflexible to configure
> additional dequeue wait times.
>
> pm_config_dequeue_wait_counts() now takes in a cmd / variable. This
> allows flexibility to update different dequeue wait times.
>
> Signed-off-by: Harish Kasiviswanathan 
> ---
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 45 +++
>  .../drm/amd/amdkfd/kfd_device_queue_manager.h | 11 +++-
>  .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 26 -
>  .../drm/amd/amdkfd/kfd_packet_manager_v9.c| 56 ++-
>  .../drm/amd/amdkfd/kfd_packet_manager_vi.c|  4 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 28 --
>  6 files changed, 120 insertions(+), 50 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 94b1ac8a4735..cc7465f9562a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -42,6 +42,8 @@
>  /* Size of the per-pipe EOP queue */
>  #define CIK_HPD_EOP_BYTES_LOG2 11
>  #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
> +/* See unmap_queues_cpsch() */
> +#define USE_DEFAULT_GRACE_PERIOD 0x
>
>  static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
> u32 pasid, unsigned int vmid);
> @@ -1745,10 +1747,7 @@ static int initialize_cpsch(struct device_queue_manager
> *dqm)
>
>   init_sdma_bitmaps(dqm);
>
> - if (dqm->dev->kfd2kgd->get_iq_wait_times)
> - dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
> - &dqm->wait_times,
> - ffs(dqm->dev->xcc_mask) - 1);
> + update_dqm_wait_times(dqm);
>   return 0;
>  }
>
> @@ -1844,25 +1843,11 @@ static int start_cpsch(struct device_queue_manager
> *dqm)
>   /* clear hang status when driver try to start the hw scheduler */
>   dqm->sched_running = true;
>
> - if (!dqm->dev->kfd->shared_resources.enable_mes)
> + if (!dqm->dev->kfd->shared_resources.enable_mes) {
> + if (pm_config_dequeue_wait_counts(&dqm->packet_mgr,
> + KFD_DEQUEUE_WAIT_INIT, 0 /* unused */))
> + dev_err(dev, "Setting optimized dequeue wait failed. 
> Using
> default values\n");
>   execute_queues_cpsch(dqm,
> KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
> USE_DEFAULT_GRACE_PERIOD);
> -
> - /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
> - if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu &&
> - (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) {
> - uint32_t reg_offset = 0;
> - uint32_t grace_period = 1;
> -
> - retval = pm_update_grace_period(&dqm->packet_mgr,
> - grace_period);
> - if (retval)
> - dev_err(dev, "Setting grace timeout failed\n");
> - else if (dqm->dev->kfd2kgd->build_grace_period_packet_info)
> - /* Update dqm->wait_times maintained in software */
> - dqm->dev->kfd2kgd->build_grace_period_packet_info(
> - dqm->dev->adev, dqm->wait_times,
> - grace_period, ®_offset,
> - &dqm->wait_times);
>   }
>
>   /* setup per-queue reset detection buffer  */
> @@ -2261,7 +2246,14 @@ static int reset_queues_on_hws_hang(struct
> device_queue_manager *dqm)
>   return r;
>  }
>
> -/* dqm->lock mutex has to be locked before calling this function */
> +/* dqm->lock mutex has to be locked before calling this function
> + *
> + * @grace_period: If USE_DEFAULT_GRACE_PERIOD then default wait time
> + *   for context switch latency. Lower values are used by debugger
> + *   since context switching are triggered at high frequency.
> + *   This is configured by setting CP_IQ_WAIT_TIME2.SCH_WAVE
> + *
> + */
>  static int unmap_queues_cpsch(struct device_queue_manager *dqm,
>   enum kfd_unmap_queues_filter filter,
>   uint32_t filter_param,
> @@ -2280,7 +2272,8 @@ static int unmap_queues_cpsch(struct
> device_queue_manager *dqm,
>   return -EIO;
>
>   if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
> - retval = pm_update_grace_period(&dqm->packet_mgr,
> grace_period);
> + retval

RE: [PATCH 2/2] drm/amdgpu: validate user queue parameters

2025-02-27 Thread Liang, Prike
[Public]

It seems that invalid input parameters happen rarely. Using the `unlikely()` 
function in the validation condition can optimize the code execution path.

Anyway, the patch series is Reviewed-by: Prike Liang 

Regards,
  Prike

> -Original Message-
> From: amd-gfx  On Behalf Of Alex
> Deucher
> Sent: Thursday, February 27, 2025 12:03 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander 
> Subject: [PATCH 2/2] drm/amdgpu: validate user queue parameters
>
> Make sure these are set properly to ensure compatibility if we ever update the
> IOCTL interface.
>
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 14 ++
>  1 file changed, 14 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> index 0664e04828c07..39279920a757c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> @@ -380,12 +380,26 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void
> *data,
>
>   switch (args->in.op) {
>   case AMDGPU_USERQ_OP_CREATE:
> + if (args->in._pad)
> + return -EINVAL;
>   r = amdgpu_userqueue_create(filp, args);
>   if (r)
>   DRM_ERROR("Failed to create usermode queue\n");
>   break;
>
>   case AMDGPU_USERQ_OP_FREE:
> + if (args->in.ip_type ||
> + args->in.doorbell_handle ||
> + args->in.doorbell_offset ||
> + args->in._pad ||
> + args->in.queue_va ||
> + args->in.queue_size ||
> + args->in.rptr_va ||
> + args->in.wptr_va ||
> + args->in.wptr_va ||
> + args->in.mqd ||
> + args->in.mqd_size)
> + return -EINVAL;
>   r = amdgpu_userqueue_destroy(filp, args->in.queue_id);
>   if (r)
>   DRM_ERROR("Failed to destroy usermode queue\n");
> --
> 2.48.1



[PATCH] drm/amd/pm: Fix indentation issue

2025-02-27 Thread Asad Kamal
Fix indentation issue for smu_v_13_0_12 get_gpu_metrics

Reported-by: kernel test robot 
Closes: 
https://lore.kernel.org/oe-kbuild-all/202502272246.oisqunc1-...@intel.com

Signed-off-by: Asad Kamal 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
index 5e80b9aabfc9..285dbfe10303 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
@@ -469,7 +469,7 @@ ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context 
*smu, void **table)
SMUQ10_ROUND(metrics->GfxBusy[inst]);
gpu_metrics->xcp_stats[i].gfx_busy_acc[idx] =
SMUQ10_ROUND(metrics->GfxBusyAcc[inst]);
-   idx++;
+   idx++;
}
}
 
-- 
2.46.0



Re: [PATCH v3] drm/amdgpu: fix the memleak caused by fence not released

2025-02-27 Thread Yadav, Arvind



On 2/27/2025 7:55 PM, Christian König wrote:


Am 18.02.25 um 15:53 schrieb Arvind Yadav:

Encountering a taint issue during the unloading of gpu_sched
due to the fence not being released/put. In this context,
amdgpu_vm_clear_freed is responsible for creating a job to
update the page table (PT). It allocates kmem_cache for
drm_sched_fence and returns the finished fence associated
with job->base.s_fence. In case of Usermode queue this finished
fence is added to the timeline sync object through
amdgpu_gem_update_bo_mapping, which is utilized by user
space to ensure the completion of the PT update.

[  508.900587] 
=
[  508.900605] BUG drm_sched_fence (Tainted: G N): Objects 
remaining in drm_sched_fence on __kmem_cache_shutdown()
[  508.900617] 
-

[  508.900627] Slab 0xe0cc04548780 objects=32 used=2 fp=0x8ea81521f000 
flags=0x17c240(workingset|head|node=0|zone=2|lastcpupid=0x1f)
[  508.900645] CPU: 3 UID: 0 PID: 2337 Comm: rmmod Tainted: G N 
6.12.0+ #1
[  508.900651] Tainted: [N]=TEST
[  508.900653] Hardware name: Gigabyte Technology Co., Ltd. X570 AORUS 
ELITE/X570 AORUS ELITE, BIOS F34 06/10/2021
[  508.900656] Call Trace:
[  508.900659]  
[  508.900665]  dump_stack_lvl+0x70/0x90
[  508.900674]  dump_stack+0x14/0x20
[  508.900678]  slab_err+0xcb/0x110
[  508.900687]  ? srso_return_thunk+0x5/0x5f
[  508.900692]  ? try_to_grab_pending+0xd3/0x1d0
[  508.900697]  ? srso_return_thunk+0x5/0x5f
[  508.900701]  ? mutex_lock+0x17/0x50
[  508.900708]  __kmem_cache_shutdown+0x144/0x2d0
[  508.900713]  ? flush_rcu_work+0x50/0x60
[  508.900719]  kmem_cache_destroy+0x46/0x1f0
[  508.900728]  drm_sched_fence_slab_fini+0x19/0x970 [gpu_sched]
[  508.900736]  __do_sys_delete_module.constprop.0+0x184/0x320
[  508.900744]  ? srso_return_thunk+0x5/0x5f
[  508.900747]  ? debug_smp_processor_id+0x1b/0x30
[  508.900754]  __x64_sys_delete_module+0x16/0x20
[  508.900758]  x64_sys_call+0xdf/0x20d0
[  508.900763]  do_syscall_64+0x51/0x120
[  508.900769]  entry_SYSCALL_64_after_hwframe+0x76/0x7e

v2: call dma_fence_put in amdgpu_gem_va_update_vm
v3: Addressed review comments from Christian.
 - calling amdgpu_gem_update_timeline_node before switch.
 - puting a dma_fence in case of error or !timeline_syncobj.

Cc: Alex Deucher 
Cc: Christian König 
Cc: Shashank Sharma 
Cc: Sunil Khatri 
Signed-off-by: Le Ma 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 33 +
  1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 8b67aae6c2fe..40522b4f331b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -125,9 +125,6 @@ amdgpu_gem_update_bo_mapping(struct drm_file *filp,
struct amdgpu_vm *vm = &fpriv->vm;
struct dma_fence *last_update;
  
-	if (!syncobj)

-   return;
-
/* Find the last update fence */
switch (operation) {
case AMDGPU_VA_OP_MAP:
@@ -839,6 +836,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_exec exec;
uint64_t va_flags;
uint64_t vm_size;
+   int syncobj_status;
int r = 0;
  
  	if (args->va_address < AMDGPU_VA_RESERVED_BOTTOM) {

@@ -931,6 +929,12 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
bo_va = NULL;
}
  
+	syncobj_status = amdgpu_gem_update_timeline_node(filp,

+   args->vm_timeline_syncobj_out,
+   args->vm_timeline_point,
+   &timeline_syncobj,
+   &timeline_chain);
+

You don't need syncobj_status here, just assign the return value to r and abort 
if we can't find any syncobj.


I have not used variable 'r' because below inside 
switch(args->operation) the 'r' value will be reinitialized and the 
return value of amdgpu_gem_update_timeline_node will not be used. Here, 
we cannot abort because syncobj will be NULL for Non-UQ.

we can use r but it will not do anything. :)





switch (args->operation) {
case AMDGPU_VA_OP_MAP:
va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
@@ -957,22 +961,19 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
*data,
break;
}
if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) {
-
-   r = amdgpu_gem_update_timeline_node(filp,
-   
args->vm_timeline_syncobj_out,
-   args->vm_timeline_point,
-   &timeline_syncobj,
-

Re: [PATCH 3/6] drm/amdgpu: complete dce_v6_0_set_crtc_vline_interrupt_state() in DCE6

2025-02-27 Thread Alex Deucher
On Thu, Feb 27, 2025 at 12:23 AM Alexandre Demers
 wrote:
>
> dce_v6_0_set_crtc_vline_interrupt_state() was empty without any info to
> inform the user.

Doesn't hurt to fill it in, but nothing uses the vline interrupt at
the moment.  Might be better to just remove it from all of the non-DC
display code.

Alex


>
> Based on DCE8 and DCE10 code.
>
> Signed-off-by: Alexandre Demers 
> ---
>  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 44 +++
>  1 file changed, 44 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c 
> b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> index 254cb73324c6..e805c4f9222c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> @@ -2957,7 +2957,51 @@ static void 
> dce_v6_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
> int crtc,
> enum 
> amdgpu_interrupt_state state)
>  {
> +   u32 reg_block, lb_interrupt_mask;
>
> +   if (crtc >= adev->mode_info.num_crtc) {
> +   DRM_DEBUG("invalid crtc %d\n", crtc);
> +   return;
> +   }
> +
> +   switch (crtc) {
> +   case 0:
> +   reg_block = CRTC0_REGISTER_OFFSET;
> +   break;
> +   case 1:
> +   reg_block = CRTC1_REGISTER_OFFSET;
> +   break;
> +   case 2:
> +   reg_block = CRTC2_REGISTER_OFFSET;
> +   break;
> +   case 3:
> +   reg_block = CRTC3_REGISTER_OFFSET;
> +   break;
> +   case 4:
> +   reg_block = CRTC4_REGISTER_OFFSET;
> +   break;
> +   case 5:
> +   reg_block = CRTC5_REGISTER_OFFSET;
> +   break;
> +   default:
> +   DRM_DEBUG("invalid crtc %d\n", crtc);
> +   return;
> +   }
> +
> +   switch (state) {
> +   case AMDGPU_IRQ_STATE_DISABLE:
> +   lb_interrupt_mask = RREG32(mmINT_MASK + reg_block);
> +   lb_interrupt_mask &= ~VLINE_INT_MASK;
> +   WREG32(mmINT_MASK + reg_block, lb_interrupt_mask);
> +   break;
> +   case AMDGPU_IRQ_STATE_ENABLE:
> +   lb_interrupt_mask = RREG32(mmINT_MASK + reg_block);
> +   lb_interrupt_mask |= VLINE_INT_MASK;
> +   WREG32(mmINT_MASK + reg_block, lb_interrupt_mask);
> +   break;
> +   default:
> +   break;
> +   }
>  }
>
>  static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
> --
> 2.48.1
>


Re: [PATCH v2 4/4] drm/amdgpu/gfx12: Implement the GFX12 KCQ pipe reset

2025-02-27 Thread Alex Deucher
On Thu, Feb 27, 2025 at 7:36 AM Liang, Prike  wrote:
>
> [AMD Official Use Only - AMD Internal Distribution Only]
>
> Please review the series patch to catch up the gfx latest base and to avoid 
> the commit merged problem.

See my comment on patch 1:

#define RS64_FW_UC_START_ADDR_LO 0x3000

Will this potentially change in future firmware versions or is it
fixed?  If it will change, then let's just read it back from registers
and store it in adev->gfx.rs64_fw_use_start_addr_lo so
that it will be correct if the user has a mix of GPUs in the system.
Other than those comments, the series looks good to me.

Alex


>
> Regards,
>   Prike
>
> > -Original Message-
> > From: Liang, Prike 
> > Sent: Friday, February 21, 2025 9:01 PM
> > To: amd-gfx@lists.freedesktop.org
> > Cc: Deucher, Alexander ; Liang, Prike
> > 
> > Subject: [PATCH v2 4/4] drm/amdgpu/gfx12: Implement the GFX12 KCQ pipe
> > reset
> >
> > Implement the GFX12 KCQ pipe reset, and disable the GFX12 kernel compute
> > queue until the CPFW fully supports it.
> >
> > Signed-off-by: Prike Liang 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 90 +-
> >  1 file changed, 88 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > index 79ae7d538844..103298938d22 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > @@ -5352,6 +5352,90 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring
> > *ring, unsigned int vmid)
> >   return amdgpu_ring_test_ring(ring);
> >  }
> >
> > +static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring) {
> > +
> > + struct amdgpu_device *adev = ring->adev;
> > + uint32_t reset_pipe = 0, clean_pipe = 0;
> > + int r;
> > +
> > + if (!gfx_v12_pipe_reset_support(adev))
> > + return -EOPNOTSUPP;
> > +
> > + gfx_v12_0_set_safe_mode(adev, 0);
> > + mutex_lock(&adev->srbm_mutex);
> > + soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> > +
> > + reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
> > + clean_pipe = reset_pipe;
> > +
> > + if (adev->gfx.rs64_enable) {
> > +
> > + switch (ring->pipe) {
> > + case 0:
> > + reset_pipe = REG_SET_FIELD(reset_pipe,
> > CP_MEC_RS64_CNTL,
> > +MEC_PIPE0_RESET, 1);
> > + clean_pipe = REG_SET_FIELD(clean_pipe,
> > CP_MEC_RS64_CNTL,
> > +MEC_PIPE0_RESET, 0);
> > + break;
> > + case 1:
> > + reset_pipe = REG_SET_FIELD(reset_pipe,
> > CP_MEC_RS64_CNTL,
> > +MEC_PIPE1_RESET, 1);
> > + clean_pipe = REG_SET_FIELD(clean_pipe,
> > CP_MEC_RS64_CNTL,
> > +MEC_PIPE1_RESET, 0);
> > + break;
> > + case 2:
> > + reset_pipe = REG_SET_FIELD(reset_pipe,
> > CP_MEC_RS64_CNTL,
> > +MEC_PIPE2_RESET, 1);
> > + clean_pipe = REG_SET_FIELD(clean_pipe,
> > CP_MEC_RS64_CNTL,
> > +MEC_PIPE2_RESET, 0);
> > + break;
> > + case 3:
> > + reset_pipe = REG_SET_FIELD(reset_pipe,
> > CP_MEC_RS64_CNTL,
> > +MEC_PIPE3_RESET, 1);
> > + clean_pipe = REG_SET_FIELD(clean_pipe,
> > CP_MEC_RS64_CNTL,
> > +MEC_PIPE3_RESET, 0);
> > + break;
> > + default:
> > + break;
> > + }
> > + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
> > + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
> > + r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR)
> > << 2) - RS64_FW_UC_START_ADDR_LO;
> > + } else {
> > + switch (ring->pipe) {
> > + case 0:
> > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
> > +
> > MEC_ME1_PIPE0_RESET,
> > 1);
> > + clean_pipe = REG_SET_FIELD(clean_pipe,
> > CP_MEC_CNTL,
> > +
> > MEC_ME1_PIPE0_RESET,
> > 0);
> > + break;
> > + case 1:
> > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
> > +
> > MEC_ME1_PIPE1_RESET,
> > 1);
> > + clean_pipe = REG_SET_FIELD(clean_pipe,
> > CP_MEC_CNTL,
> > +
> > MEC_ME1_PIPE1_RESET,
> > 0);
> > +   

[PATCH 2/2] drm/amdgpu: Add support for CPERs on virtualization

2025-02-27 Thread Tony Yi
Add support for CPERs on VFs.

VFs do not receive PMFW messages directly; as such, they need to
query them from the host. To avoid hitting host event guard,
CPER queries need to be rate limited. CPER queries share the same
RAS telemetry buffer as error count query, so a mutex protecting
the shared buffer was added as well.

For readability, the amdgpu_detect_virtualization was refactored
into multiple individual functions.

Signed-off-by: Tony Yi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   |  31 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 138 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   |  18 ++-
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c  |  14 +++
 5 files changed, 195 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5e1d8f0039d0..198d29faa754 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3099,7 +3099,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
 
amdgpu_fru_get_product_info(adev);
 
-   r = amdgpu_cper_init(adev);
+   if (!amdgpu_sriov_vf(adev) || amdgpu_sriov_ras_cper_en(adev))
+   r = amdgpu_cper_init(adev);
 
 init_failed:
 
@@ -4335,10 +4336,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 * for throttling interrupt) = 60 seconds.
 */
ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
-   ratelimit_state_init(&adev->virt.ras_telemetry_rs, 5 * HZ, 1);
 
ratelimit_set_flags(&adev->throttling_logging_rs, 
RATELIMIT_MSG_ON_RELEASE);
-   ratelimit_set_flags(&adev->virt.ras_telemetry_rs, 
RATELIMIT_MSG_ON_RELEASE);
 
/* Registers mapping */
/* TODO: block userspace mapping of io register */
@@ -4370,7 +4369,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
return -ENOMEM;
 
/* detect hw virtualization here */
-   amdgpu_detect_virtualization(adev);
+   amdgpu_virt_init(adev);
 
amdgpu_device_get_pcie_info(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 81a7d4faac9c..d55c8b7fdb59 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -578,12 +578,32 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, 
char __user *buf,
return result;
 }
 
+static ssize_t amdgpu_debugfs_virt_ring_read(struct file *f, char __user *buf,
+   size_t size, loff_t *pos)
+{
+   struct amdgpu_ring *ring = file_inode(f)->i_private;
+
+   if (*pos & 3 || size & 3)
+   return -EINVAL;
+
+   if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+   amdgpu_virt_req_ras_cper_dump(ring->adev, false);
+
+   return amdgpu_debugfs_ring_read(f, buf, size, pos);
+}
+
 static const struct file_operations amdgpu_debugfs_ring_fops = {
.owner = THIS_MODULE,
.read = amdgpu_debugfs_ring_read,
.llseek = default_llseek
 };
 
+static const struct file_operations amdgpu_debugfs_virt_ring_fops = {
+   .owner = THIS_MODULE,
+   .read = amdgpu_debugfs_virt_ring_read,
+   .llseek = default_llseek
+};
+
 static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
   size_t size, loff_t *pos)
 {
@@ -671,9 +691,14 @@ void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
char name[32];
 
sprintf(name, "amdgpu_ring_%s", ring->name);
-   debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
-&amdgpu_debugfs_ring_fops,
-ring->ring_size + 12);
+   if (amdgpu_sriov_vf(adev))
+   debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+&amdgpu_debugfs_virt_ring_fops,
+ring->ring_size + 12);
+   else
+   debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+&amdgpu_debugfs_ring_fops,
+ring->ring_size + 12);
 
if (ring->mqd_obj) {
sprintf(name, "amdgpu_mqd_%s", ring->name);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index e6f0152e5b08..3832513ec7bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -739,7 +739,7 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
}
 }
 
-void amdgpu_detect_virtualization(struct amdgpu_device *adev)
+static u32 amdgpu_virt_init_detect_asic(struct amdgpu_device *adev)
 {
uint32_t reg;
 
@@ -775,8 +775,17 @@ void amdgpu_detect_virtualization(struct amdgpu_device 
*adev)
adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;

[PATCH 1/2] drm/amdgpu: Update headers for CPER support on SRIOV

2025-02-27 Thread Tony Yi
Update amdgv_sriovmsg.h and mxgpu_nv.h to add new definitions for
CPER support on VFs. PMFW ACA messages are not available on VFs,
and VFs must query CPERs from host.

Signed-off-by: Tony Yi 
---
 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 40 ++---
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h   |  2 ++
 2 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h 
b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index b4f9c2f4e92c..d6ac2652f0ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -97,11 +97,12 @@ union amd_sriov_msg_feature_flags {
uint32_t pp_one_vf_mode : 1;
uint32_t reg_indirect_acc   : 1;
uint32_t av1_support: 1;
-   uint32_t vcn_rb_decouple: 1;
+   uint32_t vcn_rb_decouple: 1;
uint32_t mes_info_dump_enable   : 1;
uint32_t ras_caps   : 1;
uint32_t ras_telemetry  : 1;
-   uint32_t reserved   : 21;
+   uint32_t ras_cper   : 1;
+   uint32_t reserved   : 20;
} flags;
uint32_t all;
 };
@@ -328,21 +329,25 @@ enum amd_sriov_mailbox_request_message {
MB_REQ_MSG_READY_TO_RESET = 201,
MB_REQ_MSG_RAS_POISON = 202,
MB_REQ_RAS_ERROR_COUNT = 203,
+   MB_REQ_RAS_CPER_DUMP = 204,
 };
 
 /* mailbox message send from host to guest  */
 enum amd_sriov_mailbox_response_message {
-   MB_RES_MSG_CLR_MSG_BUF = 0,
-   MB_RES_MSG_READY_TO_ACCESS_GPU = 1,
-   MB_RES_MSG_FLR_NOTIFICATION,
-   MB_RES_MSG_FLR_NOTIFICATION_COMPLETION,
-   MB_RES_MSG_SUCCESS,
-   MB_RES_MSG_FAIL,
-   MB_RES_MSG_QUERY_ALIVE,
-   MB_RES_MSG_GPU_INIT_DATA_READY,
-   MB_RES_MSG_RAS_ERROR_COUNT_READY = 11,
-
-   MB_RES_MSG_TEXT_MESSAGE = 255
+   MB_RES_MSG_CLR_MSG_BUF  = 0,
+   MB_RES_MSG_READY_TO_ACCESS_GPU  = 1,
+   MB_RES_MSG_FLR_NOTIFICATION = 2,
+   MB_RES_MSG_FLR_NOTIFICATION_COMPLETION  = 3,
+   MB_RES_MSG_SUCCESS  = 4,
+   MB_RES_MSG_FAIL = 5,
+   MB_RES_MSG_QUERY_ALIVE  = 6,
+   MB_RES_MSG_GPU_INIT_DATA_READY  = 7,
+   MB_RES_MSG_RAS_POISON_READY = 8,
+   MB_RES_MSG_PF_SOFT_FLR_NOTIFICATION = 9,
+   MB_RES_MSG_GPU_RMA  = 10,
+   MB_RES_MSG_RAS_ERROR_COUNT_READY= 11,
+   MB_REQ_RAS_CPER_DUMP_READY  = 14,
+   MB_RES_MSG_TEXT_MESSAGE = 255
 };
 
 enum amd_sriov_ras_telemetry_gpu_block {
@@ -386,11 +391,20 @@ struct amd_sriov_ras_telemetry_error_count {
} block[RAS_TELEMETRY_GPU_BLOCK_COUNT];
 };
 
+struct amd_sriov_ras_cper_dump {
+   uint32_t more;
+   uint64_t overflow_count;
+   uint64_t count;
+   uint64_t wptr;
+   uint32_t buf[];
+};
+
 struct amdsriov_ras_telemetry {
struct amd_sriov_ras_telemetry_header header;
 
union {
struct amd_sriov_ras_telemetry_error_count error_count;
+   struct amd_sriov_ras_cper_dump cper_dump;
} body;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
index 9d61d76e1bf9..72c9fceb9d79 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -41,6 +41,7 @@ enum idh_request {
IDH_READY_TO_RESET  = 201,
IDH_RAS_POISON  = 202,
IDH_REQ_RAS_ERROR_COUNT = 203,
+   IDH_REQ_RAS_CPER_DUMP = 204,
 };
 
 enum idh_event {
@@ -56,6 +57,7 @@ enum idh_event {
IDH_PF_SOFT_FLR_NOTIFICATION,
IDH_RAS_ERROR_DETECTED,
IDH_RAS_ERROR_COUNT_READY = 11,
+   IDH_RAS_CPER_DUMP_READY = 14,
 
IDH_TEXT_MESSAGE = 255,
 };
-- 
2.34.1



Re: [PATCH 2/6] drm/amdgpu: add dce_v6_0_soft_reset() to DCE6

2025-02-27 Thread Alex Deucher
On Thu, Feb 27, 2025 at 12:49 AM Alexandre Demers
 wrote:
>
> DCE6 was missing soft reset, but it was easily identifiable under radeon.
> This should be it, pretty much as it is done under DCE8 and DCE10.
>
> Signed-off-by: Alexandre Demers 
> ---
>  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 62 ---
>  1 file changed, 57 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c 
> b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> index bd763fde1c50..254cb73324c6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> @@ -371,27 +371,58 @@ static u32 dce_v6_0_hpd_get_gpio_reg(struct 
> amdgpu_device *adev)
> return mmDC_GPIO_HPD_A;
>  }
>
> +static bool dce_v6_0_is_display_hung(struct amdgpu_device *adev)
> +{
> +   u32 crtc_hung = 0;
> +   u32 crtc_status[6];
> +   u32 i, j, tmp;
> +
> +   for (i = 0; i < adev->mode_info.num_crtc; i++) {
> +   if (RREG32(mmCRTC_CONTROL + crtc_offsets[i]) & 
> CRTC_CONTROL__CRTC_MASTER_EN_MASK) {
> +   crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + 
> crtc_offsets[i]);
> +   crtc_hung |= (1 << i);
> +   }
> +   }
> +
> +   for (j = 0; j < 10; j++) {
> +   for (i = 0; i < adev->mode_info.num_crtc; i++) {
> +   if (crtc_hung & (1 << i)) {
> +   tmp = RREG32(mmCRTC_STATUS_HV_COUNT + 
> crtc_offsets[i]);
> +   if (tmp != crtc_status[i])
> +   crtc_hung &= ~(1 << i);
> +   }
> +   }
> +   if (crtc_hung == 0)
> +   return false;
> +   udelay(100);
> +   }
> +
> +   return true;
> +}
> +
>  static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev,
>   bool render)
>  {
> if (!render)
> WREG32(mmVGA_RENDER_CONTROL,
> RREG32(mmVGA_RENDER_CONTROL) & VGA_VSTATUS_CNTL);
> -
>  }
>
>  static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev)
>  {
> +   int num_crtc = 0;
> +
> switch (adev->asic_type) {
> case CHIP_TAHITI:
> case CHIP_PITCAIRN:
> case CHIP_VERDE:
> -   return 6;
> +   num_crtc = 6;
> case CHIP_OLAND:
> -   return 2;
> +   num_crtc = 2;
> default:
> -   return 0;
> +   num_crtc = 0;
> }
> +   return num_crtc;

Any particular reason for this change?  It just adds an extra variable.

Alex

>  }
>
>  void dce_v6_0_disable_dce(struct amdgpu_device *adev)
> @@ -2846,7 +2877,28 @@ static bool dce_v6_0_is_idle(void *handle)
>
>  static int dce_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
>  {
> -   DRM_INFO(": dce_v6_0_soft_reset --- no impl!!\n");
> +   u32 srbm_soft_reset = 0, tmp;
> +   struct amdgpu_device *adev = ip_block->adev;
> +
> +   if (dce_v6_0_is_display_hung(adev))
> +   srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
> +
> +   if (srbm_soft_reset) {
> +   tmp = RREG32(mmSRBM_SOFT_RESET);
> +   tmp |= srbm_soft_reset;
> +   dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
> +   WREG32(mmSRBM_SOFT_RESET, tmp);
> +   tmp = RREG32(mmSRBM_SOFT_RESET);
> +
> +   udelay(50);
> +
> +   tmp &= ~srbm_soft_reset;
> +   WREG32(mmSRBM_SOFT_RESET, tmp);
> +   tmp = RREG32(mmSRBM_SOFT_RESET);
> +
> +   /* Wait a little for things to settle down */
> +   udelay(50);
> +   }
> return 0;
>  }
>
> --
> 2.48.1
>


Re: [PATCH v3] drm/amdgpu: fix the memleak caused by fence not released

2025-02-27 Thread Christian König



Am 18.02.25 um 15:53 schrieb Arvind Yadav:
> Encountering a taint issue during the unloading of gpu_sched
> due to the fence not being released/put. In this context,
> amdgpu_vm_clear_freed is responsible for creating a job to
> update the page table (PT). It allocates kmem_cache for
> drm_sched_fence and returns the finished fence associated
> with job->base.s_fence. In case of Usermode queue this finished
> fence is added to the timeline sync object through
> amdgpu_gem_update_bo_mapping, which is utilized by user
> space to ensure the completion of the PT update.
>
> [  508.900587] 
> =
> [  508.900605] BUG drm_sched_fence (Tainted: G N): Objects 
> remaining in drm_sched_fence on __kmem_cache_shutdown()
> [  508.900617] 
> -
>
> [  508.900627] Slab 0xe0cc04548780 objects=32 used=2 
> fp=0x8ea81521f000 
> flags=0x17c240(workingset|head|node=0|zone=2|lastcpupid=0x1f)
> [  508.900645] CPU: 3 UID: 0 PID: 2337 Comm: rmmod Tainted: G 
> N 6.12.0+ #1
> [  508.900651] Tainted: [N]=TEST
> [  508.900653] Hardware name: Gigabyte Technology Co., Ltd. X570 AORUS 
> ELITE/X570 AORUS ELITE, BIOS F34 06/10/2021
> [  508.900656] Call Trace:
> [  508.900659]  
> [  508.900665]  dump_stack_lvl+0x70/0x90
> [  508.900674]  dump_stack+0x14/0x20
> [  508.900678]  slab_err+0xcb/0x110
> [  508.900687]  ? srso_return_thunk+0x5/0x5f
> [  508.900692]  ? try_to_grab_pending+0xd3/0x1d0
> [  508.900697]  ? srso_return_thunk+0x5/0x5f
> [  508.900701]  ? mutex_lock+0x17/0x50
> [  508.900708]  __kmem_cache_shutdown+0x144/0x2d0
> [  508.900713]  ? flush_rcu_work+0x50/0x60
> [  508.900719]  kmem_cache_destroy+0x46/0x1f0
> [  508.900728]  drm_sched_fence_slab_fini+0x19/0x970 [gpu_sched]
> [  508.900736]  __do_sys_delete_module.constprop.0+0x184/0x320
> [  508.900744]  ? srso_return_thunk+0x5/0x5f
> [  508.900747]  ? debug_smp_processor_id+0x1b/0x30
> [  508.900754]  __x64_sys_delete_module+0x16/0x20
> [  508.900758]  x64_sys_call+0xdf/0x20d0
> [  508.900763]  do_syscall_64+0x51/0x120
> [  508.900769]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
>
> v2: call dma_fence_put in amdgpu_gem_va_update_vm
> v3: Addressed review comments from Christian.
> - calling amdgpu_gem_update_timeline_node before switch.
> - puting a dma_fence in case of error or !timeline_syncobj.
>
> Cc: Alex Deucher 
> Cc: Christian König 
> Cc: Shashank Sharma 
> Cc: Sunil Khatri 
> Signed-off-by: Le Ma 
> Signed-off-by: Arvind Yadav 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 33 +
>  1 file changed, 17 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 8b67aae6c2fe..40522b4f331b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -125,9 +125,6 @@ amdgpu_gem_update_bo_mapping(struct drm_file *filp,
>   struct amdgpu_vm *vm = &fpriv->vm;
>   struct dma_fence *last_update;
>  
> - if (!syncobj)
> - return;
> -
>   /* Find the last update fence */
>   switch (operation) {
>   case AMDGPU_VA_OP_MAP:
> @@ -839,6 +836,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
> *data,
>   struct drm_exec exec;
>   uint64_t va_flags;
>   uint64_t vm_size;
> + int syncobj_status;
>   int r = 0;
>  
>   if (args->va_address < AMDGPU_VA_RESERVED_BOTTOM) {
> @@ -931,6 +929,12 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
> *data,
>   bo_va = NULL;
>   }
>  
> + syncobj_status = amdgpu_gem_update_timeline_node(filp,
> + args->vm_timeline_syncobj_out,
> + args->vm_timeline_point,
> + &timeline_syncobj,
> + &timeline_chain);
> +

You don't need syncobj_status here, just assign the return value to r and abort 
if we can't find any syncobj.

>   switch (args->operation) {
>   case AMDGPU_VA_OP_MAP:
>   va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
> @@ -957,22 +961,19 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
> *data,
>   break;
>   }
>   if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) {
> -
> - r = amdgpu_gem_update_timeline_node(filp,
> - 
> args->vm_timeline_syncobj_out,
> - args->vm_timeline_point,
> - &timeline_syncobj,
> - &timeline_chain);
> -
>   fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
>   args->operation);
>

Re: [PATCH] drm/amdgpu: Fix parameter annotation in vcn_v5_0_0_is_idle

2025-02-27 Thread Alex Deucher
Reviewed-by: Alex Deucher 

On Tue, Feb 25, 2025 at 8:52 PM Srinivasan Shanmugam
 wrote:
>
> Update parameter description in the vcn_v5_0_0_is_idle function
>
> Fixes the below with gcc W=1:
> drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c:1231: warning: Function parameter or 
> struct member 'ip_block' not described in 'vcn_v5_0_0_is_idle'
> drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c:1231: warning: Excess function 
> parameter 'handle' description in 'vcn_v5_0_0_is_idle'
>
> Cc: Christian König 
> Cc: Alex Deucher 
> Signed-off-by: Srinivasan Shanmugam 
> ---
>  drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c 
> b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
> index e07b500235b5..d99d05f42f1d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
> @@ -1223,7 +1223,7 @@ static void vcn_v5_0_0_set_unified_ring_funcs(struct 
> amdgpu_device *adev)
>  /**
>   * vcn_v5_0_0_is_idle - check VCN block is idle
>   *
> - * @handle: amdgpu_device pointer
> + * @ip_block: Pointer to the amdgpu_ip_block structure
>   *
>   * Check whether VCN block is idle
>   */
> --
> 2.34.1
>


Re: [PATCH] drm/amdgpu: Fix parameter annotations for VCN clock gating functions

2025-02-27 Thread Alex Deucher
Reviewed-by: Alex Deucher 

On Tue, Feb 25, 2025 at 8:43 PM Srinivasan Shanmugam
 wrote:
>
> The previous references to a non-existent `adev` parameter have been
> removed & corrected to reflect the use of the `vinst` pointer, which
> points to the VCN instance structure, in the below files:
>
> - vcn_v1_0.c
> - vcn_v2_0.c
> - vcn_v3_0.c
>
> Fixes the below with gcc W=1:
> drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c:624: warning: Function parameter or 
> struct member 'vinst' not described in 'vcn_v1_0_enable_clock_gating'
> drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c:624: warning: Excess function parameter 
> 'adev' description in 'vcn_v1_0_enable_clock_gating'
> drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c:376: warning: Function parameter or 
> struct member 'vinst' not described in 'vcn_v2_0_mc_resume'
> drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c:376: warning: Excess function parameter 
> 'adev' description in 'vcn_v2_0_mc_resume'
> drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c:776: warning: Function parameter or 
> struct member 'vinst' not described in 'vcn_v3_0_disable_clock_gating'
> drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c:776: warning: Excess function parameter 
> 'adev' description in 'vcn_v3_0_disable_clock_gating'
> drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c:776: warning: Excess function parameter 
> 'inst' description in 'vcn_v3_0_disable_clock_gating'
> drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c:965: warning: Function parameter or 
> struct member 'vinst' not described in 'vcn_v3_0_enable_clock_gating'
> drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c:965: warning: Excess function parameter 
> 'adev' description in 'vcn_v3_0_enable_clock_gating'
> drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c:965: warning: Excess function parameter 
> 'inst' description in 'vcn_v3_0_enable_clock_gating'
>
> Cc: Christian König 
> Cc: Alex Deucher 
> Signed-off-by: Srinivasan Shanmugam 
> ---
>  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 2 +-
>  drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 +-
>  drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 6 ++
>  3 files changed, 4 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c 
> b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> index 8bad63282de4..21b57c29bf7d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> @@ -616,7 +616,7 @@ static void vcn_v1_0_disable_clock_gating(struct 
> amdgpu_vcn_inst *vinst)
>  /**
>   * vcn_v1_0_enable_clock_gating - enable VCN clock gating
>   *
> - * @adev: amdgpu_device pointer
> + * @vinst: Pointer to the VCN instance structure
>   *
>   * Enable clock gating for VCN block
>   */
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c 
> b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
> index f53feb60772e..8e7a36f26e9c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
> @@ -368,7 +368,7 @@ static int vcn_v2_0_resume(struct amdgpu_ip_block 
> *ip_block)
>  /**
>   * vcn_v2_0_mc_resume - memory controller programming
>   *
> - * @adev: amdgpu_device pointer
> + * @vinst: Pointer to the VCN instance structure
>   *
>   * Let the VCN memory controller know it's offsets
>   */
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c 
> b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> index a3f16fd69927..22ae1939476f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> @@ -767,8 +767,7 @@ static void vcn_v3_0_enable_static_power_gating(struct 
> amdgpu_vcn_inst *vinst)
>  /**
>   * vcn_v3_0_disable_clock_gating - disable VCN clock gating
>   *
> - * @adev: amdgpu_device pointer
> - * @inst: instance number
> + * @vinst: Pointer to the VCN instance structure
>   *
>   * Disable clock gating for VCN block
>   */
> @@ -956,8 +955,7 @@ static void vcn_v3_0_clock_gating_dpg_mode(struct 
> amdgpu_vcn_inst *vinst,
>  /**
>   * vcn_v3_0_enable_clock_gating - enable VCN clock gating
>   *
> - * @adev: amdgpu_device pointer
> - * @inst: instance number
> + * @vinst: Pointer to the VCN instance structure
>   *
>   * Enable clock gating for VCN block
>   */
> --
> 2.34.1
>


Re: [PATCH v3] drm/amdgpu: fix the memleak caused by fence not released

2025-02-27 Thread Christian König
Am 27.02.25 um 16:08 schrieb Yadav, Arvind:
>
> On 2/27/2025 7:55 PM, Christian König wrote:
>>
>> Am 18.02.25 um 15:53 schrieb Arvind Yadav:
>>> Encountering a taint issue during the unloading of gpu_sched
>>> due to the fence not being released/put. In this context,
>>> amdgpu_vm_clear_freed is responsible for creating a job to
>>> update the page table (PT). It allocates kmem_cache for
>>> drm_sched_fence and returns the finished fence associated
>>> with job->base.s_fence. In case of Usermode queue this finished
>>> fence is added to the timeline sync object through
>>> amdgpu_gem_update_bo_mapping, which is utilized by user
>>> space to ensure the completion of the PT update.
>>>
>>> [  508.900587] 
>>> =
>>> [  508.900605] BUG drm_sched_fence (Tainted: G N): Objects 
>>> remaining in drm_sched_fence on __kmem_cache_shutdown()
>>> [  508.900617] 
>>> -
>>>
>>> [  508.900627] Slab 0xe0cc04548780 objects=32 used=2 
>>> fp=0x8ea81521f000 
>>> flags=0x17c240(workingset|head|node=0|zone=2|lastcpupid=0x1f)
>>> [  508.900645] CPU: 3 UID: 0 PID: 2337 Comm: rmmod Tainted: G   
>>>   N 6.12.0+ #1
>>> [  508.900651] Tainted: [N]=TEST
>>> [  508.900653] Hardware name: Gigabyte Technology Co., Ltd. X570 AORUS 
>>> ELITE/X570 AORUS ELITE, BIOS F34 06/10/2021
>>> [  508.900656] Call Trace:
>>> [  508.900659]  
>>> [  508.900665]  dump_stack_lvl+0x70/0x90
>>> [  508.900674]  dump_stack+0x14/0x20
>>> [  508.900678]  slab_err+0xcb/0x110
>>> [  508.900687]  ? srso_return_thunk+0x5/0x5f
>>> [  508.900692]  ? try_to_grab_pending+0xd3/0x1d0
>>> [  508.900697]  ? srso_return_thunk+0x5/0x5f
>>> [  508.900701]  ? mutex_lock+0x17/0x50
>>> [  508.900708]  __kmem_cache_shutdown+0x144/0x2d0
>>> [  508.900713]  ? flush_rcu_work+0x50/0x60
>>> [  508.900719]  kmem_cache_destroy+0x46/0x1f0
>>> [  508.900728]  drm_sched_fence_slab_fini+0x19/0x970 [gpu_sched]
>>> [  508.900736]  __do_sys_delete_module.constprop.0+0x184/0x320
>>> [  508.900744]  ? srso_return_thunk+0x5/0x5f
>>> [  508.900747]  ? debug_smp_processor_id+0x1b/0x30
>>> [  508.900754]  __x64_sys_delete_module+0x16/0x20
>>> [  508.900758]  x64_sys_call+0xdf/0x20d0
>>> [  508.900763]  do_syscall_64+0x51/0x120
>>> [  508.900769]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
>>>
>>> v2: call dma_fence_put in amdgpu_gem_va_update_vm
>>> v3: Addressed review comments from Christian.
>>>  - calling amdgpu_gem_update_timeline_node before switch.
>>>  - puting a dma_fence in case of error or !timeline_syncobj.
>>>
>>> Cc: Alex Deucher 
>>> Cc: Christian König 
>>> Cc: Shashank Sharma 
>>> Cc: Sunil Khatri 
>>> Signed-off-by: Le Ma 
>>> Signed-off-by: Arvind Yadav 
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 33 +
>>>   1 file changed, 17 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> index 8b67aae6c2fe..40522b4f331b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> @@ -125,9 +125,6 @@ amdgpu_gem_update_bo_mapping(struct drm_file *filp,
>>>   struct amdgpu_vm *vm = &fpriv->vm;
>>>   struct dma_fence *last_update;
>>>   -    if (!syncobj)
>>> -    return;
>>> -
>>>   /* Find the last update fence */
>>>   switch (operation) {
>>>   case AMDGPU_VA_OP_MAP:
>>> @@ -839,6 +836,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
>>> *data,
>>>   struct drm_exec exec;
>>>   uint64_t va_flags;
>>>   uint64_t vm_size;
>>> +    int syncobj_status;
>>>   int r = 0;
>>>     if (args->va_address < AMDGPU_VA_RESERVED_BOTTOM) {
>>> @@ -931,6 +929,12 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
>>> *data,
>>>   bo_va = NULL;
>>>   }
>>>   +    syncobj_status = amdgpu_gem_update_timeline_node(filp,
>>> +    args->vm_timeline_syncobj_out,
>>> +    args->vm_timeline_point,
>>> +    &timeline_syncobj,
>>> +    &timeline_chain);
>>> +
>> You don't need syncobj_status here, just assign the return value to r and 
>> abort if we can't find any syncobj.
>
> I have not used variable 'r' because below inside switch(args->operation) the 
> 'r' value will be reinitialized and the return value of 
> amdgpu_gem_update_timeline_node will not be used. Here, we cannot abort 
> because syncobj will be NULL for Non-UQ.

No, no that's wrong.

That timeline_syncobj is NULL is not an error. In other words when 
args->vm_timeline_syncobj_out == 0 then amdgpu_gem_update_timeline_node() 
should just set timeline_syncobj=NULL and return 0.

The error happens only if either args->vm_timeline_syncobj_out has a handler we 
can't find or if we fail to allocate memory for the timeline_chain.

In this case t

Re: [PATCH v3] drm/amdgpu: fix the memleak caused by fence not released

2025-02-27 Thread Yadav, Arvind



On 2/27/2025 9:12 PM, Christian König wrote:

No, no that's wrong.

That timeline_syncobj is NULL is not an error. In other words when 
args->vm_timeline_syncobj_out == 0 then amdgpu_gem_update_timeline_node() 
should just set timeline_syncobj=NULL and return 0.

The error happens only if either args->vm_timeline_syncobj_out has a handler we 
can't find or if we fail to allocate memory for the timeline_chain.

In this case the return value should be EINVAl or ENOMEM and then we absolutely 
should abort the operation.

Noted.
Thank you,
Arvind


Re: [PATCH 3/3] drm/amdgpu/sdma_v4_4_2: update VM flush implementation for SDMA

2025-02-27 Thread Christian König
Am 27.02.25 um 12:47 schrieb jesse.zh...@amd.com:
> This commit updates the VM flush implementation for the SDMA engine.
>
> - Added a new function `sdma_v4_4_2_get_invalidate_req` to construct the 
> VM_INVALIDATE_ENG0_REQ
>   register value for the specified VMID and flush type. This function ensures 
> that all relevant
>   page table cache levels (L1 PTEs, L2 PTEs, and L2 PDEs) are invalidated.
>
> - Modified the `sdma_v4_4_2_ring_emit_vm_flush` function to use the new 
> `sdma_v4_4_2_get_invalidate_req`
>   function. The updated function emits the necessary register writes and 
> waits to perform a VM flush
>   for the specified VMID. It updates the PTB address registers and issues a 
> VM invalidation request
>   using the specified VM invalidation engine.
>
> - Included the necessary header file `gc/gc_9_0_sh_mask.h` to provide access 
> to the required register
>   definitions.
>
> v2: vm flush by the vm inalidation packet (Lijo)
>
> Suggested-by: Lijo Lazar 
> Signed-off-by: Jesse Zhang 
> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 81 
>  1 file changed, 67 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> index ba43c8f46f45..a9e46a4ed7a8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> @@ -31,6 +31,7 @@
>  #include "amdgpu_ucode.h"
>  #include "amdgpu_trace.h"
>  #include "amdgpu_reset.h"
> +#include "gc/gc_9_0_sh_mask.h"
>  
>  #include "sdma/sdma_4_4_2_offset.h"
>  #include "sdma/sdma_4_4_2_sh_mask.h"
> @@ -1292,21 +1293,75 @@ static void 
> sdma_v4_4_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
>  seq, 0x, 4);
>  }
>  
> -
> -/**
> - * sdma_v4_4_2_ring_emit_vm_flush - vm flush using sDMA
> +/*
> + * sdma_v4_4_2_get_invalidate_req - Construct the VM_INVALIDATE_ENG0_REQ 
> register value
> + * @vmid: The VMID to invalidate
> + * @flush_type: The type of flush (0 = legacy, 1 = lightweight, 2 = 
> heavyweight)
>   *
> - * @ring: amdgpu_ring pointer
> - * @vmid: vmid number to use
> - * @pd_addr: address
> + * This function constructs the VM_INVALIDATE_ENG0_REQ register value for 
> the specified VMID
> + * and flush type. It ensures that all relevant page table cache levels (L1 
> PTEs, L2 PTEs, and
> + * L2 PDEs) are invalidated.
> + */
> +static uint32_t sdma_v4_4_2_get_invalidate_req(unsigned int vmid,
> + uint32_t flush_type)
> +{
> + u32 req = 0;
> +
> + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
> + PER_VMID_INVALIDATE_REQ, 1 << vmid);
> + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 
> flush_type);
> + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
> + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
> + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
> + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
> + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
> + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
> + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
> +
> + return req;
> +}
> +/* The vm validate packet is only available for GC9.4.3/GC9.4.4/GC9.5.0 */
> +#define SDMA_OP_VM_INVALIDATE 0x8
> +#define SDMA_SUBOP_VM_INVALIDATE 0x4

That needs to be in a header.

> +
> +/*
> + * sdma_v4_4_2_ring_emit_vm_flush - Emit VM flush commands for SDMA
> + * @ring: The SDMA ring
> + * @vmid: The VMID to flush
> + * @pd_addr: The page directory address
>   *
> - * Update the page table base and flush the VM TLB
> - * using sDMA.
> + * This function emits the necessary register writes and waits to perform a 
> VM flush for the
> + * specified VMID. It updates the PTB address registers and issues a VM 
> invalidation request
> + * using the specified VM invalidation engine.
>   */
>  static void sdma_v4_4_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
> -  unsigned vmid, uint64_t pd_addr)
> + unsigned int vmid, uint64_t pd_addr)
>  {
> - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
> + struct amdgpu_device *adev = ring->adev;
> + uint32_t req = sdma_v4_4_2_get_invalidate_req(vmid, 0);
> + unsigned int eng = ring->vm_inv_eng;
> + struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub];
> +

> + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
> +  (hub->ctx_addr_distance * vmid),
> +  lower_32_bits(pd_addr));
> +
> +amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
> +  (hub->ctx_addr_distance * vmid),
> +  upper_32_bits(pd_addr));

That is unecessary.

> + /*
> +  * Construct and emit the VM 

Re: [PATCH 2/3 v5] drm/amdgpu: Optimize VM invalidation engine allocation and synchronize GPU TLB flush

2025-02-27 Thread Christian König
Am 27.02.25 um 12:47 schrieb jesse.zh...@amd.com:
> From: "jesse.zh...@amd.com" 
>
> - Modify the VM invalidation engine allocation logic to handle SDMA page 
> rings.
>   SDMA page rings now share the VM invalidation engine with SDMA gfx rings 
> instead of
>   allocating a separate engine. This change ensures efficient resource 
> management and
>   avoids the issue of insufficient VM invalidation engines.
>
> - Add synchronization for GPU TLB flush operations in gmc_v9_0.c.
>   Use spin_lock and spin_unlock to ensure thread safety and prevent race 
> conditions
>   during TLB flush operations. This improves the stability and reliability of 
> the driver,
>   especially in multi-threaded environments.
>
>  v2: replace the sdma ring check with a function `amdgpu_sdma_is_page_queue`
>  to check if a ring is an SDMA page queue.(Lijo)
>
>  v3: Add GC version check, only enabled on GC9.4.3/9.4.4/9.5.0

This needs to be the last patch in the series and not the second. Otherwise you 
have a broken state in between.

>
> Suggested-by: Lijo Lazar 
> Signed-off-by: Jesse Zhang 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  |  7 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 23 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  1 +
>  3 files changed, 31 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index c6e5c50a3322..68088d731c23 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -602,8 +602,15 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device 
> *adev)
>   return -EINVAL;
>   }
>  
> + if(amdgpu_sdma_is_shared_inv_eng(adev, ring)) {
> + /* Do not allocate a separate VM invalidation engine for SDMA 
> page rings.
> +  * Shared VM invalid engine with sdma gfx ring.
> +  */

First of all that comment has style issues, please use checkpatch.pl.

Then you need to describe why that is done and what are the pre-requisites to 
make it work.

E.g. something like "SDMA has a special packet which allows it to use the same 
invalidation engine for all the rings in one instance."

Christian.

> + ring->vm_inv_eng = inv_eng - 1;
> + } else {
>   ring->vm_inv_eng = inv_eng - 1;
>   vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);
> + }
>  
>   dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
>ring->name, ring->vm_inv_eng, ring->vm_hub);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> index 39669f8788a7..019f670edc29 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> @@ -504,6 +504,29 @@ void amdgpu_sdma_sysfs_reset_mask_fini(struct 
> amdgpu_device *adev)
>   }
>  }
>  
> +/**
> +* amdgpu_sdma_is_shared_inv_eng - Check if a ring is an SDMA ring that 
> shares a VM invalidation engine
> +* @adev: Pointer to the AMDGPU device structure
> +* @ring: Pointer to the ring structure to check
> +*
> +* This function checks if the given ring is an SDMA ring that shares a VM 
> invalidation engine.
> +* It returns true if the ring is such an SDMA ring, false otherwise.
> +*/
> +bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct 
> amdgpu_ring* ring)
> +{
> + int i = ring->me;
> +
> + if (!adev->sdma.has_page_queue || i >= adev->sdma.num_instances)
> + return false;
> +
> + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
> + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
> + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
> + return (ring == &adev->sdma.instance[i].ring);
> + else
> + return false;
> +}
> +
>  /**
>   * amdgpu_sdma_register_on_reset_callbacks - Register SDMA reset callbacks
>   * @funcs: Pointer to the callback structure containing pre_reset and 
> post_reset functions
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> index 965169320065..dcc8fd7a6784 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> @@ -194,4 +194,5 @@ int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
>  void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev);
>  int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev);
>  void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev);
> +bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct 
> amdgpu_ring* ring);
>  #endif



Re: [PATCH v2] drm/amdkfd: Fix instruction hazard in gfx12 trap handler

2025-02-27 Thread Jay Cornwall

On 2/7/2025 17:38, Jay Cornwall wrote:


VALU instructions with SGPR source need wait states to avoid hazard
with SALU using different SGPR.

v2: Eliminate some hazards to reduce code explosion

Signed-off-by: Jay Cornwall 
Cc: Lancelot Six 


Ping.


[PATCH 6.1.y] drm/amd/display: fixed integer types and null check locations

2025-02-27 Thread jianqi.ren.cn
From: Sohaib Nadeem 

[ Upstream commit 0484e05d048b66d01d1f3c1d2306010bb57d8738 ]

[why]:
issues fixed:
- comparison with wider integer type in loop condition which can cause
infinite loops
- pointer dereference before null check

Cc: Mario Limonciello 
Cc: Alex Deucher 
Cc: sta...@vger.kernel.org
Reviewed-by: Josip Pavic 
Acked-by: Aurabindo Pillai 
Signed-off-by: Sohaib Nadeem 
Tested-by: Daniel Wheeler 
Signed-off-by: Alex Deucher 
[ To fix CVE-2024-26767, delete changes made in 
drivers/gpu/drm/amd/display/dc/link/link_validation.c
 for this file is deleted in linux-6.1 ]
Signed-off-by: Jianqi Ren 
Signed-off-by: He Zhe 
---
Verified the build test.
---
 .../gpu/drm/amd/display/dc/bios/bios_parser2.c   | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c 
b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 4d2590964a20..75e44d8a7b40 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -1862,19 +1862,21 @@ static enum bp_result get_firmware_info_v3_2(
/* Vega12 */
smu_info_v3_2 = GET_IMAGE(struct atom_smu_info_v3_2,
DATA_TABLES(smu_info));
-   DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): 
%d\n", smu_info_v3_2->gpuclk_ss_percentage);
if (!smu_info_v3_2)
return BP_RESULT_BADBIOSTABLE;
 
+   DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): 
%d\n", smu_info_v3_2->gpuclk_ss_percentage);
+
info->default_engine_clk = smu_info_v3_2->bootup_dcefclk_10khz 
* 10;
} else if (revision.minor == 3) {
/* Vega20 */
smu_info_v3_3 = GET_IMAGE(struct atom_smu_info_v3_3,
DATA_TABLES(smu_info));
-   DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): 
%d\n", smu_info_v3_3->gpuclk_ss_percentage);
if (!smu_info_v3_3)
return BP_RESULT_BADBIOSTABLE;
 
+   DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): 
%d\n", smu_info_v3_3->gpuclk_ss_percentage);
+
info->default_engine_clk = smu_info_v3_3->bootup_dcefclk_10khz 
* 10;
}
 
@@ -2439,10 +2441,11 @@ static enum bp_result get_integrated_info_v11(
info_v11 = GET_IMAGE(struct atom_integrated_system_info_v1_11,
DATA_TABLES(integratedsysteminfo));
 
-   DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", 
info_v11->gpuclk_ss_percentage);
if (info_v11 == NULL)
return BP_RESULT_BADBIOSTABLE;
 
+   DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", 
info_v11->gpuclk_ss_percentage);
+
info->gpu_cap_info =
le32_to_cpu(info_v11->gpucapinfo);
/*
@@ -2654,11 +2657,12 @@ static enum bp_result get_integrated_info_v2_1(
 
info_v2_1 = GET_IMAGE(struct atom_integrated_system_info_v2_1,
DATA_TABLES(integratedsysteminfo));
-   DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", 
info_v2_1->gpuclk_ss_percentage);
 
if (info_v2_1 == NULL)
return BP_RESULT_BADBIOSTABLE;
 
+   DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", 
info_v2_1->gpuclk_ss_percentage);
+
info->gpu_cap_info =
le32_to_cpu(info_v2_1->gpucapinfo);
/*
@@ -2816,11 +2820,11 @@ static enum bp_result get_integrated_info_v2_2(
info_v2_2 = GET_IMAGE(struct atom_integrated_system_info_v2_2,
DATA_TABLES(integratedsysteminfo));
 
-   DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", 
info_v2_2->gpuclk_ss_percentage);
-
if (info_v2_2 == NULL)
return BP_RESULT_BADBIOSTABLE;
 
+   DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", 
info_v2_2->gpuclk_ss_percentage);
+
info->gpu_cap_info =
le32_to_cpu(info_v2_2->gpucapinfo);
/*
-- 
2.25.1



[PATCH 3/3] drm/amdgpu/sdma_v4_4_2: update VM flush implementation for SDMA

2025-02-27 Thread jesse.zh...@amd.com
This commit updates the VM flush implementation for the SDMA engine.

- Added a new function `sdma_v4_4_2_get_invalidate_req` to construct the 
VM_INVALIDATE_ENG0_REQ
  register value for the specified VMID and flush type. This function ensures 
that all relevant
  page table cache levels (L1 PTEs, L2 PTEs, and L2 PDEs) are invalidated.

- Modified the `sdma_v4_4_2_ring_emit_vm_flush` function to use the new 
`sdma_v4_4_2_get_invalidate_req`
  function. The updated function emits the necessary register writes and waits 
to perform a VM flush
  for the specified VMID. It updates the PTB address registers and issues a VM 
invalidation request
  using the specified VM invalidation engine.

- Included the necessary header file `gc/gc_9_0_sh_mask.h` to provide access to 
the required register
  definitions.

v2: vm flush by the vm inalidation packet (Lijo)

Suggested-by: Lijo Lazar 
Signed-off-by: Jesse Zhang 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 81 
 1 file changed, 67 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index ba43c8f46f45..a9e46a4ed7a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -31,6 +31,7 @@
 #include "amdgpu_ucode.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_reset.h"
+#include "gc/gc_9_0_sh_mask.h"
 
 #include "sdma/sdma_4_4_2_offset.h"
 #include "sdma/sdma_4_4_2_sh_mask.h"
@@ -1292,21 +1293,75 @@ static void sdma_v4_4_2_ring_emit_pipeline_sync(struct 
amdgpu_ring *ring)
   seq, 0x, 4);
 }
 
-
-/**
- * sdma_v4_4_2_ring_emit_vm_flush - vm flush using sDMA
+/*
+ * sdma_v4_4_2_get_invalidate_req - Construct the VM_INVALIDATE_ENG0_REQ 
register value
+ * @vmid: The VMID to invalidate
+ * @flush_type: The type of flush (0 = legacy, 1 = lightweight, 2 = 
heavyweight)
  *
- * @ring: amdgpu_ring pointer
- * @vmid: vmid number to use
- * @pd_addr: address
+ * This function constructs the VM_INVALIDATE_ENG0_REQ register value for the 
specified VMID
+ * and flush type. It ensures that all relevant page table cache levels (L1 
PTEs, L2 PTEs, and
+ * L2 PDEs) are invalidated.
+ */
+static uint32_t sdma_v4_4_2_get_invalidate_req(unsigned int vmid,
+   uint32_t flush_type)
+{
+   u32 req = 0;
+
+   req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
+   PER_VMID_INVALIDATE_REQ, 1 << vmid);
+   req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 
flush_type);
+   req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+   req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+   req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+   req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+   req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+   req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
+   CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+   return req;
+}
+/* The vm validate packet is only available for GC9.4.3/GC9.4.4/GC9.5.0 */
+#define SDMA_OP_VM_INVALIDATE 0x8
+#define SDMA_SUBOP_VM_INVALIDATE 0x4
+
+/*
+ * sdma_v4_4_2_ring_emit_vm_flush - Emit VM flush commands for SDMA
+ * @ring: The SDMA ring
+ * @vmid: The VMID to flush
+ * @pd_addr: The page directory address
  *
- * Update the page table base and flush the VM TLB
- * using sDMA.
+ * This function emits the necessary register writes and waits to perform a VM 
flush for the
+ * specified VMID. It updates the PTB address registers and issues a VM 
invalidation request
+ * using the specified VM invalidation engine.
  */
 static void sdma_v4_4_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
-unsigned vmid, uint64_t pd_addr)
+   unsigned int vmid, uint64_t pd_addr)
 {
-   amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+   struct amdgpu_device *adev = ring->adev;
+   uint32_t req = sdma_v4_4_2_get_invalidate_req(vmid, 0);
+   unsigned int eng = ring->vm_inv_eng;
+   struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub];
+
+   amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+  (hub->ctx_addr_distance * vmid),
+  lower_32_bits(pd_addr));
+
+amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+  (hub->ctx_addr_distance * vmid),
+  upper_32_bits(pd_addr));
+   /*
+* Construct and emit the VM invalidation packet:
+* DW0: OP, Sub OP, Engine IDs (XCC0, XCC1, MMHUB)
+* DW1: Invalidation request
+* DW2: Lower 32 bits of page directory address
+* DW3: Upper 32 bits of page directory address and INVALIDATEACK
+*/
+   amdgpu_ring_write(ring, SD

[PATCH 2/3 v5] drm/amdgpu: Optimize VM invalidation engine allocation and synchronize GPU TLB flush

2025-02-27 Thread jesse.zh...@amd.com
From: "jesse.zh...@amd.com" 

- Modify the VM invalidation engine allocation logic to handle SDMA page rings.
  SDMA page rings now share the VM invalidation engine with SDMA gfx rings 
instead of
  allocating a separate engine. This change ensures efficient resource 
management and
  avoids the issue of insufficient VM invalidation engines.

- Add synchronization for GPU TLB flush operations in gmc_v9_0.c.
  Use spin_lock and spin_unlock to ensure thread safety and prevent race 
conditions
  during TLB flush operations. This improves the stability and reliability of 
the driver,
  especially in multi-threaded environments.

 v2: replace the sdma ring check with a function `amdgpu_sdma_is_page_queue`
 to check if a ring is an SDMA page queue.(Lijo)

 v3: Add GC version check, only enabled on GC9.4.3/9.4.4/9.5.0

Suggested-by: Lijo Lazar 
Signed-off-by: Jesse Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  |  7 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 23 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  1 +
 3 files changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index c6e5c50a3322..68088d731c23 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -602,8 +602,15 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device 
*adev)
return -EINVAL;
}
 
+   if(amdgpu_sdma_is_shared_inv_eng(adev, ring)) {
+   /* Do not allocate a separate VM invalidation engine for SDMA 
page rings.
+* Shared VM invalid engine with sdma gfx ring.
+*/
+   ring->vm_inv_eng = inv_eng - 1;
+   } else {
ring->vm_inv_eng = inv_eng - 1;
vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);
+   }
 
dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
 ring->name, ring->vm_inv_eng, ring->vm_hub);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 39669f8788a7..019f670edc29 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -504,6 +504,29 @@ void amdgpu_sdma_sysfs_reset_mask_fini(struct 
amdgpu_device *adev)
}
 }
 
+/**
+* amdgpu_sdma_is_shared_inv_eng - Check if a ring is an SDMA ring that shares 
a VM invalidation engine
+* @adev: Pointer to the AMDGPU device structure
+* @ring: Pointer to the ring structure to check
+*
+* This function checks if the given ring is an SDMA ring that shares a VM 
invalidation engine.
+* It returns true if the ring is such an SDMA ring, false otherwise.
+*/
+bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct 
amdgpu_ring* ring)
+{
+   int i = ring->me;
+
+   if (!adev->sdma.has_page_queue || i >= adev->sdma.num_instances)
+   return false;
+
+   if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+   amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+   amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+   return (ring == &adev->sdma.instance[i].ring);
+   else
+   return false;
+}
+
 /**
  * amdgpu_sdma_register_on_reset_callbacks - Register SDMA reset callbacks
  * @funcs: Pointer to the callback structure containing pre_reset and 
post_reset functions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 965169320065..dcc8fd7a6784 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -194,4 +194,5 @@ int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev);
 int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev);
 void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct 
amdgpu_ring* ring);
 #endif
-- 
2.25.1



[PATCH 1/3 v5] drm/amd/amdgpu: Increase max rings to enable SDMA page ring

2025-02-27 Thread jesse.zh...@amd.com
From: "jesse.zh...@amd.com" 

Increase the maximum number of rings supported by the AMDGPU driver from 132 to 
148.
This change is necessary to enable support for the SDMA page ring.

Signed-off-by: Jesse Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 52f7a9a79e7b..4224f8fa1614 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -37,7 +37,7 @@ struct amdgpu_job;
 struct amdgpu_vm;
 
 /* max number of rings */
-#define AMDGPU_MAX_RINGS   132
+#define AMDGPU_MAX_RINGS   148
 #define AMDGPU_MAX_HWIP_RINGS  64
 #define AMDGPU_MAX_GFX_RINGS   2
 #define AMDGPU_MAX_SW_GFX_RINGS 2
-- 
2.25.1



[PATCH] amdgpu: add env support for amdgpu.ids path

2025-02-27 Thread Sergio Costas Rodriguez
In some cases, like when building a Snap application that uses
libdrm, the `amdgpu.ids` file isn't directly available at the
compiling place, but inside a mounted folder. This forces each
application to link/bind the file from the current place
(usually at the $SNAP/gnome-platform/usr/share/libdrm/amdgpu.ids)
which is cumbersome.

This patch allows to set an environment variable, called
AMDGPU_ASIC_ID_TABLE_PATH, where the file will be also searched
if it isn't located in the default, meson-configured, path.

This patch is also available in the MESA gitlab:
https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/273
---
 README.rst  |  9 +
 amdgpu/amdgpu_asic_id.c | 14 ++
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/README.rst b/README.rst
index 74608031..6532c12f 100644
--- a/README.rst
+++ b/README.rst
@@ -49,3 +49,12 @@ Then use ninja to build and install:
 
 If you are installing into a system location you will need to run install
 separately, and as root.
+
+AMDGPU ASIC table file
+--
+
+The AMDGPU driver requires the `amdgpu.ids` file. It is usually located at
+`$PREFIX/share/libdrm`, but it is possible to specify an alternative path
+during runtime by setting the `AMDGPU_ASIC_ID_TABLE_PATH` environment
+variable with the full path (including the file name) of the alternative
+file.
diff --git a/amdgpu/amdgpu_asic_id.c b/amdgpu/amdgpu_asic_id.c
index a5007ffc..81a7cf7f 100644
--- a/amdgpu/amdgpu_asic_id.c
+++ b/amdgpu/amdgpu_asic_id.c
@@ -112,10 +112,16 @@ void amdgpu_parse_asic_ids(struct amdgpu_device *dev)
ssize_t n;
int line_num = 1;
int r = 0;
+   const char *amdgpu_asic_id_table_path = 
getenv("AMDGPU_ASIC_ID_TABLE_PATH");
+
+   if (amdgpu_asic_id_table_path == NULL || amdgpu_asic_id_table_path[0] 
== '\0') {
+   amdgpu_asic_id_table_path = AMDGPU_ASIC_ID_TABLE;
+   }
+
+   fp = fopen(amdgpu_asic_id_table_path, "r");
 
-   fp = fopen(AMDGPU_ASIC_ID_TABLE, "r");
if (!fp) {
-   fprintf(stderr, "%s: %s\n", AMDGPU_ASIC_ID_TABLE,
+   fprintf(stderr, "%s: %s\n", amdgpu_asic_id_table_path,
strerror(errno));
return;
}
@@ -132,7 +138,7 @@ void amdgpu_parse_asic_ids(struct amdgpu_device *dev)
continue;
}
 
-   drmMsg("%s version: %s\n", AMDGPU_ASIC_ID_TABLE, line);
+   drmMsg("%s version: %s\n", amdgpu_asic_id_table_path, line);
break;
}
 
@@ -150,7 +156,7 @@ void amdgpu_parse_asic_ids(struct amdgpu_device *dev)
 
if (r == -EINVAL) {
fprintf(stderr, "Invalid format: %s: line %d: %s\n",
-   AMDGPU_ASIC_ID_TABLE, line_num, line);
+   amdgpu_asic_id_table_path, line_num, line);
} else if (r && r != -EAGAIN) {
fprintf(stderr, "%s: Cannot parse ASIC IDs: %s\n",
__func__, strerror(-r));
-- 
2.45.2



RE: [PATCH v2 4/4] drm/amdgpu/gfx12: Implement the GFX12 KCQ pipe reset

2025-02-27 Thread Liang, Prike
[AMD Official Use Only - AMD Internal Distribution Only]

Please review the series patch to catch up the gfx latest base and to avoid the 
commit merged problem.

Regards,
  Prike

> -Original Message-
> From: Liang, Prike 
> Sent: Friday, February 21, 2025 9:01 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Liang, Prike
> 
> Subject: [PATCH v2 4/4] drm/amdgpu/gfx12: Implement the GFX12 KCQ pipe
> reset
>
> Implement the GFX12 KCQ pipe reset, and disable the GFX12 kernel compute
> queue until the CPFW fully supports it.
>
> Signed-off-by: Prike Liang 
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 90 +-
>  1 file changed, 88 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> index 79ae7d538844..103298938d22 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> @@ -5352,6 +5352,90 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring
> *ring, unsigned int vmid)
>   return amdgpu_ring_test_ring(ring);
>  }
>
> +static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring) {
> +
> + struct amdgpu_device *adev = ring->adev;
> + uint32_t reset_pipe = 0, clean_pipe = 0;
> + int r;
> +
> + if (!gfx_v12_pipe_reset_support(adev))
> + return -EOPNOTSUPP;
> +
> + gfx_v12_0_set_safe_mode(adev, 0);
> + mutex_lock(&adev->srbm_mutex);
> + soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> +
> + reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
> + clean_pipe = reset_pipe;
> +
> + if (adev->gfx.rs64_enable) {
> +
> + switch (ring->pipe) {
> + case 0:
> + reset_pipe = REG_SET_FIELD(reset_pipe,
> CP_MEC_RS64_CNTL,
> +MEC_PIPE0_RESET, 1);
> + clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_RS64_CNTL,
> +MEC_PIPE0_RESET, 0);
> + break;
> + case 1:
> + reset_pipe = REG_SET_FIELD(reset_pipe,
> CP_MEC_RS64_CNTL,
> +MEC_PIPE1_RESET, 1);
> + clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_RS64_CNTL,
> +MEC_PIPE1_RESET, 0);
> + break;
> + case 2:
> + reset_pipe = REG_SET_FIELD(reset_pipe,
> CP_MEC_RS64_CNTL,
> +MEC_PIPE2_RESET, 1);
> + clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_RS64_CNTL,
> +MEC_PIPE2_RESET, 0);
> + break;
> + case 3:
> + reset_pipe = REG_SET_FIELD(reset_pipe,
> CP_MEC_RS64_CNTL,
> +MEC_PIPE3_RESET, 1);
> + clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_RS64_CNTL,
> +MEC_PIPE3_RESET, 0);
> + break;
> + default:
> + break;
> + }
> + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
> + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
> + r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR)
> << 2) - RS64_FW_UC_START_ADDR_LO;
> + } else {
> + switch (ring->pipe) {
> + case 0:
> + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
> +MEC_ME1_PIPE0_RESET,
> 1);
> + clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_CNTL,
> +MEC_ME1_PIPE0_RESET,
> 0);
> + break;
> + case 1:
> + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
> +MEC_ME1_PIPE1_RESET,
> 1);
> + clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_CNTL,
> +MEC_ME1_PIPE1_RESET,
> 0);
> + break;
> + default:
> + break;
> + }
> + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
> + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
> + /* Doesn't find the F32 MEC instruction pointer register, and
> suppose
> +  * the driver won't run into the F32 mode.
> +  */
> + }
> +
> + soc24_grbm_select(adev, 0, 0, 0, 0);
> + mutex_unlock(&adev->srbm_mutex);
> + gfx_v12_0_unset_safe_mode(adev, 0);
> +
> + dev_info(adev->dev,"The ring %s pipe resets: %s\n", ring->name,
> + r == 0 ? "successfully" : "failed");
> + /* Need the ring test to ver

[PATCH] drm/amdgpu: Set PG state to gating for vcn_v_5_0_1

2025-02-27 Thread Asad Kamal
For vcn_v_5_0_1, set power state to gating during hw fini. Also there may
be scenario where VCN engine hangs during a job execution, then it's not
safe to assume that set_pg_state works fine during hw_fini to put the state
to gated. After a reset, we can assume that it's in the default state,
therefore reset the driver maintained state. Put the default state as gated
during reset as per this assumption.

Signed-off-by: Asad Kamal 
Suggested-by: Lijo Lazar 
Reviewed-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
index 900702b1a3bb..0273157c2bfd 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
@@ -224,8 +224,13 @@ static int vcn_v5_0_1_hw_fini(struct amdgpu_ip_block 
*ip_block)
struct amdgpu_device *adev = ip_block->adev;
int i;
 
-   for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+   struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
+   if (vinst->cur_state != AMD_PG_STATE_GATE)
+   vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
+   }
 
return 0;
 }
@@ -268,6 +273,11 @@ static int vcn_v5_0_1_resume(struct amdgpu_ip_block 
*ip_block)
int r, i;
 
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+   struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+   if (amdgpu_in_reset(adev))
+   vinst->cur_state = AMD_PG_STATE_GATE;
+
r = amdgpu_vcn_resume(ip_block->adev, i);
if (r)
return r;
-- 
2.46.0