Hi David,

Thanks for the review! My answers are in-line.

Best Regards!

James zhu

On 2026-03-17 15:19, Francis, David wrote:
Have skimmed this patch set. Generally looks normal; another hardware block 
with the usual functionality.
If these pass basic tests, they're probably fine or at least not harmful.
Some general comments:
- The commit descriptions aren't very descriptive. I'd at least like to see 
documentation of the ioctl interface and a brief notice of what SPM is 
somewhere, and some more words on what each patch is doing
[JZ] Sure I will add more descriptions.
- Patch 5/17 adds a way to allocate GTT-type memory through amdgpu_vm. This 
seems sort of out of place with the rest of the patch set. Why is it here? And 
why can't we just use the existing amdgpu_gtt_mgr?
[JZ] I don't think we have an existing API to invoke. Originally it comes from amdgpu_amdkfd_alloc_kernel_mem.
- Patch 14/17 reserves extra space and puts magic numbers in it to avoid a page 
fault. This seems like a workaround for something; at very least I'd like the 
commit description to describe the problem this is solving in detail. I'd also 
prefer if there was a solution that didn't involve poisoning entries with magic 
numbers and checking possibly invalid memory.

[JZ] Yes, it is a HW bug, so basically the solution is a workaround. I will add more description. This solution was worked out together with engineers from other teams. The memory space was allocated properly, but the HW's wrong logic for ring buffer wrap-around caused an overflow and randomly overwrote this adjacent allocated space with unexpected data.

Thanks,
David

________________________________________
From: amd-gfx<[email protected]> on behalf of James 
Zhu<[email protected]>
Sent: Friday, February 20, 2026 2:22 PM
To:[email protected]; Deucher, Alexander
Cc: Ma, Bing; Zhu, James
Subject: [PATCH 17/17] drm/amdgpu: add profiler/spm support for gfx9_4_3

with spm function interface and spm irq.

Signed-off-by: James Zhu<[email protected]>
---
  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 194 ++++++++++++++++++++++--
  1 file changed, 183 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 44b07785bf9c..29fd5e2413da 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -169,6 +169,8 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device 
*adev);
  static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev);
  static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
                                 struct amdgpu_cu_info *cu_info);
+static void gfx_v9_4_3_update_spm_vmid_internal(struct amdgpu_device *adev,
+                                             int xcc_id, unsigned int vmid);
  static void gfx_v9_4_3_xcc_set_safe_mode(struct amdgpu_device *adev, int 
xcc_id);
  static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev, int 
xcc_id);

@@ -1065,6 +1067,13 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block 
*ip_block)

         num_xcc = NUM_XCC(adev->gfx.xcc_mask);

+       /* SPM */
+       r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_RLC,
+                             GFX_9_0__SRCID__RLC_STRM_PERF_MONITOR_INTERRUPT,
+                             &adev->gfx.spm_irq);
+       if (r)
+               return r;
+
         /* EOP Event */
         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 
GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
         if (r)
@@ -1453,10 +1462,14 @@ static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct 
amdgpu_device *adev)

  static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
  {
-       /* init spm vmid with 0xf */
-       if (adev->gfx.rlc.funcs->update_spm_vmid)
-               adev->gfx.rlc.funcs->update_spm_vmid(adev, 0, NULL, 0xf);
+       int i, num_xcc;
+
+       if (amdgpu_sriov_vf(adev))
+               return 0;

+       num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+       for (i = 0; i < num_xcc; i++)
+               adev->gfx.rlc.funcs->update_spm_vmid(adev, i, NULL, 0xf);
         return 0;
  }

@@ -1631,14 +1644,15 @@ static int gfx_v9_4_3_xcc_rlc_resume(struct 
amdgpu_device *adev, int xcc_id)
  {
         int r;

+       gfx_v9_4_3_xcc_rlc_stop(adev, xcc_id);
         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
-               gfx_v9_4_3_xcc_rlc_stop(adev, xcc_id);
                 /* legacy rlc firmware loading */
                 r = gfx_v9_4_3_xcc_rlc_load_microcode(adev, xcc_id);
                 if (r)
                         return r;
-               gfx_v9_4_3_xcc_rlc_start(adev, xcc_id);
         }
+       gfx_v9_4_3_update_spm_vmid_internal(adev, xcc_id, 0xf);
+       gfx_v9_4_3_xcc_rlc_start(adev, xcc_id);

         amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
         /* disable CG */
@@ -1666,28 +1680,38 @@ static int gfx_v9_4_3_rlc_resume(struct amdgpu_device 
*adev)
         return 0;
  }

-static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev,
-                                             int inst, struct amdgpu_ring 
*ring, unsigned int vmid)
+static void gfx_v9_4_3_update_spm_vmid_internal(struct amdgpu_device *adev,
+                                             int xcc_id, unsigned int vmid)
  {
         u32 reg, pre_data, data;

-       reg = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regRLC_SPM_MC_CNTL);
+       reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL);
         if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
                 pre_data = RREG32_NO_KIQ(reg);
         else
-               pre_data = RREG32(reg);
+               pre_data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), 
regRLC_SPM_MC_CNTL);

         data =  pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << 
RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;

         if (pre_data != data) {
                 if (amdgpu_sriov_is_pp_one_vf(adev) && 
!amdgpu_sriov_runtime(adev)) {
-                       WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, inst), 
regRLC_SPM_MC_CNTL, data);
+                       WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, xcc_id), 
regRLC_SPM_MC_CNTL, data);
                 } else
-                       WREG32_SOC15(GC, GET_INST(GC, inst), 
regRLC_SPM_MC_CNTL, data);
+                       WREG32_SOC15(GC, GET_INST(GC, xcc_id), 
regRLC_SPM_MC_CNTL, data);
         }
  }

+static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, int xcc_id,
+               struct amdgpu_ring *ring, unsigned int vmid)
+{
+       amdgpu_gfx_off_ctrl(adev, false);
+
+       gfx_v9_4_3_update_spm_vmid_internal(adev, xcc_id, vmid);
+
+       amdgpu_gfx_off_ctrl(adev, true);
+}
+
  static const struct soc15_reg_rlcg rlcg_access_gc_9_4_3[] = {
         {SOC15_REG_ENTRY(GC, 0, regGRBM_GFX_INDEX)},
         {SOC15_REG_ENTRY(GC, 0, regSQ_IND_INDEX)},
@@ -2373,6 +2397,7 @@ static int gfx_v9_4_3_hw_fini(struct amdgpu_ip_block 
*ip_block)
         int i, num_xcc;

         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+       amdgpu_irq_put(adev, &adev->gfx.spm_irq, 0);
         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
         amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);

@@ -2507,12 +2532,112 @@ static void gfx_v9_4_3_ring_emit_gds_switch(struct 
amdgpu_ring *ring,
                                    (1 << (oa_size + oa_base)) - (1 << 
oa_base));
  }

+static void gfx_v9_4_3_spm_start(struct amdgpu_device *adev, int xcc_id)
+{
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[xcc_id].ring;
+       uint32_t data = 0;
+
+       data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SPM_PERFMON_CNTL);
+       data |= RLC_SPM_PERFMON_CNTL__PERFMON_RING_MODE_MASK;
+       gfx_v9_4_3_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), 
regRLC_SPM_PERFMON_CNTL), data);
+
+       data = REG_SET_FIELD(0, CP_PERFMON_CNTL, SPM_PERFMON_STATE,
+                       CP_PERFMON_STATE_DISABLE_AND_RESET);
+       gfx_v9_4_3_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), 
regCP_PERFMON_CNTL), data);
+
+       /* When SPM is reset, RLC automatically resets wptr to 0.
+        * Manually reset rptr to match this.
+        */
+       gfx_v9_4_3_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), 
regRLC_SPM_RING_RDPTR), 0);
+
+       gfx_v9_4_3_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), 
regRLC_SPM_INT_CNTL), 1);
+
+       data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CLK_CNTL);
+       data |= RLC_CLK_CNTL__RLC_SPM_CLK_CNTL_MASK;
+       WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CLK_CNTL, data);
+}
+
+static void gfx_v9_4_3_spm_stop(struct amdgpu_device *adev, int xcc_id)
+{
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[xcc_id].ring;
+       uint32_t data = 0;
+
+       data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CLK_CNTL);
+       data &= (~RLC_CLK_CNTL__RLC_SPM_CLK_CNTL_MASK);
+       WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CLK_CNTL, data);
+
+       data = REG_SET_FIELD(0, CP_PERFMON_CNTL, SPM_PERFMON_STATE,
+                       CP_PERFMON_STATE_STOP_COUNTING);
+       gfx_v9_4_3_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), 
regCP_PERFMON_CNTL), data);
+
+       data = REG_SET_FIELD(0, CP_PERFMON_CNTL, PERFMON_STATE,
+                       CP_PERFMON_STATE_DISABLE_AND_RESET);
+       gfx_v9_4_3_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), 
regCP_PERFMON_CNTL), data);
+
+       /* When SPM is reset, RLC automatically resets wptr to 0.
+        * Manually reset rptr to match this.
+        */
+       gfx_v9_4_3_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), 
regRLC_SPM_RING_RDPTR), 0);
+}
+
+static void gfx_v9_4_3_spm_set_rdptr(struct amdgpu_device *adev, int xcc_id,  
u32 rptr)
+{
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[xcc_id].ring;
+
+       gfx_v9_4_3_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), 
regRLC_SPM_RING_RDPTR), rptr);
+}
+
+static void gfx_v9_4_3_set_spm_perfmon_ring_buf(struct amdgpu_device *adev,
+                                           int xcc_id, u64 gpu_addr, u32 size)
+{
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[xcc_id].ring;
+
+       gfx_v9_4_3_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0,
+                       regRLC_SPM_PERFMON_RING_BASE_LO), 
lower_32_bits(gpu_addr));
+
+       gfx_v9_4_3_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, 0,
+                               regRLC_SPM_PERFMON_RING_BASE_HI), 
upper_32_bits(gpu_addr));
+
+       gfx_v9_4_3_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id),
+                               regRLC_SPM_PERFMON_RING_SIZE), size);
+       gfx_v9_4_3_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id),
+                               regRLC_SPM_SEGMENT_THRESHOLD), 0x1);
+
+       gfx_v9_4_3_write_data_to_reg(kiq_ring, 0, false,
+                       SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), 
regCP_PERFMON_CNTL), 0);
+}
+
+static const struct spm_funcs gfx_v9_4_3_spm_funcs = {
+       .start = &gfx_v9_4_3_spm_start,
+       .stop = &gfx_v9_4_3_spm_stop,
+       .set_rdptr = &gfx_v9_4_3_spm_set_rdptr,
+       .set_spm_perfmon_ring_buf = &gfx_v9_4_3_set_spm_perfmon_ring_buf,
+       .set_spm_config_size = 30,
+};
+
+static void gfx_v9_4_3_set_spm_funcs(struct amdgpu_device *adev)
+{
+       adev->gfx.spmfuncs = &gfx_v9_4_3_spm_funcs;
+}
+
  static int gfx_v9_4_3_early_init(struct amdgpu_ip_block *ip_block)
  {
         struct amdgpu_device *adev = ip_block->adev;

         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
                                           AMDGPU_MAX_COMPUTE_RINGS);
+       gfx_v9_4_3_set_spm_funcs(adev);
         gfx_v9_4_3_set_kiq_pm4_funcs(adev);
         gfx_v9_4_3_set_ring_funcs(adev);
         gfx_v9_4_3_set_irq_funcs(adev);
@@ -2534,6 +2659,10 @@ static int gfx_v9_4_3_late_init(struct amdgpu_ip_block 
*ip_block)
         if (r)
                 return r;

+       r = amdgpu_irq_get(adev, &adev->gfx.spm_irq, 0);
+       if (r)
+               return r;
+
         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
         if (r)
                 return r;
@@ -3404,6 +3533,41 @@ static void gfx_v9_4_3_emit_mem_sync(struct amdgpu_ring 
*ring)
         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
  }

+static int gfx_v9_4_3_spm_set_interrupt_state(struct amdgpu_device *adev,
+                                            struct amdgpu_irq_src *src,
+                                            unsigned int type,
+                                            enum amdgpu_interrupt_state state)
+{
+       int i, num_xcc;
+
+       num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+       for (i = 0; i < num_xcc; i++) {
+               switch (state) {
+               case AMDGPU_IRQ_STATE_DISABLE:
+                       WREG32_SOC15(GC, GET_INST(GC, i), regRLC_SPM_INT_CNTL, 
0);
+                       break;
+               case AMDGPU_IRQ_STATE_ENABLE:
+                       WREG32_SOC15(GC, GET_INST(GC, i), regRLC_SPM_INT_CNTL, 
1);
+                       break;
+               default:
+                       break;
+               }
+       }
+       return 0;
+}
+
+static int gfx_v9_4_3_spm_irq(struct amdgpu_device *adev,
+                            struct amdgpu_irq_src *source,
+                            struct amdgpu_iv_entry *entry)
+{
+       int xcc_id;
+
+       xcc_id = gfx_v9_4_3_ih_to_xcc_inst(adev, entry->node_id);
+
+       amdgpu_rlc_spm_interrupt(adev, xcc_id);
+       return 0;
+}
+
  static void gfx_v9_4_3_emit_wave_limit_cs(struct amdgpu_ring *ring,
                                         uint32_t pipe, bool enable)
  {
@@ -4831,11 +4995,19 @@ static const struct amdgpu_irq_src_funcs 
gfx_v9_4_3_priv_inst_irq_funcs = {
         .process = gfx_v9_4_3_priv_inst_irq,
  };

+static const struct amdgpu_irq_src_funcs gfx_v9_4_3_spm_irq_funcs = {
+       .set = gfx_v9_4_3_spm_set_interrupt_state,
+       .process = gfx_v9_4_3_spm_irq,
+};
+
  static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev)
  {
         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
         adev->gfx.eop_irq.funcs = &gfx_v9_4_3_eop_irq_funcs;

+       adev->gfx.spm_irq.num_types = 1;
+       adev->gfx.spm_irq.funcs = &gfx_v9_4_3_spm_irq_funcs;
+
         adev->gfx.priv_reg_irq.num_types = 1;
         adev->gfx.priv_reg_irq.funcs = &gfx_v9_4_3_priv_reg_irq_funcs;

--
2.34.1

Reply via email to