Make CU occupancy calculations work on GFX 9.4.3 by
updating the logic to handle multiple XCCs correctly.

Signed-off-by: Mukul Joshi <mukul.jo...@amd.com>
---
v1->v2:
- Break into 2 patches, one for the generic change
  and the other for GFX v9.4.3.
- Incorporate Harish's comments.

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 12 ++++++------
 drivers/gpu/drm/amd/amdkfd/kfd_process.c          | 10 +++++++++-
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index fe8a8e7e9a9a..e6bc808d9c59 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -963,14 +963,14 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
         */
        pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
        queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
-       soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst);
-       reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst,
+       soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, GET_INST(GC, inst));
+       reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
                                  mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot);
        wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
        if (wave_cnt != 0) {
                queue_cnt->wave_cnt += wave_cnt;
                queue_cnt->doorbell_off =
-                       (RREG32_SOC15(GC, inst, mmCP_HQD_PQ_DOORBELL_CONTROL) &
+                       (RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL) &
                         CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >>
                         CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
        }
@@ -1034,7 +1034,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
        DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES);
 
        lock_spi_csq_mutexes(adev);
-       soc15_grbm_select(adev, 1, 0, 0, 0, inst);
+       soc15_grbm_select(adev, 1, 0, 0, 0, GET_INST(GC, inst));
 
        /*
         * Iterate through the shader engines and arrays of the device
@@ -1047,7 +1047,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
        se_cnt = adev->gfx.config.max_shader_engines;
        for (se_idx = 0; se_idx < se_cnt; se_idx++) {
                amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst);
-               queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS);
+               queue_map = RREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_STATUS);
 
                /*
                 * Assumption: queue map encodes following schema: four
@@ -1072,7 +1072,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
        }
 
        amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst);
-       soc15_grbm_select(adev, 0, 0, 0, 0, inst);
+       soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
        unlock_spi_csq_mutexes(adev);
 
        /* Update the output parameters and return */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d73841268c9b..831e0e92bd23 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -292,8 +292,13 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
        wave_cnt = 0;
        max_waves_per_cu = 0;
 
+       /*
+        * For GFX9.4.3, fetch the CU occupancy from the first XCC in the partition.
+        * Later we multiply the wave count by number of XCCs in the partition to get
+        * the total wave counts across all XCCs in the partition.
+        */
        dev->kfd2kgd->get_cu_occupancy(dev->adev, cu_occupancy,
-                       &max_waves_per_cu, 0);
+                       &max_waves_per_cu, ffs(dev->xcc_mask) - 1);
 
        for (i = 0; i < AMDGPU_MAX_QUEUES; i++) {
                if (cu_occupancy[i].wave_cnt != 0 &&
@@ -302,6 +307,9 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
                        wave_cnt += cu_occupancy[i].wave_cnt;
        }
 
+       /* Update wave_cnt for the number of XCCs in the partition */
+       wave_cnt *= NUM_XCC(dev->xcc_mask);
+
        /* Translate wave count to number of compute units */
        cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
        return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
-- 
2.35.1

Reply via email to