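We overload "cu" to mean "wgp" in a bunch of places, but max_cu_per_sh
is always in terms of CUs. When scanning per WGP (families newer than
FAMILY_AI), iterate over max_cu_per_sh / 2 instead of max_cu_per_sh.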

Signed-off-by: Nicolai Hähnle <nicolai.haeh...@amd.com>
---
 src/lib/scan_waves.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/lib/scan_waves.c b/src/lib/scan_waves.c
index 767520c..3279cc2 100644
--- a/src/lib/scan_waves.c
+++ b/src/lib/scan_waves.c
@@ -618,48 +618,50 @@ static int umr_scan_wave_simd(struct umr_asic *asic, 
uint32_t se, uint32_t sh, u
        return 0;
 }
 
 /**
  * umr_scan_wave_data - Scan for any halted valid waves
  *
  * Returns NULL on error (or no waves found).
  */
 struct umr_wave_data *umr_scan_wave_data(struct umr_asic *asic)
 {
-       uint32_t se, sh, cu, simd;
+       uint32_t se, sh, simd;
        struct umr_wave_data *ohead, *head, **ptail;
        int r;
 
        ohead = head = calloc(1, sizeof *head);
        if (!head) {
                asic->err_msg("[ERROR]: Out of memory\n");
                return NULL;
        }
        ptail = &head;
 
        for (se = 0; se < asic->config.gfx.max_shader_engines; se++)
-       for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++)
-       for (cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) {
+       for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++) {
                if (asic->family <= FAMILY_AI) {
-                       asic->wave_funcs.get_wave_sq_info(asic, se, sh, cu, 
&(*ptail)->ws);
-                       if ((*ptail)->ws.sq_info.busy) {
-                               for (simd = 0; simd < 4; simd++) {
-                                       r = umr_scan_wave_simd(asic, se, sh, 
cu, simd, &ptail);
-                                       if (r < 0)
-                                               goto error;
+                       for (uint32_t cu = 0; cu < 
asic->config.gfx.max_cu_per_sh; cu++) {
+                               asic->wave_funcs.get_wave_sq_info(asic, se, sh, 
cu, &(*ptail)->ws);
+                               if ((*ptail)->ws.sq_info.busy) {
+                                       for (simd = 0; simd < 4; simd++) {
+                                               r = umr_scan_wave_simd(asic, 
se, sh, cu, simd, &ptail);
+                                               if (r < 0)
+                                                       goto error;
+                                       }
                                }
                        }
                } else {
+                       for (uint32_t wgp = 0; wgp < 
asic->config.gfx.max_cu_per_sh / 2; wgp++)
                        for (simd = 0; simd < 4; simd++) {
-                               asic->wave_funcs.get_wave_sq_info(asic, se, sh, 
MANY_TO_INSTANCE(cu, simd), &(*ptail)->ws);
+                               asic->wave_funcs.get_wave_sq_info(asic, se, sh, 
MANY_TO_INSTANCE(wgp, simd), &(*ptail)->ws);
                                if ((*ptail)->ws.sq_info.busy) {
-                                       r = umr_scan_wave_simd(asic, se, sh, 
cu, simd, &ptail);
+                                       r = umr_scan_wave_simd(asic, se, sh, 
wgp, simd, &ptail);
                                        if (r < 0)
                                                goto error;
                                }
                        }
                }
        }
 
        // drop the pre-allocated tail node
        free(*ptail);
        *ptail = NULL;
-- 
2.40.0

Reply via email to