From: Marek Olšák <marek.ol...@amd.com>

v2: account for LDS usage in PS
    the limit is per SIMD, not per CU
---
 src/gallium/drivers/radeonsi/si_shader.c | 54 +++++++++++++++++++++++++++++---
 1 file changed, 49 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 1bd617f..33c0db6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4001,22 +4001,65 @@ static void si_shader_dump_disassembly(const struct 
radeon_shader_binary *binary
 
 static void si_shader_dump_stats(struct si_screen *sscreen,
                                 struct si_shader_config *conf,
+                                unsigned num_inputs,
                                 unsigned code_size,
                                 struct pipe_debug_callback *debug,
                                 unsigned processor)
 {
+       unsigned lds_increment = sscreen->b.chip_class >= CIK ? 512 : 256;
+       unsigned lds_per_wave = 0;
+       unsigned max_simd_waves = 10;
+
+       /* Compute LDS usage for PS. */
+       if (processor == TGSI_PROCESSOR_FRAGMENT) {
+               /* The minimum usage per wave is (num_inputs * 36). The maximum
+                * usage is (num_inputs * 36 * 16).
+                * We can get anything in between and it varies between waves.
+                *
+                * Other stages don't know the size at compile time or don't
+                * allocate LDS per wave, but instead they do it per thread 
group.
+                */
+               lds_per_wave = conf->lds_size * lds_increment +
+                              align(num_inputs * 36, lds_increment);
+       }
+
+       /* Compute the per-SIMD wave counts. */
+       if (conf->num_sgprs) {
+               if (sscreen->b.chip_class >= VI)
+                       max_simd_waves = MIN2(max_simd_waves, 800 / 
conf->num_sgprs);
+               else
+                       max_simd_waves = MIN2(max_simd_waves, 512 / 
conf->num_sgprs);
+       }
+
+       if (conf->num_vgprs)
+               max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
+
+       /* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD
+        * that PS can use.
+        */
+       if (lds_per_wave)
+               max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
+
        if (r600_can_dump_shader(&sscreen->b, processor)) {
                fprintf(stderr, "*** SHADER STATS ***\n"
-                       "SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d 
blocks\n"
-                       "Scratch: %d bytes per wave\n********************\n",
+                       "SGPRS: %d\n"
+                       "VGPRS: %d\n"
+                       "Code Size: %d bytes\n"
+                       "LDS: %d blocks\n"
+                       "Scratch: %d bytes per wave\n"
+                       "Max Waves: %d\n"
+                       "********************\n",
                        conf->num_sgprs, conf->num_vgprs, code_size,
-                       conf->lds_size, conf->scratch_bytes_per_wave);
+                       conf->lds_size, conf->scratch_bytes_per_wave,
+                       max_simd_waves);
        }
 
        pipe_debug_message(debug, SHADER_INFO,
-                          "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d 
LDS: %d Scratch: %d",
+                          "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d "
+                          "LDS: %d Scratch: %d Max Waves: %d",
                           conf->num_sgprs, conf->num_vgprs, code_size,
-                          conf->lds_size, conf->scratch_bytes_per_wave);
+                          conf->lds_size, conf->scratch_bytes_per_wave,
+                          max_simd_waves);
 }
 
 void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
@@ -4027,6 +4070,7 @@ void si_shader_dump(struct si_screen *sscreen, struct 
si_shader *shader,
                        si_shader_dump_disassembly(&shader->binary, debug);
 
        si_shader_dump_stats(sscreen, &shader->config,
+                            shader->selector->info.num_inputs,
                             shader->binary.code_size, debug, processor);
 }
 
-- 
2.1.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to