From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_shader.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 44a4dd2..145de9f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5943,61 +5943,71 @@ static void si_shader_dump_disassembly(const struct 
radeon_shader_binary *binary
                fprintf(file, "Shader %s binary:\n", name);
                for (i = 0; i < binary->code_size; i += 4) {
                        fprintf(file, "@0x%x: %02x%02x%02x%02x\n", i,
                                binary->code[i + 3], binary->code[i + 2],
                                binary->code[i + 1], binary->code[i]);
                }
        }
 }
 
 static void si_shader_dump_stats(struct si_screen *sscreen,
-                                struct si_shader_config *conf,
-                                unsigned num_inputs,
-                                unsigned code_size,
+                                struct si_shader *shader,
                                 struct pipe_debug_callback *debug,
                                 unsigned processor,
                                 FILE *file)
 {
+       struct si_shader_config *conf = &shader->config;
+       unsigned num_inputs = shader->selector ? 
shader->selector->info.num_inputs : 0;
+       unsigned code_size = si_get_shader_binary_size(shader);
        unsigned lds_increment = sscreen->b.chip_class >= CIK ? 512 : 256;
        unsigned lds_per_wave = 0;
        unsigned max_simd_waves = 10;
 
        /* Compute LDS usage for PS. */
-       if (processor == PIPE_SHADER_FRAGMENT) {
+       switch (processor) {
+       case PIPE_SHADER_FRAGMENT:
                /* The minimum usage per wave is (num_inputs * 48). The maximum
                 * usage is (num_inputs * 48 * 16).
                 * We can get anything in between and it varies between waves.
                 *
                 * The 48 bytes per input for a single primitive is equal to
                 * 4 bytes/component * 4 components/input * 3 points.
                 *
                 * Other stages don't know the size at compile time or don't
                 * allocate LDS per wave, but instead they do it per thread 
group.
                 */
                lds_per_wave = conf->lds_size * lds_increment +
                               align(num_inputs * 48, lds_increment);
+               break;
+       case PIPE_SHADER_COMPUTE:
+               if (shader->selector) {
+                       unsigned max_workgroup_size =
+                               si_get_max_workgroup_size(shader);
+                       lds_per_wave = (conf->lds_size * lds_increment) /
+                                      DIV_ROUND_UP(max_workgroup_size, 64);
+               }
+               break;
        }
 
        /* Compute the per-SIMD wave counts. */
        if (conf->num_sgprs) {
                if (sscreen->b.chip_class >= VI)
                        max_simd_waves = MIN2(max_simd_waves, 800 / 
conf->num_sgprs);
                else
                        max_simd_waves = MIN2(max_simd_waves, 512 / 
conf->num_sgprs);
        }
 
        if (conf->num_vgprs)
                max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
 
-       /* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD
-        * that PS can use.
-        */
+       /* LDS is 64KB per CU (4 SIMDs), which is 16KB per SIMD (usage above
+        * 16KB makes some SIMDs unoccupied). */
        if (lds_per_wave)
                max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
 
        if (file != stderr ||
            r600_can_dump_shader(&sscreen->b, processor)) {
                if (processor == PIPE_SHADER_FRAGMENT) {
                        fprintf(file, "*** SHADER CONFIG ***\n"
                                "SPI_PS_INPUT_ADDR = 0x%04x\n"
                                "SPI_PS_INPUT_ENA  = 0x%04x\n",
                                conf->spi_ps_input_addr, 
conf->spi_ps_input_ena);
@@ -6087,24 +6097,21 @@ void si_shader_dump(struct si_screen *sscreen, struct 
si_shader *shader,
                                                   debug, "prolog", file);
 
                si_shader_dump_disassembly(&shader->binary, debug, "main", 
file);
 
                if (shader->epilog)
                        si_shader_dump_disassembly(&shader->epilog->binary,
                                                   debug, "epilog", file);
                fprintf(file, "\n");
        }
 
-       si_shader_dump_stats(sscreen, &shader->config,
-                            shader->selector ? 
shader->selector->info.num_inputs : 0,
-                            si_get_shader_binary_size(shader), debug, 
processor,
-                            file);
+       si_shader_dump_stats(sscreen, shader, debug, processor, file);
 }
 
 int si_compile_llvm(struct si_screen *sscreen,
                    struct radeon_shader_binary *binary,
                    struct si_shader_config *conf,
                    LLVMTargetMachineRef tm,
                    LLVMModuleRef mod,
                    struct pipe_debug_callback *debug,
                    unsigned processor,
                    const char *name)
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to