From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 44a4dd2..145de9f 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5943,61 +5943,71 @@ static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary fprintf(file, "Shader %s binary:\n", name); for (i = 0; i < binary->code_size; i += 4) { fprintf(file, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3], binary->code[i + 2], binary->code[i + 1], binary->code[i]); } } } static void si_shader_dump_stats(struct si_screen *sscreen, - struct si_shader_config *conf, - unsigned num_inputs, - unsigned code_size, + struct si_shader *shader, struct pipe_debug_callback *debug, unsigned processor, FILE *file) { + struct si_shader_config *conf = &shader->config; + unsigned num_inputs = shader->selector ? shader->selector->info.num_inputs : 0; + unsigned code_size = si_get_shader_binary_size(shader); unsigned lds_increment = sscreen->b.chip_class >= CIK ? 512 : 256; unsigned lds_per_wave = 0; unsigned max_simd_waves = 10; /* Compute LDS usage for PS. */ - if (processor == PIPE_SHADER_FRAGMENT) { + switch (processor) { + case PIPE_SHADER_FRAGMENT: /* The minimum usage per wave is (num_inputs * 48). The maximum * usage is (num_inputs * 48 * 16). * We can get anything in between and it varies between waves. * * The 48 bytes per input for a single primitive is equal to * 4 bytes/component * 4 components/input * 3 points. * * Other stages don't know the size at compile time or don't * allocate LDS per wave, but instead they do it per thread group. */ lds_per_wave = conf->lds_size * lds_increment + align(num_inputs * 48, lds_increment); + break; + case PIPE_SHADER_COMPUTE: + if (shader->selector) { + unsigned max_workgroup_size = + si_get_max_workgroup_size(shader); + lds_per_wave = (conf->lds_size * lds_increment) / + DIV_ROUND_UP(max_workgroup_size, 64); + } + break; } /* Compute the per-SIMD wave counts. */ if (conf->num_sgprs) { if (sscreen->b.chip_class >= VI) max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs); else max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs); } if (conf->num_vgprs) max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs); - /* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD - * that PS can use. - */ + /* LDS is 64KB per CU (4 SIMDs), which is 16KB per SIMD (usage above + * 16KB makes some SIMDs unoccupied). */ if (lds_per_wave) max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave); if (file != stderr || r600_can_dump_shader(&sscreen->b, processor)) { if (processor == PIPE_SHADER_FRAGMENT) { fprintf(file, "*** SHADER CONFIG ***\n" "SPI_PS_INPUT_ADDR = 0x%04x\n" "SPI_PS_INPUT_ENA = 0x%04x\n", conf->spi_ps_input_addr, conf->spi_ps_input_ena); @@ -6087,24 +6097,21 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, debug, "prolog", file); si_shader_dump_disassembly(&shader->binary, debug, "main", file); if (shader->epilog) si_shader_dump_disassembly(&shader->epilog->binary, debug, "epilog", file); fprintf(file, "\n"); } - si_shader_dump_stats(sscreen, &shader->config, - shader->selector ? shader->selector->info.num_inputs : 0, - si_get_shader_binary_size(shader), debug, processor, - file); + si_shader_dump_stats(sscreen, shader, debug, processor, file); } int si_compile_llvm(struct si_screen *sscreen, struct radeon_shader_binary *binary, struct si_shader_config *conf, LLVMTargetMachineRef tm, LLVMModuleRef mod, struct pipe_debug_callback *debug, unsigned processor, const char *name) -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev