From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_shader.c | 34 +++++++++++++++++++++----------
 src/gallium/drivers/radeonsi/si_shader.h | 1 +
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index b18b4f6..f1ac94f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5353,47 +5353,42 @@ static void si_shader_dump_disassembly(const struct ac_shader_binary *binary,
 	} else {
 		fprintf(file, "Shader %s binary:\n", name);
 		for (i = 0; i < binary->code_size; i += 4) {
 			fprintf(file, "@0x%x: %02x%02x%02x%02x\n", i,
 				binary->code[i + 3], binary->code[i + 2],
 				binary->code[i + 1], binary->code[i]);
 		}
 	}
 }
 
-static void si_shader_dump_stats(struct si_screen *sscreen,
-				 const struct si_shader *shader,
-				 struct pipe_debug_callback *debug,
-				 unsigned processor,
-				 FILE *file,
-				 bool check_debug_option)
+static void si_calculate_max_simd_waves(struct si_shader *shader)
 {
-	const struct si_shader_config *conf = &shader->config;
-	unsigned num_inputs = shader->selector ? shader->selector->info.num_inputs : 0;
-	unsigned code_size = si_get_shader_binary_size(shader);
+	struct si_screen *sscreen = shader->selector->screen;
+	struct si_shader_config *conf = &shader->config;
+	unsigned num_inputs = shader->selector->info.num_inputs;
 	unsigned lds_increment = sscreen->info.chip_class >= CIK ? 512 : 256;
 	unsigned lds_per_wave = 0;
 	unsigned max_simd_waves;
 
 	switch (sscreen->info.family) {
 	/* These always have 8 waves: */
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
 		max_simd_waves = 8;
 		break;
 	default:
 		max_simd_waves = 10;
 	}
 
 	/* Compute LDS usage for PS. */
-	switch (processor) {
+	switch (shader->selector->type) {
 	case PIPE_SHADER_FRAGMENT:
 		/* The minimum usage per wave is (num_inputs * 48). The maximum
 		 * usage is (num_inputs * 48 * 16).
 		 * We can get anything in between and it varies between waves.
 		 *
 		 * The 48 bytes per input for a single primitive is equal to
 		 * 4 bytes/component * 4 components/input * 3 points.
 		 *
 		 * Other stages don't know the size at compile time or don't
 		 * allocate LDS per wave, but instead they do it per thread group.
@@ -5420,20 +5415,33 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
 	}
 
 	if (conf->num_vgprs)
 		max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
 
 	/* LDS is 64KB per CU (4 SIMDs), which is 16KB per SIMD (usage above
 	 * 16KB makes some SIMDs unoccupied). */
 	if (lds_per_wave)
 		max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
 
+	conf->max_simd_waves = max_simd_waves;
+}
+
+static void si_shader_dump_stats(struct si_screen *sscreen,
+				 const struct si_shader *shader,
+				 struct pipe_debug_callback *debug,
+				 unsigned processor,
+				 FILE *file,
+				 bool check_debug_option)
+{
+	const struct si_shader_config *conf = &shader->config;
+	unsigned code_size = si_get_shader_binary_size(shader);
+
 	if (!check_debug_option ||
 	    si_can_dump_shader(sscreen, processor)) {
 		if (processor == PIPE_SHADER_FRAGMENT) {
 			fprintf(file, "*** SHADER CONFIG ***\n"
 				"SPI_PS_INPUT_ADDR = 0x%04x\n"
 				"SPI_PS_INPUT_ENA = 0x%04x\n",
 				conf->spi_ps_input_addr, conf->spi_ps_input_ena);
 		}
 
 		fprintf(file, "*** SHADER STATS ***\n"
@@ -5444,30 +5452,30 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
 			"Private memory VGPRs: %d\n"
 			"Code Size: %d bytes\n"
 			"LDS: %d blocks\n"
 			"Scratch: %d bytes per wave\n"
 			"Max Waves: %d\n"
 			"********************\n\n\n",
 			conf->num_sgprs, conf->num_vgprs,
 			conf->spilled_sgprs, conf->spilled_vgprs,
 			conf->private_mem_vgprs, code_size,
 			conf->lds_size, conf->scratch_bytes_per_wave,
-			max_simd_waves);
+			conf->max_simd_waves);
 	}
 
 	pipe_debug_message(debug, SHADER_INFO,
 			   "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d "
 			   "LDS: %d Scratch: %d Max Waves: %d Spilled SGPRs: %d "
 			   "Spilled VGPRs: %d PrivMem VGPRs: %d",
 			   conf->num_sgprs, conf->num_vgprs, code_size,
 			   conf->lds_size, conf->scratch_bytes_per_wave,
-			   max_simd_waves, conf->spilled_sgprs,
+			   conf->max_simd_waves, conf->spilled_sgprs,
 			   conf->spilled_vgprs, conf->private_mem_vgprs);
 }
 
 const char *si_get_shader_name(const struct si_shader *shader, unsigned processor)
 {
 	switch (processor) {
 	case PIPE_SHADER_VERTEX:
 		if (shader->key.as_es)
 			return "Vertex Shader as ES";
 		else if (shader->key.as_ls)
@@ -6960,20 +6968,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 		if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr)) {
 			shader->info.ancillary_vgpr_index = shader->info.num_input_vgprs;
 			shader->info.num_input_vgprs += 1;
 		}
 		if (G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr))
 			shader->info.num_input_vgprs += 1;
 		if (G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr))
 			shader->info.num_input_vgprs += 1;
 	}
 
+	si_calculate_max_simd_waves(shader);
 	return 0;
 }
 
 /**
  * Create, compile and return a shader part (prolog or epilog).
  *
  * \param sscreen screen
  * \param list list of shader parts of the same category
  * \param type shader type
  * \param key shader part key
@@ -8033,20 +8042,21 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 							shader->prolog2->config.num_sgprs);
 			shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
 							shader->prolog2->config.num_vgprs);
 		}
 		if (shader->epilog) {
 			shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
 							shader->epilog->config.num_sgprs);
 			shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
 							shader->epilog->config.num_vgprs);
 		}
+		si_calculate_max_simd_waves(shader);
 	}
 
 	si_fix_resource_usage(sscreen, shader);
 	si_shader_dump(sscreen, shader, debug, sel->info.processor,
 		       stderr, true);
 
 	/* Upload. */
 	r = si_shader_binary_upload(sscreen, shader);
 	if (r) {
 		fprintf(stderr, "LLVM failed to upload shader\n");
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index c449aa9..6ed1646 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -550,20 +550,21 @@ struct si_shader_key {
 /* Restore the pack alignment to default. */
 #pragma pack(pop)
 
 struct si_shader_config {
 	unsigned num_sgprs;
 	unsigned num_vgprs;
 	unsigned spilled_sgprs;
 	unsigned spilled_vgprs;
 	unsigned private_mem_vgprs;
 	unsigned lds_size;
+	unsigned max_simd_waves;
 	unsigned spi_ps_input_ena;
 	unsigned spi_ps_input_addr;
 	unsigned float_mode;
 	unsigned scratch_bytes_per_wave;
 	unsigned rsrc1;
 	unsigned rsrc2;
 };
 
 /* GCN-specific shader info. */
 struct si_shader_info {
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev