On Fri, Jan 22, 2016 at 03:18:12PM +0100, Marek Olšák wrote: > From: Marek Olšák <marek.ol...@amd.com> > > v2: account for LDS usage in PS > the limit is per SIMD, not per CU > --- > src/gallium/drivers/radeonsi/si_shader.c | 54 > +++++++++++++++++++++++++++++--- > 1 file changed, 49 insertions(+), 5 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index 1bd617f..33c0db6 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -4001,22 +4001,65 @@ static void si_shader_dump_disassembly(const struct > radeon_shader_binary *binary > > static void si_shader_dump_stats(struct si_screen *sscreen, > struct si_shader_config *conf, > + unsigned num_inputs, > unsigned code_size, > struct pipe_debug_callback *debug, > unsigned processor) > { > + unsigned lds_increment = sscreen->b.chip_class >= CIK ? 512 : 256; > + unsigned lds_per_wave = 0; > + unsigned max_simd_waves = 10; > + > + /* Compute LDS usage for PS. */ > + if (processor == TGSI_PROCESSOR_FRAGMENT) { > + /* The minimum usage per wave is (num_inputs * 36). The maximum > + * usage is (num_inputs * 36 * 16). > + * We can get anything in between and it varies between waves. > + * > + * Other stages don't know the size at compile time or don't > + * allocate LDS per wave, but instead they do it per thread > group. > + */ > + lds_per_wave = conf->lds_size * lds_increment + > + align(num_inputs * 36, lds_increment); > + } > + > + /* Compute the per-SIMD wave counts. */ > + if (conf->num_sgprs) { > + if (sscreen->b.chip_class >= VI) > + max_simd_waves = MIN2(max_simd_waves, 800 / > conf->num_sgprs); > + else > + max_simd_waves = MIN2(max_simd_waves, 512 / > conf->num_sgprs); > + } > + > + if (conf->num_vgprs) > + max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs); > + > + /* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD > + * that PS can use. 
> + */ > + if (lds_per_wave) > + max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave); > + > if (r600_can_dump_shader(&sscreen->b, processor)) { > fprintf(stderr, "*** SHADER STATS ***\n" > - "SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d > blocks\n" > - "Scratch: %d bytes per wave\n********************\n", > + "SGPRS: %d\n" > + "VGPRS: %d\n" > + "Code Size: %d bytes\n" > + "LDS: %d blocks\n" > + "Scratch: %d bytes per wave\n" > + "Max Waves: %d\n" > + "********************\n", > conf->num_sgprs, conf->num_vgprs, code_size, > - conf->lds_size, conf->scratch_bytes_per_wave); > + conf->lds_size, conf->scratch_bytes_per_wave, > + max_simd_waves); > } > > pipe_debug_message(debug, SHADER_INFO, > - "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d > LDS: %d Scratch: %d", > + "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d " > + "LDS: %d Scratch: %d Max Waves: %d", > conf->num_sgprs, conf->num_vgprs, code_size, > - conf->lds_size, conf->scratch_bytes_per_wave); > + conf->lds_size, conf->scratch_bytes_per_wave, > + max_simd_waves); > } > > void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, > @@ -4027,6 +4070,7 @@ void si_shader_dump(struct si_screen *sscreen, struct > si_shader *shader, > si_shader_dump_disassembly(&shader->binary, debug); > > si_shader_dump_stats(sscreen, &shader->config, > + shader->selector->info.num_inputs,

clover segfaults here: shader->selector is NULL for compute shaders, so evaluating shader->selector->info.num_inputs dereferences a NULL pointer.

-Tom

> shader->binary.code_size, debug, processor);
> }
>
> --
> 2.1.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev