Ping
On Sat, Jun 3, 2017 at 6:04 PM, Marek Olšák <mar...@gmail.com> wrote: > From: Marek Olšák <marek.ol...@amd.com> > > Heaven LDS usage for LS+HS is below. The masks are "outputs_written" > for LS and HS. Note that 32K is the maximum size. > > Before: > heaven_x64: ls=1f1 tcs=1f1, lds=32K > heaven_x64: ls=31 tcs=31, lds=24K > heaven_x64: ls=71 tcs=71, lds=28K > > After: > heaven_x64: ls=3f tcs=3f, lds=24K > heaven_x64: ls=7 tcs=7, lds=13K > heaven_x64: ls=f tcs=f, lds=17K > > All other apps have a similar decrease in LDS usage, because > the "outputs_written" masks are similar. Also, most apps don't write > POSITION in these shader stages, so there is room for improvement. > (tight per-component input/output packing might help even more) > > It's unknown whether this improves performance. > --- > src/gallium/drivers/radeonsi/si_shader.c | 18 +++++++++++------- > src/gallium/drivers/radeonsi/si_state_shaders.c | 4 +++- > 2 files changed, 14 insertions(+), 8 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index ddfaa3b..3a86c0b 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -129,32 +129,36 @@ unsigned si_shader_io_get_unique_index_patch(unsigned > semantic_name, unsigned in > /** > * Returns a unique index for a semantic name and index. The index must be > * less than 64, so that a 64-bit bitmask of used inputs or outputs can be > * calculated. > */ > unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned > index) > { > switch (semantic_name) { > case TGSI_SEMANTIC_POSITION: > return 0; > - case TGSI_SEMANTIC_PSIZE: > - return 1; > - case TGSI_SEMANTIC_CLIPDIST: > - assert(index <= 1); > - return 2 + index; > case TGSI_SEMANTIC_GENERIC: > + /* Since some shader stages use the highest used IO index > + * to determine the size to allocate for inputs/outputs > + * (in LDS, tess and GS rings). 
GENERIC should be placed right > + * after POSITION to make that size as small as possible. > + */ > if (index < SI_MAX_IO_GENERIC) > - return 4 + index; > + return 1 + index; > > assert(!"invalid generic index"); > return 0; > - > + case TGSI_SEMANTIC_PSIZE: > + return SI_MAX_IO_GENERIC + 1; > + case TGSI_SEMANTIC_CLIPDIST: > + assert(index <= 1); > + return SI_MAX_IO_GENERIC + 2 + index; > case TGSI_SEMANTIC_FOG: > return SI_MAX_IO_GENERIC + 4; > case TGSI_SEMANTIC_LAYER: > return SI_MAX_IO_GENERIC + 5; > case TGSI_SEMANTIC_VIEWPORT_INDEX: > return SI_MAX_IO_GENERIC + 6; > case TGSI_SEMANTIC_PRIMID: > return SI_MAX_IO_GENERIC + 7; > case TGSI_SEMANTIC_COLOR: /* these alias */ > case TGSI_SEMANTIC_BCOLOR: > diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c > b/src/gallium/drivers/radeonsi/si_state_shaders.c > index 8ac4309..f36997b 100644 > --- a/src/gallium/drivers/radeonsi/si_state_shaders.c > +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c > @@ -1226,21 +1226,23 @@ static void si_shader_selector_key_hw_vs(struct > si_context *sctx, > ps_disabled = > sctx->queued.named.rasterizer->rasterizer_discard || > (!ps_colormask && > !ps_modifies_zs && > !ps->info.writes_memory); > } > > /* Find out which VS outputs aren't used by the PS. */ > uint64_t outputs_written = vs->outputs_written; > uint64_t inputs_read = 0; > > - outputs_written &= ~0x3; /* ignore POSITION, PSIZE */ > + /* ignore POSITION, PSIZE */ > + outputs_written &= ~((1ull << > si_shader_io_get_unique_index(TGSI_SEMANTIC_POSITION, 0) | > + (1ull << > si_shader_io_get_unique_index(TGSI_SEMANTIC_PSIZE, 0)))); > > if (!ps_disabled) { > inputs_read = ps->inputs_read; > } > > uint64_t linked = outputs_written & inputs_read; > > key->opt.hw_vs.kill_outputs = ~linked & outputs_written; > } > > -- > 2.7.4 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev