It improves Unigine Heaven performance here (RX 580, 8 GB), too.
without Tessellation
Before:
FPS: 79.0
Score: 1991
Min FPS: 20.8
Max FPS: 189.8
After:
FPS: 79.1
Score: 1993
Min FPS: 19.6
Max FPS: 185.9
with Tessellation
Before:
FPS: 67.7
Score: 1705
Min FPS: 8.8
Max FPS: 182.6
After:
FPS: 68.3
Score: 1720
Min FPS: 15.9
Max FPS: 179.6
System
Platform: Linux 4.20.0-amd-staging-4.11-1.g7262353-default+ x86_64
CPU model: Intel(R) Xeon(R) CPU X3470 @ 2.93GHz (2925MHz) x8
GPU model: Unknown GPU (256MB) x1
Marek, is this the 'tessellation regression' you tried to solve?
For this series:
Tested-by: Dieter Nützel <die...@nuetzel-hh.de>
Dieter
Am 03.06.2017 18:04, schrieb Marek Olšák:
From: Marek Olšák <marek.ol...@amd.com>
Heaven LDS usage for LS+HS is below. The masks are "outputs_written"
for LS and HS. Note that 32K is the maximum size.
Before:
heaven_x64: ls=1f1 tcs=1f1, lds=32K
heaven_x64: ls=31 tcs=31, lds=24K
heaven_x64: ls=71 tcs=71, lds=28K
After:
heaven_x64: ls=3f tcs=3f, lds=24K
heaven_x64: ls=7 tcs=7, lds=13K
heaven_x64: ls=f tcs=f, lds=17K
All other apps have a similar decrease in LDS usage, because
the "outputs_written" masks are similar. Also, most apps don't write
POSITION in these shader stages, so there is room for improvement.
(tight per-component input/output packing might help even more)
It's unknown whether this improves performance.
---
src/gallium/drivers/radeonsi/si_shader.c | 18
+++++++++++-------
src/gallium/drivers/radeonsi/si_state_shaders.c | 4 +++-
2 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c
b/src/gallium/drivers/radeonsi/si_shader.c
index ddfaa3b..3a86c0b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -129,32 +129,36 @@ unsigned
si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned
in
/**
* Returns a unique index for a semantic name and index. The index
must be
* less than 64, so that a 64-bit bitmask of used inputs or outputs
can be
* calculated.
*/
unsigned si_shader_io_get_unique_index(unsigned semantic_name,
unsigned index)
{
switch (semantic_name) {
case TGSI_SEMANTIC_POSITION:
return 0;
- case TGSI_SEMANTIC_PSIZE:
- return 1;
- case TGSI_SEMANTIC_CLIPDIST:
- assert(index <= 1);
- return 2 + index;
case TGSI_SEMANTIC_GENERIC:
+ /* Some shader stages use the highest used IO index
+ * to determine the size to allocate for inputs/outputs
+ * (in LDS, tess and GS rings), so GENERIC should be placed right
+ * after POSITION to make that size as small as possible.
+ */
if (index < SI_MAX_IO_GENERIC)
- return 4 + index;
+ return 1 + index;
assert(!"invalid generic index");
return 0;
-
+ case TGSI_SEMANTIC_PSIZE:
+ return SI_MAX_IO_GENERIC + 1;
+ case TGSI_SEMANTIC_CLIPDIST:
+ assert(index <= 1);
+ return SI_MAX_IO_GENERIC + 2 + index;
case TGSI_SEMANTIC_FOG:
return SI_MAX_IO_GENERIC + 4;
case TGSI_SEMANTIC_LAYER:
return SI_MAX_IO_GENERIC + 5;
case TGSI_SEMANTIC_VIEWPORT_INDEX:
return SI_MAX_IO_GENERIC + 6;
case TGSI_SEMANTIC_PRIMID:
return SI_MAX_IO_GENERIC + 7;
case TGSI_SEMANTIC_COLOR: /* these alias */
case TGSI_SEMANTIC_BCOLOR:
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 8ac4309..f36997b 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1226,21 +1226,23 @@ static void
si_shader_selector_key_hw_vs(struct si_context *sctx,
ps_disabled = sctx->queued.named.rasterizer->rasterizer_discard
||
(!ps_colormask &&
!ps_modifies_zs &&
!ps->info.writes_memory);
}
/* Find out which VS outputs aren't used by the PS. */
uint64_t outputs_written = vs->outputs_written;
uint64_t inputs_read = 0;
- outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
+ /* ignore POSITION, PSIZE */
+ outputs_written &= ~((1ull <<
si_shader_io_get_unique_index(TGSI_SEMANTIC_POSITION, 0) |
+ (1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_PSIZE,
0))));
if (!ps_disabled) {
inputs_read = ps->inputs_read;
}
uint64_t linked = outputs_written & inputs_read;
key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
}
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev