As per the discussion on IRC, this patch is trying to fix an issue with the GLSL compiler pass lower_output_reads, which always generates 8 output writes for any shader that writes gl_FragData. I'll fix this in the GLSL compiler instead. NAK.
Marek On Sun, Oct 20, 2013 at 2:33 AM, Jay Cornwall <j...@jcornwall.me> wrote: > This patch identifies shader exports to unbound CBs and removes them during > TGSI to LLVM IR lowering. The method is identical to the one used in the > gallium/r600 driver. > > The GLSL lower_output_reads pass generates temporary copies for writes to > shader outputs. In the case of gl_FragData, this results in writes to every > MRT when one or more elements are written in the shader. When these MRTs > are unbound and masked out there is still a performance loss equivalent to > exports to bound, unmasked MRTs on SI. > > Signed-off-by: Jay Cornwall <j...@jcornwall.me> > --- > src/gallium/drivers/radeonsi/radeonsi_pipe.h | 1 + > src/gallium/drivers/radeonsi/radeonsi_shader.c | 6 ++++++ > src/gallium/drivers/radeonsi/si_state.c | 15 +++++++++++++-- > src/gallium/drivers/radeonsi/si_state.h | 1 + > 4 files changed, 21 insertions(+), 2 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h > b/src/gallium/drivers/radeonsi/radeonsi_pipe.h > index 26f7e09..bede043 100644 > --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h > +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h > @@ -148,6 +148,7 @@ struct r600_context { > unsigned fb_compressed_cb_mask; > unsigned pa_sc_line_stipple; > unsigned pa_su_sc_mode_cntl; > + boolean dual_src_blend; > /* for saving when using blitter */ > struct pipe_stencil_ref stencil_ref; > struct si_pipe_shader_selector *ps_shader; > diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c > b/src/gallium/drivers/radeonsi/radeonsi_shader.c > index 80ee325..e4a9f56 100644 > --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c > +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c > @@ -995,6 +995,12 @@ handle_semantic: > semantic_name); > } > > + /* Shader is keyed on nr_cbufs, optimize out exports > to unbound CBs */ > + if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT && > + semantic_name == TGSI_SEMANTIC_COLOR && > + color_count > > 
si_shader_ctx->shader->key.ps.nr_cbufs) > + continue; > + > si_llvm_init_export_args(bld_base, d, index, target, > args); > > if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX && > diff --git a/src/gallium/drivers/radeonsi/si_state.c > b/src/gallium/drivers/radeonsi/si_state.c > index da7c3d0..8109bde 100644 > --- a/src/gallium/drivers/radeonsi/si_state.c > +++ b/src/gallium/drivers/radeonsi/si_state.c > @@ -27,6 +27,7 @@ > #include "util/u_memory.h" > #include "util/u_framebuffer.h" > #include "util/u_blitter.h" > +#include "util/u_dual_blend.h" > #include "util/u_helpers.h" > #include "util/u_math.h" > #include "util/u_pack_color.h" > @@ -168,6 +169,8 @@ static void si_update_fb_blend_state(struct r600_context > *rctx) > si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask); > > si_pm4_set_state(rctx, fb_blend, pm4); > + > + rctx->dual_src_blend = blend->dual_src_blend; > } > > /* > @@ -309,6 +312,9 @@ static void *si_create_blend_state_mode(struct > pipe_context *ctx, > si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, > blend_cntl); > } > > + /* only MRT0 has dual src blend */ > + blend->dual_src_blend = util_blend_state_is_dual(state, 0); > + > return blend; > } > > @@ -2097,8 +2103,13 @@ static INLINE void si_shader_selector_key(struct > pipe_context *ctx, > if (rctx->queued.named.rasterizer->clip_plane_enable & 0xf) > key->vs.ucps_enabled |= 0x1; > } else if (sel->type == PIPE_SHADER_FRAGMENT) { > - if (sel->fs_write_all) > - key->ps.nr_cbufs = rctx->framebuffer.nr_cbufs; > + /* Key on nr_cbufs to optimize unused EXPORTs. */ > + key->ps.nr_cbufs = rctx->framebuffer.nr_cbufs; > + > + /* Dual-source blending only makes sense with nr_cbufs == 1. 
> */ > + if (key->ps.nr_cbufs == 1 && rctx->dual_src_blend) > + key->ps.nr_cbufs = 2; > + > key->ps.export_16bpc = rctx->export_16bpc; > > if (rctx->queued.named.rasterizer) { > diff --git a/src/gallium/drivers/radeonsi/si_state.h > b/src/gallium/drivers/radeonsi/si_state.h > index 6dbf880..ca51496 100644 > --- a/src/gallium/drivers/radeonsi/si_state.h > +++ b/src/gallium/drivers/radeonsi/si_state.h > @@ -34,6 +34,7 @@ struct si_state_blend { > struct si_pm4_state pm4; > uint32_t cb_target_mask; > bool alpha_to_one; > + bool dual_src_blend; > }; > > struct si_state_viewport { > -- > 1.8.4.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev