2012/6/26 <j.gli...@gmail.com>:
> From: Vadim Girlin <vadimgir...@gmail.com>
>
> In some cases TGSI shader has more color outputs than the number of CBs,
> so it seems we need to limit the number of color exports. This requires
> different shader variants depending on the nr_cbufs, but on the other hand
> we are doing less exports, which are very costly.
>
> v2: fix various piglit regressions
>
> Signed-off-by: Vadim Girlin <vadimgir...@gmail.com>
> Signed-off-by: Jerome Glisse <jgli...@redhat.com>
Thanks Vadim and Jerome!

No piglit regressions on my rv770
Improves results in fill demo by 50%

before:
Simple fill: 5.0 billion pixels/second
Blended fill: 4.9 billion pixels/second
Textured fill: 5.0 billion pixels/second
Shader1 fill: 4.9 billion pixels/second
Shader2 fill: 5.0 billion pixels/second

after:
Simple fill: 7.5 billion pixels/second
Blended fill: 7.4 billion pixels/second
Textured fill: 7.5 billion pixels/second
Shader1 fill: 7.5 billion pixels/second
Shader2 fill: 5.0 billion pixels/second

For the series:
Tested-by: Andreas Boll <andreas.boll....@gmail.com>

> ---
> src/gallium/drivers/r600/evergreen_state.c | 10 +++-------
> src/gallium/drivers/r600/r600_shader.c | 25 ++++++++++++++++++++++---
> src/gallium/drivers/r600/r600_shader.h | 7 ++++++-
> src/gallium/drivers/r600/r600_state.c | 2 ++
> src/gallium/drivers/r600/r600_state_common.c | 4 ++--
> 5 files changed, 35 insertions(+), 13 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/evergreen_state.c
> b/src/gallium/drivers/r600/evergreen_state.c
> index b618ca8..3fe95e1 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -2641,18 +2641,14 @@ void evergreen_pipe_shader_ps(struct pipe_context
> *ctx, struct r600_pipe_shader
> db_shader_control |= S_02880C_KILL_ENABLE(1);
>
> exports_ps = 0;
> - num_cout = 0;
> for (i = 0; i < rshader->noutput; i++) {
> if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
> rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
> exports_ps |= 1;
> - else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
> - if (rshader->fs_write_all)
> - num_cout = rshader->nr_cbufs;
> - else
> - num_cout++;
> - }
> }
> +
> + num_cout = rshader->nr_ps_color_exports;
> +
> exports_ps |= S_02884C_EXPORT_COLORS(num_cout);
> if (!exports_ps) {
> /* always at least export 1 component per pixel */
> diff --git a/src/gallium/drivers/r600/r600_shader.c
> b/src/gallium/drivers/r600/r600_shader.c
> index d294084..37914eb
100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -801,6 +801,12 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) > ctx->cv_output = i; > break; > } > + } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { > + switch (d->Semantic.Name) { > + case TGSI_SEMANTIC_COLOR: > + ctx->shader->nr_ps_max_color_exports++; > + break; > + } > } > break; > case TGSI_FILE_CONSTANT: > @@ -1153,8 +1159,10 @@ static int r600_shader_from_tgsi(struct r600_context * > rctx, struct r600_pipe_sh > ctx.colors_used = 0; > ctx.clip_vertex_write = 0; > > + shader->nr_ps_color_exports = 0; > + shader->nr_ps_max_color_exports = 0; > + > shader->two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT) && > rctx->two_side; > - shader->nr_cbufs = rctx->nr_cbufs; > > /* register allocations */ > /* Values [0,127] correspond to GPR[0..127]. > @@ -1289,6 +1297,9 @@ static int r600_shader_from_tgsi(struct r600_context * > rctx, struct r600_pipe_sh > } > } > > + if (shader->fs_write_all && rctx->chip_class >= EVERGREEN) > + shader->nr_ps_max_color_exports = 8; > + > if (ctx.fragcoord_input >= 0) { > if (ctx.bc->chip_class == CAYMAN) { > for (j = 0 ; j < 4; j++) { > @@ -1528,10 +1539,17 @@ static int r600_shader_from_tgsi(struct r600_context > * rctx, struct r600_pipe_sh > break; > case TGSI_PROCESSOR_FRAGMENT: > if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { > + /* never export more colors than the number > of CBs */ > + if (next_pixel_base && next_pixel_base >= > (rctx->nr_cbufs + rctx->dual_src_blend * 1)) { > + /* skip export */ > + j--; > + continue; > + } > output[j].array_base = next_pixel_base++; > output[j].type = > V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; > + shader->nr_ps_color_exports++; > if (shader->fs_write_all && (rctx->chip_class > >= EVERGREEN)) { > - for (k = 1; k < shader->nr_cbufs; > k++) { > + for (k = 1; k < rctx->nr_cbufs; k++) { > j++; > memset(&output[j], 0, > sizeof(struct r600_bytecode_output)); > output[j].gpr 
= > shader->output[i].gpr; > @@ -1545,6 +1563,7 @@ static int r600_shader_from_tgsi(struct r600_context * > rctx, struct r600_pipe_sh > output[j].array_base = > next_pixel_base++; > output[j].inst = > BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); > output[j].type = > V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; > + shader->nr_ps_color_exports++; > } > } > } else if (shader->output[i].name == > TGSI_SEMANTIC_POSITION) { > @@ -1595,7 +1614,7 @@ static int r600_shader_from_tgsi(struct r600_context * > rctx, struct r600_pipe_sh > } > > /* add fake pixel export */ > - if (ctx.type == TGSI_PROCESSOR_FRAGMENT && j == 0) { > + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) { > memset(&output[j], 0, sizeof(struct r600_bytecode_output)); > output[j].gpr = 0; > output[j].elem_size = 3; > diff --git a/src/gallium/drivers/r600/r600_shader.h > b/src/gallium/drivers/r600/r600_shader.h > index 2d35e77..eb0bbf6 100644 > --- a/src/gallium/drivers/r600/r600_shader.h > +++ b/src/gallium/drivers/r600/r600_shader.h > @@ -49,7 +49,12 @@ struct r600_shader { > boolean fs_write_all; > boolean vs_prohibit_ucps; > boolean two_side; > - unsigned nr_cbufs; > + /* Number of color outputs in the TGSI shader, > + * sometimes it could be higher than nr_cbufs (bug?). > + * Also with writes_all property on eg+ it will be set to max CB > number */ > + unsigned nr_ps_max_color_exports; > + /* Real number of ps color exports compiled in the bytecode */ > + unsigned nr_ps_color_exports; > /* bit n is set if the shader writes gl_ClipDistance[n] */ > unsigned clip_dist_write; > /* flag is set if the shader writes VS_OUT_MISC_VEC (e.g. 
for PSIZE) */ > diff --git a/src/gallium/drivers/r600/r600_state.c > b/src/gallium/drivers/r600/r600_state.c > index fc75781..b314edc 100644 > --- a/src/gallium/drivers/r600/r600_state.c > +++ b/src/gallium/drivers/r600/r600_state.c > @@ -1640,6 +1640,8 @@ static void r600_set_framebuffer_state(struct > pipe_context *ctx, > > /* build states */ > rctx->have_depth_fb = 0; > + rctx->nr_cbufs = state->nr_cbufs; > + > for (int i = 0; i < state->nr_cbufs; i++) { > r600_cb(rctx, rstate, state, i); > } > diff --git a/src/gallium/drivers/r600/r600_state_common.c > b/src/gallium/drivers/r600/r600_state_common.c > index 00e1bd0..7755c9e 100644 > --- a/src/gallium/drivers/r600/r600_state_common.c > +++ b/src/gallium/drivers/r600/r600_state_common.c > @@ -690,8 +690,8 @@ static void r600_update_derived_state(struct r600_context > *rctx) > } > > if ((rctx->ps_shader->shader.two_side != rctx->two_side) || > - ((rctx->chip_class >= EVERGREEN) && > rctx->ps_shader->shader.fs_write_all && > - (rctx->ps_shader->shader.nr_cbufs != rctx->nr_cbufs))) { > + (MIN2(rctx->ps_shader->shader.nr_ps_max_color_exports, > rctx->nr_cbufs + rctx->dual_src_blend) > + != rctx->ps_shader->shader.nr_ps_color_exports)) { > r600_shader_rebuild(&rctx->context, rctx->ps_shader); > } > > -- > 1.7.10.2 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev