On Fri, Oct 26, 2012 at 10:01 PM, <j.gli...@gmail.com> wrote: > From: Jerome Glisse <jgli...@redhat.com> > > On r6xx/r7xx shader resource management need to make sure that the > shader does not goes over the gpr register limit. Each specific > asic has a maxmimum register that can be split btw shader stage. > For each stage the shader must not use more register than the > limit programmed.
We may also want to add a new parameter to the radeon info ioctl to fetch the GPR limit for each asic. Alex > > Signed-off-by: Jerome Glisse <jgli...@redhat.com> > --- > src/gallium/drivers/r600/r600_pipe.h | 1 + > src/gallium/drivers/r600/r600_state.c | 60 > +++++++++++++++++++--------- > src/gallium/drivers/r600/r600_state_common.c | 22 +++++----- > 3 files changed, 55 insertions(+), 28 deletions(-) > > diff --git a/src/gallium/drivers/r600/r600_pipe.h > b/src/gallium/drivers/r600/r600_pipe.h > index ff2a5fd..2045af3 100644 > --- a/src/gallium/drivers/r600/r600_pipe.h > +++ b/src/gallium/drivers/r600/r600_pipe.h > @@ -363,6 +363,7 @@ struct r600_context { > enum chip_class chip_class; > boolean has_vertex_cache; > boolean keep_tiling_flags; > + bool discard_draw; > unsigned default_ps_gprs, default_vs_gprs; > unsigned r6xx_num_clause_temp_gprs; > unsigned backend_mask; > diff --git a/src/gallium/drivers/r600/r600_state.c > b/src/gallium/drivers/r600/r600_state.c > index 7d07008..43af934 100644 > --- a/src/gallium/drivers/r600/r600_state.c > +++ b/src/gallium/drivers/r600/r600_state.c > @@ -2189,30 +2189,54 @@ void r600_init_state_functions(struct r600_context > *rctx) > /* Adjust GPR allocation on R6xx/R7xx */ > void r600_adjust_gprs(struct r600_context *rctx) > { > - unsigned num_ps_gprs = rctx->default_ps_gprs; > - unsigned num_vs_gprs = rctx->default_vs_gprs; > + unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr; > + unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr; > + unsigned new_num_ps_gprs = num_ps_gprs; > + unsigned new_num_vs_gprs = num_vs_gprs; > + unsigned cur_num_ps_gprs = > G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); > + unsigned cur_num_vs_gprs = > G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); > + unsigned def_num_ps_gprs = rctx->default_ps_gprs; > + unsigned def_num_vs_gprs = rctx->default_vs_gprs; > + unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs; > + /* hardware will reserve twice num_clause_temp_gprs */ > + unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + > def_num_clause_temp_gprs * 2; > unsigned tmp; > - int diff; > > - if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) > { > - diff = rctx->ps_shader->current->shader.bc.ngpr - > rctx->default_ps_gprs; > - num_vs_gprs -= diff; > - num_ps_gprs += diff; > + /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to > max_gprs */ > + if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > > cur_num_vs_gprs) { > + /* try to use switch back to default */ > + if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > > def_num_vs_gprs) { > + /* always privilege vs stage so that at worst we have > the > + * pixel stage producing wrong output (not the vertex > + * stage) */ > + new_num_ps_gprs = max_gprs - (new_num_vs_gprs + > def_num_clause_temp_gprs * 2); > + new_num_vs_gprs = num_vs_gprs; > + } else { > + new_num_ps_gprs = def_num_ps_gprs; > + new_num_vs_gprs = def_num_vs_gprs; > + } > + } else { > + rctx->discard_draw = false; > + return; > } > > - if (rctx->vs_shader->current->shader.bc.ngpr > rctx->default_vs_gprs) > - { > - diff = rctx->vs_shader->current->shader.bc.ngpr - > rctx->default_vs_gprs; > - num_ps_gprs -= diff; > - num_vs_gprs += diff; > + /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <= > + * SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the GPU will lockup > + * Also if a shader use more gpr than SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS > + * it will lockup. So in this case just discard the draw command > + * and don't change the current gprs repartitions. > + */ > + rctx->discard_draw = false; > + if (num_ps_gprs > new_num_ps_gprs || num_vs_gprs > new_num_vs_gprs) { > + rctx->discard_draw = true; > + return; > } > > - tmp = 0; > - tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); > - tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); > - tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs); > - > - if (tmp != rctx->config_state.sq_gpr_resource_mgmt_1) { > + /* in some case we endup recomputing the current value */ > + tmp = S_008C04_NUM_PS_GPRS(new_num_ps_gprs) | > + S_008C04_NUM_VS_GPRS(new_num_vs_gprs) | > + S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs); > + if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp) { > rctx->config_state.sq_gpr_resource_mgmt_1 = tmp; > rctx->config_state.atom.dirty = true; > rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; > diff --git a/src/gallium/drivers/r600/r600_state_common.c > b/src/gallium/drivers/r600/r600_state_common.c > index 65985c7..5827efb 100644 > --- a/src/gallium/drivers/r600/r600_state_common.c > +++ b/src/gallium/drivers/r600/r600_state_common.c > @@ -755,10 +755,6 @@ static int r600_shader_select(struct pipe_context *ctx, > shader->next_variant = sel->current; > sel->current = shader; > > - if (rctx->chip_class < EVERGREEN && rctx->ps_shader && > rctx->vs_shader) { > - r600_adjust_gprs(rctx); > - } > - > if (rctx->ps_shader && > rctx->cb_misc_state.nr_ps_color_outputs != > rctx->ps_shader->current->nr_ps_color_outputs) { > rctx->cb_misc_state.nr_ps_color_outputs = > rctx->ps_shader->current->nr_ps_color_outputs; > @@ -814,9 +810,6 @@ static void r600_bind_ps_state(struct pipe_context *ctx, > void *state) > rctx->cb_misc_state.multiwrite = multiwrite; > rctx->cb_misc_state.atom.dirty = true; > } > - > - if (rctx->vs_shader) > - r600_adjust_gprs(rctx); > } > > if (rctx->cb_misc_state.nr_ps_color_outputs != > rctx->ps_shader->current->nr_ps_color_outputs) { > @@ -839,9 +832,6 @@ static void r600_bind_vs_state(struct pipe_context *ctx, > void *state) > if (state) { > r600_context_pipe_state_set(rctx, > &rctx->vs_shader->current->rstate); > > - if (rctx->chip_class < EVERGREEN && rctx->ps_shader) > - r600_adjust_gprs(rctx); > - > /* Update clip misc state. */ > if (rctx->vs_shader->current->pa_cl_vs_out_cntl != > rctx->clip_misc_state.pa_cl_vs_out_cntl || > rctx->vs_shader->current->shader.clip_dist_write != > rctx->clip_misc_state.clip_dist_write) { > @@ -1055,6 +1045,10 @@ static void r600_update_derived_state(struct > r600_context *rctx) > > r600_shader_select(ctx, rctx->ps_shader, &ps_dirty); > > + if (rctx->chip_class < EVERGREEN && rctx->ps_shader && > rctx->vs_shader) { > + r600_adjust_gprs(rctx); > + } > + > if (rctx->ps_shader && rctx->rasterizer && > ((rctx->rasterizer->sprite_coord_enable != > rctx->ps_shader->current->sprite_coord_enable) || > (rctx->rasterizer->flatshade != > rctx->ps_shader->current->flatshade))) { > @@ -1139,6 +1133,14 @@ static void r600_draw_vbo(struct pipe_context *ctx, > const struct pipe_draw_info > > r600_update_derived_state(rctx); > > + if (rctx->discard_draw) { > + /* no need to clutter command stream with useless state > + * state change. Draw call are discarded if shader is > + * over the limit. See r600_adjust_gprs > + */ > + return; > + } > + > if (info.indexed) { > /* Initialize the index buffer struct. */ > pipe_resource_reference(&ib.buffer, > rctx->index_buffer.buffer); > -- > 1.7.11.7 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev