This looks good to me. Thank you. Marek
On Tue, Oct 30, 2012 at 11:04 PM, <j.gli...@gmail.com> wrote: > From: Jerome Glisse <jgli...@redhat.com> > > On r6xx/r7xx shader resource management need to make sure that the > shader does not goes over the gpr register limit. Each specific > asic has a maxmimum register that can be split btw shader stage. > For each stage the shader must not use more register than the > limit programmed. > > v2: Print an error message when discarding draw. Don't add another > boolean to context structure, but rather propagate the discard > boolean through the call chain. > > Signed-off-by: Jerome Glisse <jgli...@redhat.com> > --- > src/gallium/drivers/r600/r600_pipe.h | 2 +- > src/gallium/drivers/r600/r600_state.c | 67 > +++++++++++++++++++--------- > src/gallium/drivers/r600/r600_state_common.c | 27 ++++++----- > 3 files changed, 62 insertions(+), 34 deletions(-) > > diff --git a/src/gallium/drivers/r600/r600_pipe.h > b/src/gallium/drivers/r600/r600_pipe.h > index ff2a5fd..3edef40 100644 > --- a/src/gallium/drivers/r600/r600_pipe.h > +++ b/src/gallium/drivers/r600/r600_pipe.h > @@ -595,7 +595,7 @@ void *r600_create_db_flush_dsa(struct r600_context *rctx); > void *r600_create_resolve_blend(struct r600_context *rctx); > void *r700_create_resolve_blend(struct r600_context *rctx); > void *r600_create_decompress_blend(struct r600_context *rctx); > -void r600_adjust_gprs(struct r600_context *rctx); > +bool r600_adjust_gprs(struct r600_context *rctx); > boolean r600_is_format_supported(struct pipe_screen *screen, > enum pipe_format format, > enum pipe_texture_target target, > diff --git a/src/gallium/drivers/r600/r600_state.c > b/src/gallium/drivers/r600/r600_state.c > index 7d07008..76fe44d 100644 > --- a/src/gallium/drivers/r600/r600_state.c > +++ b/src/gallium/drivers/r600/r600_state.c > @@ -2187,36 +2187,61 @@ void r600_init_state_functions(struct r600_context > *rctx) > } > > /* Adjust GPR allocation on R6xx/R7xx */ > -void r600_adjust_gprs(struct r600_context *rctx) > +bool r600_adjust_gprs(struct r600_context *rctx) > { > - unsigned num_ps_gprs = rctx->default_ps_gprs; > - unsigned num_vs_gprs = rctx->default_vs_gprs; > + unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr; > + unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr; > + unsigned new_num_ps_gprs = num_ps_gprs; > + unsigned new_num_vs_gprs = num_vs_gprs; > + unsigned cur_num_ps_gprs = > G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); > + unsigned cur_num_vs_gprs = > G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); > + unsigned def_num_ps_gprs = rctx->default_ps_gprs; > + unsigned def_num_vs_gprs = rctx->default_vs_gprs; > + unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs; > + /* hardware will reserve twice num_clause_temp_gprs */ > + unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + > def_num_clause_temp_gprs * 2; > unsigned tmp; > - int diff; > > - if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) > { > - diff = rctx->ps_shader->current->shader.bc.ngpr - > rctx->default_ps_gprs; > - num_vs_gprs -= diff; > - num_ps_gprs += diff; > - } > - > - if (rctx->vs_shader->current->shader.bc.ngpr > rctx->default_vs_gprs) > - { > - diff = rctx->vs_shader->current->shader.bc.ngpr - > rctx->default_vs_gprs; > - num_ps_gprs -= diff; > - num_vs_gprs += diff; > + /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to > max_gprs */ > + if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > > cur_num_vs_gprs) { > + /* try to use switch back to default */ > + if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > > def_num_vs_gprs) { > + /* always privilege vs stage so that at worst we have > the > + * pixel stage producing wrong output (not the vertex > + * stage) */ > + new_num_ps_gprs = max_gprs - (new_num_vs_gprs + > def_num_clause_temp_gprs * 2); > + new_num_vs_gprs = num_vs_gprs; > + } else { > + new_num_ps_gprs = def_num_ps_gprs; > + new_num_vs_gprs = def_num_vs_gprs; > + } > + } else { > + return true; > } > > - tmp = 0; > - tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); > - tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); > - tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs); > - > - if (tmp != rctx->config_state.sq_gpr_resource_mgmt_1) { > + /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <= > + * SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the GPU will lockup > + * Also if a shader use more gpr than SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS > + * it will lockup. So in this case just discard the draw command > + * and don't change the current gprs repartitions. > + */ > + if (num_ps_gprs > new_num_ps_gprs || num_vs_gprs > new_num_vs_gprs) { > + R600_ERR("ps & vs shader require too many register (%d + %d) " > + "for a combined maximum of %d\n", > + num_ps_gprs, num_vs_gprs, max_gprs); > + return false; > + } > + > + /* in some case we endup recomputing the current value */ > + tmp = S_008C04_NUM_PS_GPRS(new_num_ps_gprs) | > + S_008C04_NUM_VS_GPRS(new_num_vs_gprs) | > + S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs); > + if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp) { > rctx->config_state.sq_gpr_resource_mgmt_1 = tmp; > rctx->config_state.atom.dirty = true; > rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; > } > + return true; > } > > void r600_init_atom_start_cs(struct r600_context *rctx) > diff --git a/src/gallium/drivers/r600/r600_state_common.c > b/src/gallium/drivers/r600/r600_state_common.c > index 65985c7..94e5e47 100644 > --- a/src/gallium/drivers/r600/r600_state_common.c > +++ b/src/gallium/drivers/r600/r600_state_common.c > @@ -755,10 +755,6 @@ static int r600_shader_select(struct pipe_context *ctx, > shader->next_variant = sel->current; > sel->current = shader; > > - if (rctx->chip_class < EVERGREEN && rctx->ps_shader && > rctx->vs_shader) { > - r600_adjust_gprs(rctx); > - } > - > if (rctx->ps_shader && > rctx->cb_misc_state.nr_ps_color_outputs != > rctx->ps_shader->current->nr_ps_color_outputs) { > rctx->cb_misc_state.nr_ps_color_outputs = > rctx->ps_shader->current->nr_ps_color_outputs; > @@ -814,9 +810,6 @@ static void r600_bind_ps_state(struct pipe_context *ctx, > void *state) > rctx->cb_misc_state.multiwrite = multiwrite; > rctx->cb_misc_state.atom.dirty = true; > } > - > - if (rctx->vs_shader) > - r600_adjust_gprs(rctx); > } > > if (rctx->cb_misc_state.nr_ps_color_outputs != > rctx->ps_shader->current->nr_ps_color_outputs) { > @@ -839,9 +832,6 @@ static void r600_bind_vs_state(struct pipe_context *ctx, > void *state) > if (state) { > r600_context_pipe_state_set(rctx, > &rctx->vs_shader->current->rstate); > > - if (rctx->chip_class < EVERGREEN && rctx->ps_shader) > - r600_adjust_gprs(rctx); > - > /* Update clip misc state. */ > if (rctx->vs_shader->current->pa_cl_vs_out_cntl != > rctx->clip_misc_state.pa_cl_vs_out_cntl || > rctx->vs_shader->current->shader.clip_dist_write != > rctx->clip_misc_state.clip_dist_write) { > @@ -1032,7 +1022,7 @@ static void r600_set_sample_mask(struct pipe_context > *pipe, unsigned sample_mask > rctx->sample_mask.atom.dirty = true; > } > > -static void r600_update_derived_state(struct r600_context *rctx) > +static bool r600_update_derived_state(struct r600_context *rctx) > { > struct pipe_context * ctx = (struct pipe_context*)rctx; > unsigned ps_dirty = 0; > @@ -1070,6 +1060,13 @@ static void r600_update_derived_state(struct > r600_context *rctx) > if (ps_dirty) > r600_context_pipe_state_set(rctx, > &rctx->ps_shader->current->rstate); > > + if (rctx->chip_class < EVERGREEN && rctx->ps_shader && > rctx->vs_shader) { > + if (!r600_adjust_gprs(rctx)) { > + /* discard rendering */ > + return false; > + } > + } > + > blend_disable = (rctx->dual_src_blend && > rctx->ps_shader->current->nr_ps_color_outputs < 2); > > @@ -1079,6 +1076,7 @@ static void r600_update_derived_state(struct > r600_context *rctx) > rctx->blend_state.cso, > blend_disable); > } > + return true; > } > > static unsigned r600_conv_prim_to_gs_out(unsigned mode) > @@ -1137,7 +1135,12 @@ static void r600_draw_vbo(struct pipe_context *ctx, > const struct pipe_draw_info > return; > } > > - r600_update_derived_state(rctx); > + if (!r600_update_derived_state(rctx)) { > + /* useless to render because current rendering command > + * can't be achieved > + */ > + return; > + } > > if (info.indexed) { > /* Initialize the index buffer struct. */ > -- > 1.7.11.7 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev