FWIW, instead of putting the discard_draw flag in r600_context, it would be cleaner to have r600_adjust_gprs return false if drawing should be skipped, then r600_update_derived_state would return false and draw_vbo would skip rendering. That way you wouldn't have to add any comments in draw_vbo, because it would be clear where the error is coming from.
Using R600_ERR to report the error should be sufficient. r600_adjust_gprs seems to be the best place for that. Marek On Sat, Oct 27, 2012 at 3:47 AM, Jerome Glisse <j.gli...@gmail.com> wrote: > On Fri, Oct 26, 2012 at 10:01 PM, <j.gli...@gmail.com> wrote: >> From: Jerome Glisse <jgli...@redhat.com> >> >> On r6xx/r7xx shader resource management need to make sure that the >> shader does not goes over the gpr register limit. Each specific >> asic has a maxmimum register that can be split btw shader stage. >> For each stage the shader must not use more register than the >> limit programmed. >> >> Signed-off-by: Jerome Glisse <jgli...@redhat.com> > > I haven't yet fully tested it on wide range of GPU but it fixes piglit > case that were locking up o one can directly use quick-drivers. I > mostly would like feedback on if we should print a warning when we > discard a draw command because shader exceed limit. > > Note that with this patch the test that were locking up fails but with > a simple patch on top of that (decreasing clause temp gpr to 2) they > pass. > > Regards, > Jerome > >> --- >> src/gallium/drivers/r600/r600_pipe.h | 1 + >> src/gallium/drivers/r600/r600_state.c | 60 >> +++++++++++++++++++--------- >> src/gallium/drivers/r600/r600_state_common.c | 22 +++++----- >> 3 files changed, 55 insertions(+), 28 deletions(-) >> >> diff --git a/src/gallium/drivers/r600/r600_pipe.h >> b/src/gallium/drivers/r600/r600_pipe.h >> index ff2a5fd..2045af3 100644 >> --- a/src/gallium/drivers/r600/r600_pipe.h >> +++ b/src/gallium/drivers/r600/r600_pipe.h >> @@ -363,6 +363,7 @@ struct r600_context { >> enum chip_class chip_class; >> boolean has_vertex_cache; >> boolean keep_tiling_flags; >> + bool discard_draw; >> unsigned default_ps_gprs, default_vs_gprs; >> unsigned r6xx_num_clause_temp_gprs; >> unsigned backend_mask; >> diff --git a/src/gallium/drivers/r600/r600_state.c >> b/src/gallium/drivers/r600/r600_state.c >> index 7d07008..43af934 100644 >> --- a/src/gallium/drivers/r600/r600_state.c >> +++ b/src/gallium/drivers/r600/r600_state.c >> @@ -2189,30 +2189,54 @@ void r600_init_state_functions(struct r600_context >> *rctx) >> /* Adjust GPR allocation on R6xx/R7xx */ >> void r600_adjust_gprs(struct r600_context *rctx) >> { >> - unsigned num_ps_gprs = rctx->default_ps_gprs; >> - unsigned num_vs_gprs = rctx->default_vs_gprs; >> + unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr; >> + unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr; >> + unsigned new_num_ps_gprs = num_ps_gprs; >> + unsigned new_num_vs_gprs = num_vs_gprs; >> + unsigned cur_num_ps_gprs = >> G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); >> + unsigned cur_num_vs_gprs = >> G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); >> + unsigned def_num_ps_gprs = rctx->default_ps_gprs; >> + unsigned def_num_vs_gprs = rctx->default_vs_gprs; >> + unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs; >> + /* hardware will reserve twice num_clause_temp_gprs */ >> + unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + >> def_num_clause_temp_gprs * 2; >> unsigned tmp; >> - int diff; >> >> - if (rctx->ps_shader->current->shader.bc.ngpr > >> rctx->default_ps_gprs) { >> - diff = rctx->ps_shader->current->shader.bc.ngpr - >> rctx->default_ps_gprs; >> - num_vs_gprs -= diff; >> - num_ps_gprs += diff; >> + /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to >> max_gprs */ >> + if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > >> cur_num_vs_gprs) { >> + /* try to use switch back to default */ >> + if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > >> def_num_vs_gprs) { >> + /* always privilege vs stage so that at worst we >> have the >> + * pixel stage producing wrong output (not the vertex >> + * stage) */ >> + new_num_ps_gprs = max_gprs - (new_num_vs_gprs + >> def_num_clause_temp_gprs * 2); >> + new_num_vs_gprs = num_vs_gprs; >> + } else { >> + new_num_ps_gprs = def_num_ps_gprs; >> + new_num_vs_gprs = def_num_vs_gprs; >> + } >> + } else { >> + rctx->discard_draw = false; >> + return; >> } >> >> - if (rctx->vs_shader->current->shader.bc.ngpr > rctx->default_vs_gprs) >> - { >> - diff = rctx->vs_shader->current->shader.bc.ngpr - >> rctx->default_vs_gprs; >> - num_ps_gprs -= diff; >> - num_vs_gprs += diff; >> + /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <= >> + * SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the GPU will lockup >> + * Also if a shader use more gpr than >> SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS >> + * it will lockup. So in this case just discard the draw command >> + * and don't change the current gprs repartitions. >> + */ >> + rctx->discard_draw = false; >> + if (num_ps_gprs > new_num_ps_gprs || num_vs_gprs > new_num_vs_gprs) { >> + rctx->discard_draw = true; >> + return; >> } >> >> - tmp = 0; >> - tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); >> - tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); >> - tmp |= >> S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs); >> - >> - if (tmp != rctx->config_state.sq_gpr_resource_mgmt_1) { >> + /* in some case we endup recomputing the current value */ >> + tmp = S_008C04_NUM_PS_GPRS(new_num_ps_gprs) | >> + S_008C04_NUM_VS_GPRS(new_num_vs_gprs) | >> + S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs); >> + if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp) { >> rctx->config_state.sq_gpr_resource_mgmt_1 = tmp; >> rctx->config_state.atom.dirty = true; >> rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; >> diff --git a/src/gallium/drivers/r600/r600_state_common.c >> b/src/gallium/drivers/r600/r600_state_common.c >> index 65985c7..5827efb 100644 >> --- a/src/gallium/drivers/r600/r600_state_common.c >> +++ b/src/gallium/drivers/r600/r600_state_common.c >> @@ -755,10 +755,6 @@ static int r600_shader_select(struct pipe_context *ctx, >> shader->next_variant = sel->current; >> sel->current = shader; >> >> - if (rctx->chip_class < EVERGREEN && rctx->ps_shader && >> rctx->vs_shader) { >> - r600_adjust_gprs(rctx); >> - } >> - >> if (rctx->ps_shader && >> rctx->cb_misc_state.nr_ps_color_outputs != >> rctx->ps_shader->current->nr_ps_color_outputs) { >> rctx->cb_misc_state.nr_ps_color_outputs = >> rctx->ps_shader->current->nr_ps_color_outputs; >> @@ -814,9 +810,6 @@ static void r600_bind_ps_state(struct pipe_context *ctx, >> void *state) >> rctx->cb_misc_state.multiwrite = multiwrite; >> rctx->cb_misc_state.atom.dirty = true; >> } >> - >> - if (rctx->vs_shader) >> - r600_adjust_gprs(rctx); >> } >> >> if (rctx->cb_misc_state.nr_ps_color_outputs != >> rctx->ps_shader->current->nr_ps_color_outputs) { >> @@ -839,9 +832,6 @@ static void r600_bind_vs_state(struct pipe_context *ctx, >> void *state) >> if (state) { >> r600_context_pipe_state_set(rctx, >> &rctx->vs_shader->current->rstate); >> >> - if (rctx->chip_class < EVERGREEN && rctx->ps_shader) >> - r600_adjust_gprs(rctx); >> - >> /* Update clip misc state. */ >> if (rctx->vs_shader->current->pa_cl_vs_out_cntl != >> rctx->clip_misc_state.pa_cl_vs_out_cntl || >> rctx->vs_shader->current->shader.clip_dist_write != >> rctx->clip_misc_state.clip_dist_write) { >> @@ -1055,6 +1045,10 @@ static void r600_update_derived_state(struct >> r600_context *rctx) >> >> r600_shader_select(ctx, rctx->ps_shader, &ps_dirty); >> >> + if (rctx->chip_class < EVERGREEN && rctx->ps_shader && >> rctx->vs_shader) { >> + r600_adjust_gprs(rctx); >> + } >> + >> if (rctx->ps_shader && rctx->rasterizer && >> ((rctx->rasterizer->sprite_coord_enable != >> rctx->ps_shader->current->sprite_coord_enable) || >> (rctx->rasterizer->flatshade != >> rctx->ps_shader->current->flatshade))) { >> @@ -1139,6 +1133,14 @@ static void r600_draw_vbo(struct pipe_context *ctx, >> const struct pipe_draw_info >> >> r600_update_derived_state(rctx); >> >> + if (rctx->discard_draw) { >> + /* no need to clutter command stream with useless state >> + * state change. Draw call are discarded if shader is >> + * over the limit. See r600_adjust_gprs >> + */ >> + return; >> + } >> + >> if (info.indexed) { >> /* Initialize the index buffer struct. */ >> pipe_resource_reference(&ib.buffer, >> rctx->index_buffer.buffer); >> -- >> 1.7.11.7 >> > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev