On Mon, Nov 30, 2015 at 8:20 AM, Dave Airlie <airl...@gmail.com> wrote: > From: Dave Airlie <airl...@redhat.com> > > This changes the r600 specific GPR adjustment code > to use the stage defines, and arrays. > > This is prep work for the tess changes later. > > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > src/gallium/drivers/r600/r600_pipe.h | 2 +- > src/gallium/drivers/r600/r600_state.c | 117 > ++++++++++++++++++---------------- > 2 files changed, 64 insertions(+), 55 deletions(-) > > diff --git a/src/gallium/drivers/r600/r600_pipe.h > b/src/gallium/drivers/r600/r600_pipe.h > index 36fa1c9..0e57efe 100644 > --- a/src/gallium/drivers/r600/r600_pipe.h > +++ b/src/gallium/drivers/r600/r600_pipe.h > @@ -432,7 +432,7 @@ struct r600_context { > /* Hardware info. */ > boolean has_vertex_cache; > boolean keep_tiling_flags; > - unsigned default_ps_gprs, default_vs_gprs; > + unsigned default_gprs[EG_NUM_HW_STAGES]; > unsigned r6xx_num_clause_temp_gprs; > > /* Miscellaneous state objects. */ > diff --git a/src/gallium/drivers/r600/r600_state.c > b/src/gallium/drivers/r600/r600_state.c > index c2d4abc..a16d4bd 100644 > --- a/src/gallium/drivers/r600/r600_state.c > +++ b/src/gallium/drivers/r600/r600_state.c > @@ -2044,57 +2044,62 @@ static void r600_emit_gs_rings(struct r600_context > *rctx, struct r600_atom *a) > /* Adjust GPR allocation on R6xx/R7xx */ > bool r600_adjust_gprs(struct r600_context *rctx) > { > - unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr; > - unsigned num_vs_gprs, num_es_gprs, num_gs_gprs; > - unsigned new_num_ps_gprs = num_ps_gprs; > - unsigned new_num_vs_gprs, new_num_es_gprs, new_num_gs_gprs; > - unsigned cur_num_ps_gprs = > G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); > - unsigned cur_num_vs_gprs = > G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); > - unsigned cur_num_gs_gprs = > G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2); > - unsigned cur_num_es_gprs = > G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2); > - unsigned def_num_ps_gprs = rctx->default_ps_gprs; > - unsigned def_num_vs_gprs = rctx->default_vs_gprs; > - unsigned def_num_gs_gprs = 0; > - unsigned def_num_es_gprs = 0; > + unsigned num_gprs[R600_NUM_HW_STAGES]; > + unsigned new_gprs[R600_NUM_HW_STAGES]; > + unsigned cur_gprs[R600_NUM_HW_STAGES]; > + unsigned def_gprs[R600_NUM_HW_STAGES]; > unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs; > - /* hardware will reserve twice num_clause_temp_gprs */ > - unsigned max_gprs = def_num_gs_gprs + def_num_es_gprs + > def_num_ps_gprs + def_num_vs_gprs + def_num_clause_temp_gprs * 2; > + unsigned max_gprs; > unsigned tmp, tmp2; > + unsigned i; > + bool need_recalc = false, use_default = true; > + > + /* hardware will reserve twice num_clause_temp_gprs */ > + max_gprs = def_num_clause_temp_gprs * 2; > + for (i = 0; i < R600_NUM_HW_STAGES; i++) { > + def_gprs[i] = rctx->default_gprs[i]; > + max_gprs += def_gprs[i]; > + } > > + cur_gprs[R600_HW_STAGE_PS] = > G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); > + cur_gprs[R600_HW_STAGE_VS] = > G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); > + cur_gprs[R600_HW_STAGE_GS] = > G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2); > + cur_gprs[R600_HW_STAGE_ES] = > G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2); > + > + num_gprs[R600_HW_STAGE_PS] = rctx->ps_shader->current->shader.bc.ngpr; > if (rctx->gs_shader) { > - num_es_gprs = rctx->vs_shader->current->shader.bc.ngpr; > - num_gs_gprs = rctx->gs_shader->current->shader.bc.ngpr; > - num_vs_gprs = > rctx->gs_shader->current->gs_copy_shader->shader.bc.ngpr; > + num_gprs[R600_HW_STAGE_ES] = > rctx->vs_shader->current->shader.bc.ngpr; > + num_gprs[R600_HW_STAGE_GS] = > rctx->gs_shader->current->shader.bc.ngpr; > + num_gprs[R600_HW_STAGE_VS] = > rctx->gs_shader->current->gs_copy_shader->shader.bc.ngpr; > } else { > - num_es_gprs = 0; > - num_gs_gprs = 0; > - num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr; > + num_gprs[R600_HW_STAGE_ES] = 0; > + num_gprs[R600_HW_STAGE_GS] = 0; > + num_gprs[R600_HW_STAGE_VS] = > rctx->vs_shader->current->shader.bc.ngpr; > + } > + > + for (i = 0; i < R600_NUM_HW_STAGES; i++) { > + new_gprs[i] = num_gprs[i]; > + if (new_gprs[i] > cur_gprs[i]) > + need_recalc = true; > + if (new_gprs[i] > def_gprs[i]) > + use_default = false; > } > - new_num_vs_gprs = num_vs_gprs; > - new_num_es_gprs = num_es_gprs; > - new_num_gs_gprs = num_gs_gprs; > > /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to > max_gprs */ > - if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > > cur_num_vs_gprs || > - new_num_es_gprs > cur_num_es_gprs || new_num_gs_gprs > > cur_num_gs_gprs) { > - /* try to use switch back to default */ > - if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > > def_num_vs_gprs || > - new_num_gs_gprs > def_num_gs_gprs || new_num_es_gprs > > def_num_es_gprs) { > - /* always privilege vs stage so that at worst we have > the > - * pixel stage producing wrong output (not the vertex > - * stage) */ > - new_num_ps_gprs = max_gprs - ((new_num_vs_gprs + > new_num_es_gprs + new_num_gs_gprs) + def_num_clause_temp_gprs * 2); > - new_num_vs_gprs = num_vs_gprs; > - new_num_gs_gprs = num_gs_gprs; > - new_num_es_gprs = num_es_gprs; > - } else { > - new_num_ps_gprs = def_num_ps_gprs; > - new_num_vs_gprs = def_num_vs_gprs; > - new_num_es_gprs = def_num_es_gprs; > - new_num_gs_gprs = def_num_gs_gprs; > - } > - } else { > + if (!need_recalc) > return true; > + > + /* try to use switch back to default */ > + if (!use_default) { > + /* always privilege vs stage so that at worst we have the > + * pixel stage producing wrong output (not the vertex > + * stage) */ > + new_gprs[R600_HW_STAGE_PS] = max_gprs - > def_num_clause_temp_gprs * 2; > + for (i = R600_HW_STAGE_VS; i < R600_NUM_HW_STAGES; i++) > + new_gprs[R600_HW_STAGE_PS] -= new_gprs[i]; > + } else { > + for (i = 0; i < 4; i++)
Maybe it would be better to use a define instead of 4, i.e "i < R600_NUM_HW_STAGES" (like in the loop above) ? > + new_gprs[i] = def_gprs[i]; > } > > /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <= > @@ -2103,21 +2108,22 @@ bool r600_adjust_gprs(struct r600_context *rctx) > * it will lockup. So in this case just discard the draw command > * and don't change the current gprs repartitions. > */ > - if (num_ps_gprs > new_num_ps_gprs || num_vs_gprs > new_num_vs_gprs || > - num_gs_gprs > new_num_gs_gprs || num_es_gprs > new_num_es_gprs) { > - R600_ERR("shaders require too many register (%d + %d + %d + > %d) " > - "for a combined maximum of %d\n", > - num_ps_gprs, num_vs_gprs, num_es_gprs, num_gs_gprs, > max_gprs); > - return false; > + for (i = 0; i < R600_NUM_HW_STAGES; i++) { > + if (num_gprs[i] > new_gprs[i]) { > + R600_ERR("shaders require too many register (%d + %d > + %d + %d) " > + "for a combined maximum of %d\n", > + num_gprs[R600_HW_STAGE_PS], > num_gprs[R600_HW_STAGE_VS], num_gprs[R600_HW_STAGE_ES], > num_gprs[R600_HW_STAGE_GS], max_gprs); > + return false; > + } > } > > /* in some case we endup recomputing the current value */ > - tmp = S_008C04_NUM_PS_GPRS(new_num_ps_gprs) | > - S_008C04_NUM_VS_GPRS(new_num_vs_gprs) | > + tmp = S_008C04_NUM_PS_GPRS(new_gprs[R600_HW_STAGE_PS]) | > + S_008C04_NUM_VS_GPRS(new_gprs[R600_HW_STAGE_VS]) | > S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs); > > - tmp2 = S_008C08_NUM_ES_GPRS(new_num_es_gprs) | > - S_008C08_NUM_GS_GPRS(new_num_gs_gprs); > + tmp2 = S_008C08_NUM_ES_GPRS(new_gprs[R600_HW_STAGE_ES]) | > + S_008C08_NUM_GS_GPRS(new_gprs[R600_HW_STAGE_GS]); > if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp || > rctx->config_state.sq_gpr_resource_mgmt_2 != tmp2) { > rctx->config_state.sq_gpr_resource_mgmt_1 = tmp; > rctx->config_state.sq_gpr_resource_mgmt_2 = tmp2; > @@ -2285,8 +2291,11 @@ void r600_init_atom_start_cs(struct r600_context *rctx) > break; > } > > - rctx->default_ps_gprs = num_ps_gprs; > - rctx->default_vs_gprs = num_vs_gprs; > + rctx->default_gprs[R600_HW_STAGE_PS] = num_ps_gprs; > + rctx->default_gprs[R600_HW_STAGE_VS] = num_vs_gprs; > + rctx->default_gprs[R600_HW_STAGE_GS] = 0; > + rctx->default_gprs[R600_HW_STAGE_ES] = 0; > + > rctx->r6xx_num_clause_temp_gprs = num_temp_gprs; > > /* SQ_CONFIG */ > -- > 2.5.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev With the above fixed, this patch is: Reviewed-by: Oded Gabbay <oded.gab...@gmail.com> _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev