On Fri, Mar 14, 2014 at 08:15:33PM +0200, Pohjolainen, Topi wrote: > On Tue, Mar 11, 2014 at 11:48:54PM -0700, Kenneth Graunke wrote: > > Previously, remove_dead_constants() would renumber the UNIFORM registers > > to be sequential starting from zero, and the resulting register number > > would be used directly as an index into the params[] array. > > > > This renumbering made it difficult to collect and save information about > > pull constant locations, since setup_pull_constants() and > > move_uniform_array_access_to_pull_constants() used different names. > > > > This patch generalizes setup_pull_constants() to decide whether each > > uniform register should be a pull constant, push constant, or neither > > (because it's unused). Then, it stores mappings from UNIFORM register > > numbers to params[] or pull_params[] indices in the push_constant_loc > > and pull_constant_loc arrays. (We already did this for pull constants.) > > > > Then, assign_curb_setup() just needs to consult the push_constant_loc > > array to get the real index into the params[] array. > > > > This effectively folds all the remove_dead_constants() functionality > > into assign_constant_locations(), while being less irritable to work > > with. > > > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > > --- > > src/mesa/drivers/dri/i965/brw_fs.cpp | 187 > > +++++++++++---------------- > > src/mesa/drivers/dri/i965/brw_fs.h | 13 +- > > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 3 +- > > 3 files changed, 85 insertions(+), 118 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > > b/src/mesa/drivers/dri/i965/brw_fs.cpp > > index 3c8237a..5e01e78 100644 > > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > > @@ -885,8 +885,8 @@ fs_visitor::import_uniforms(fs_visitor *v) > > hash_table_call_foreach(v->variable_ht, > > import_uniforms_callback, > > variable_ht); > > - this->params_remap = v->params_remap; > > - this->nr_params_remap = v->nr_params_remap; > > + this->push_constant_loc = v->push_constant_loc; > > + this->uniforms = v->uniforms; > > } > > > > /* Our support for uniforms is piggy-backed on the struct > > @@ -1397,11 +1397,8 @@ fs_visitor::assign_curb_setup() > > { > > if (dispatch_width == 8) { > > c->prog_data.first_curbe_grf = c->nr_payload_regs; > > - stage_prog_data->nr_params = uniforms; > > } else { > > c->prog_data.first_curbe_grf_16 = c->nr_payload_regs; > > - /* Make sure we didn't try to sneak in an extra uniform */ > > - assert(uniforms == 0); > > } > > > > c->prog_data.curb_read_length = ALIGN(stage_prog_data->nr_params, 8) / > > 8; > > @@ -1412,7 +1409,19 @@ fs_visitor::assign_curb_setup() > > > > for (unsigned int i = 0; i < 3; i++) { > > if (inst->src[i].file == UNIFORM) { > > - int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; > > + int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset; > > + int constant_nr; > > + if (uniform_nr >= 0 && uniform_nr < (int) uniforms) { > > + constant_nr = push_constant_loc[uniform_nr]; > > + } else { > > + /* Section 5.11 of the OpenGL 4.1 spec says: > > + * "Out-of-bounds reads return undefined values, which > > include > > + * values from other variables of the active program or > > zero." > > + * Just return the first push constant. > > + */ > > + constant_nr = 0; > > + } > > + > > struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs + > > constant_nr / 8, > > constant_nr % 8); > > @@ -1724,94 +1733,6 @@ fs_visitor::compact_virtual_grfs() > > } > > } > > > > -bool > > -fs_visitor::remove_dead_constants() > > -{ > > - if (dispatch_width == 8) { > > - this->params_remap = ralloc_array(mem_ctx, int, uniforms); > > - this->nr_params_remap = uniforms; > > - > > - for (unsigned int i = 0; i < uniforms; i++) > > - this->params_remap[i] = -1; > > - > > - /* Find which params are still in use. */ > > - foreach_list(node, &this->instructions) { > > - fs_inst *inst = (fs_inst *)node; > > - > > - for (int i = 0; i < 3; i++) { > > - int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; > > - > > - if (inst->src[i].file != UNIFORM) > > - continue; > > - > > - /* Section 5.11 of the OpenGL 4.3 spec says: > > - * > > - * "Out-of-bounds reads return undefined values, which include > > - * values from other variables of the active program or zero." > > - */ > > - if (constant_nr < 0 || constant_nr >= (int)uniforms) { > > - constant_nr = 0; > > - } > > - > > - /* For now, set this to non-negative. We'll give it the > > - * actual new number in a moment, in order to keep the > > - * register numbers nicely ordered. > > - */ > > - this->params_remap[constant_nr] = 0; > > - } > > - } > > - > > - /* Figure out what the new numbers for the params will be. At some > > - * point when we're doing uniform array access, we're going to want > > - * to keep the distinction between .reg and .reg_offset, but for > > - * now we don't care. > > - */ > > - unsigned int new_nr_params = 0; > > - for (unsigned int i = 0; i < uniforms; i++) { > > - if (this->params_remap[i] != -1) { > > - this->params_remap[i] = new_nr_params++; > > - } > > - } > > - > > - /* Update the list of params to be uploaded to match our new > > numbering. */ > > - for (unsigned int i = 0; i < uniforms; i++) { > > - int remapped = this->params_remap[i]; > > - > > - if (remapped == -1) > > - continue; > > - > > - stage_prog_data->param[remapped] = stage_prog_data->param[i]; > > - } > > - > > - uniforms = new_nr_params; > > - } else { > > - /* This should have been generated in the SIMD8 pass already. */ > > - assert(this->params_remap); > > - } > > - > > - /* Now do the renumbering of the shader to remove unused params. */ > > - foreach_list(node, &this->instructions) { > > - fs_inst *inst = (fs_inst *)node; > > - > > - for (int i = 0; i < 3; i++) { > > - int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; > > - > > - if (inst->src[i].file != UNIFORM) > > - continue; > > - > > - /* as above alias to 0 */ > > - if (constant_nr < 0 || constant_nr >= (int)this->nr_params_remap) { > > - constant_nr = 0; > > - } > > - assert(this->params_remap[constant_nr] != -1); > > - inst->src[i].reg = this->params_remap[constant_nr]; > > - inst->src[i].reg_offset = 0; > > - } > > - } > > - > > - return true; > > -} > > - > > /* > > * Implements array access of uniforms by inserting a > > * PULL_CONSTANT_LOAD instruction. > > @@ -1872,8 +1793,7 @@ > > fs_visitor::move_uniform_array_access_to_pull_constants() > > } > > > > /** > > - * Choose accesses from the UNIFORM file to demote to using the pull > > - * constant buffer. > > + * Assign UNIFORM file registers to either push constants or pull > > constants. > > * > > * We allow a fragment shader to have more than the specified minimum > > * maximum number of fragment shader uniform components (64). If > > @@ -1882,24 +1802,62 @@ > > fs_visitor::move_uniform_array_access_to_pull_constants() > > * update the program to load them. > > */ > > void > > -fs_visitor::setup_pull_constants() > > +fs_visitor::assign_constant_locations() > > { > > - /* Only allow 16 registers (128 uniform components) as push constants. > > */ > > - unsigned int max_uniform_components = 16 * 8; > > - if (uniforms <= max_uniform_components) > > + /* Only the first compile (SIMD8 mode) gets to decide on locations. */ > > + if (dispatch_width != 8) > > return; > > > > - /* Just demote the end of the list. We could probably do better > > + /* Find which UNIFORM registers are still in use. */ > > + bool is_live[uniforms]; > > + for (unsigned int i = 0; i < uniforms; i++) { > > + is_live[i] = false; > > + } > > + > > + foreach_list(node, &this->instructions) { > > + fs_inst *inst = (fs_inst *) node; > > + > > + for (int i = 0; i < 3; i++) { > > + if (inst->src[i].file != UNIFORM) > > + continue; > > + > > + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; > > + if (constant_nr >= 0 && constant_nr < (int) uniforms) > > + is_live[constant_nr] = true; > > + } > > + } > > + > > + /* Only allow 16 registers (128 uniform components) as push constants. > > + * > > + * Just demote the end of the list. We could probably do better > > * here, demoting things that are rarely used in the program first. > > */ > > - unsigned int pull_uniform_base = max_uniform_components; > > + unsigned int max_push_components = 16 * 8; > > This could be declared as constant. Otherwise I've read this over a few times > and it looks just fine. Makes it simpler and consistent between push and pull. > > Reviewed-by: Topi Pohjolainen <topi.pohjolai...@intel.com> > > You might want to have a second opinion though.
Patches 6 and 7 are also Reviewed-by: Topi Pohjolainen <topi.pohjolai...@intel.com> > > > + unsigned int num_push_constants = 0; > > > > + push_constant_loc = ralloc_array(mem_ctx, int, uniforms); > > pull_constant_loc = ralloc_array(mem_ctx, int, uniforms); > > + for (unsigned int i = 0; i < uniforms; i++) > > + pull_constant_loc[i] = -1; > > + > > for (unsigned int i = 0; i < uniforms; i++) { > > - if (i < pull_uniform_base) { > > - pull_constant_loc[i] = -1; > > + if (!is_live[i] || pull_constant_loc[i] != -1) { > > + /* This UNIFORM register is either dead, or has already been > > demoted > > + * to a pull const. Mark it as no longer living in the param[] > > array. > > + */ > > + push_constant_loc[i] = -1; > > + continue; > > + } > > + > > + if (num_push_constants < max_push_components) { > > + /* Retain as a push constant. Record the location in the params[] > > + * array. > > + */ > > + push_constant_loc[i] = num_push_constants++; > > } else { > > - pull_constant_loc[i] = -1; > > + /* Demote to a pull constant. */ > > + push_constant_loc[i] = -1; > > + > > /* If our constant is already being uploaded for reladdr purposes, > > * reuse it. > > */ > > @@ -1916,7 +1874,21 @@ fs_visitor::setup_pull_constants() > > } > > } > > } > > - uniforms = pull_uniform_base; > > + > > + stage_prog_data->nr_params = num_push_constants; > > + > > + /* Up until now, the param[] array has been indexed by reg + reg_offset > > + * of UNIFORM registers. Condense it to only contain the uniforms we > > + * chose to upload as push constants. > > + */ > > + for (unsigned int i = 0; i < uniforms; i++) { > > + int remapped = push_constant_loc[i]; > > + > > + if (remapped == -1) > > + continue; > > + > > + stage_prog_data->param[remapped] = stage_prog_data->param[i]; > > + } > > > > demote_pull_constants(false); > > } > > @@ -3408,8 +3380,7 @@ fs_visitor::run() > > split_virtual_grfs(); > > > > move_uniform_array_access_to_pull_constants(); > > - remove_dead_constants(); > > - setup_pull_constants(); > > + assign_constant_locations(); > > > > opt_drop_redundant_mov_to_flags(); > > > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > > b/src/mesa/drivers/dri/i965/brw_fs.h > > index 2ef5a29..5733a49 100644 > > --- a/src/mesa/drivers/dri/i965/brw_fs.h > > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > > @@ -357,7 +357,7 @@ public: > > void split_virtual_grfs(); > > void compact_virtual_grfs(); > > void move_uniform_array_access_to_pull_constants(); > > - void setup_pull_constants(); > > + void assign_constant_locations(); > > void demote_pull_constants(bool reladdr_only); > > void invalidate_live_intervals(); > > void calculate_live_intervals(); > > @@ -375,7 +375,6 @@ public: > > bool compute_to_mrf(); > > bool dead_code_eliminate(); > > bool dead_code_eliminate_local(); > > - bool remove_dead_constants(); > > bool remove_duplicate_mrf_writes(); > > bool virtual_grf_interferes(int a, int b); > > void schedule_instructions(instruction_scheduler_mode mode); > > @@ -516,13 +515,11 @@ public: > > */ > > int *pull_constant_loc; > > > > - /* This is the map from UNIFORM hw_reg + reg_offset as generated by > > - * the visitor to the packed uniform number after > > - * remove_dead_constants() that represents the actual uploaded > > - * uniform index. > > + /** > > + * Array mapping UNIFORM register numbers to the push parameter index, > > + * or -1 if this uniform register isn't being uploaded as a push > > constant. > > */ > > - int *params_remap; > > - int nr_params_remap; > > + int *push_constant_loc; > > > > struct hash_table *variable_ht; > > fs_reg frag_depth; > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > > index 404af30..7088502 100644 > > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > > @@ -2972,8 +2972,7 @@ fs_visitor::fs_visitor(struct brw_context *brw, > > > > this->uniforms = 0; > > this->pull_constant_loc = NULL; > > - this->params_remap = NULL; > > - this->nr_params_remap = 0; > > + this->push_constant_loc = NULL; > > > > this->force_uncompressed_stack = 0; > > > > -- > > 1.9.0 > > > > _______________________________________________ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev