On 03/12/2014 03:14 AM, Pohjolainen, Topi wrote: > On Tue, Mar 11, 2014 at 11:48:54PM -0700, Kenneth Graunke wrote: >> Previously, remove_dead_constants() would renumber the UNIFORM registers >> to be sequential starting from zero, and the resulting register number >> would be used directly as an index into the params[] array. >> >> This renumbering made it difficult to collect and save information about >> pull constant locations, since setup_pull_constants() and >> move_uniform_array_access_to_pull_constants() used different names. >> >> This patch generalizes setup_pull_constants() to decide whether each >> uniform register should be a pull constant, push constant, or neither >> (because it's unused). Then, it stores mappings from UNIFORM register >> numbers to params[] or pull_params[] indices in the push_constant_loc >> and pull_constant_loc arrays. (We already did this for pull constants.) >> >> Then, assign_curb_setup() just needs to consult the push_constant_loc >> array to get the real index into the params[] array. >> >> This effectively folds all the remove_dead_constants() functionality >> into assign_constant_locations(), while being less irritable to work >> with. >> >> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> >> --- >> src/mesa/drivers/dri/i965/brw_fs.cpp | 187 >> +++++++++++---------------- >> src/mesa/drivers/dri/i965/brw_fs.h | 13 +- >> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 3 +- >> 3 files changed, 85 insertions(+), 118 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp >> b/src/mesa/drivers/dri/i965/brw_fs.cpp >> index 3c8237a..5e01e78 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp >> @@ -885,8 +885,8 @@ fs_visitor::import_uniforms(fs_visitor *v) >> hash_table_call_foreach(v->variable_ht, >> import_uniforms_callback, >> variable_ht); >> - this->params_remap = v->params_remap; >> - this->nr_params_remap = v->nr_params_remap; >> + this->push_constant_loc = v->push_constant_loc; >> + this->uniforms = v->uniforms; >> } >> >> /* Our support for uniforms is piggy-backed on the struct >> @@ -1397,11 +1397,8 @@ fs_visitor::assign_curb_setup() >> { >> if (dispatch_width == 8) { >> c->prog_data.first_curbe_grf = c->nr_payload_regs; >> - stage_prog_data->nr_params = uniforms; >> } else { >> c->prog_data.first_curbe_grf_16 = c->nr_payload_regs; >> - /* Make sure we didn't try to sneak in an extra uniform */ >> - assert(uniforms == 0); >> } >> >> c->prog_data.curb_read_length = ALIGN(stage_prog_data->nr_params, 8) / 8; >> @@ -1412,7 +1409,19 @@ fs_visitor::assign_curb_setup() >> >> for (unsigned int i = 0; i < 3; i++) { >> if (inst->src[i].file == UNIFORM) { >> - int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; >> + int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset; >> + int constant_nr; >> + if (uniform_nr >= 0 && uniform_nr < (int) uniforms) { >> + constant_nr = push_constant_loc[uniform_nr]; >> + } else { >> + /* Section 5.11 of the OpenGL 4.1 spec says: >> + * "Out-of-bounds reads return undefined values, which >> include >> + * values from other variables of the active program or >> zero." >> + * Just return the first push constant. >> + */ >> + constant_nr = 0; >> + } >> + >> struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs + >> constant_nr / 8, >> constant_nr % 8); >> @@ -1724,94 +1733,6 @@ fs_visitor::compact_virtual_grfs() >> } >> } >> >> -bool >> -fs_visitor::remove_dead_constants() >> -{ >> - if (dispatch_width == 8) { >> - this->params_remap = ralloc_array(mem_ctx, int, uniforms); >> - this->nr_params_remap = uniforms; >> - >> - for (unsigned int i = 0; i < uniforms; i++) >> - this->params_remap[i] = -1; >> - >> - /* Find which params are still in use. */ >> - foreach_list(node, &this->instructions) { >> - fs_inst *inst = (fs_inst *)node; >> - >> - for (int i = 0; i < 3; i++) { >> - int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; >> - >> - if (inst->src[i].file != UNIFORM) >> - continue; >> - >> - /* Section 5.11 of the OpenGL 4.3 spec says: >> - * >> - * "Out-of-bounds reads return undefined values, which include >> - * values from other variables of the active program or zero." >> - */ >> - if (constant_nr < 0 || constant_nr >= (int)uniforms) { >> - constant_nr = 0; >> - } >> - >> - /* For now, set this to non-negative. We'll give it the >> - * actual new number in a moment, in order to keep the >> - * register numbers nicely ordered. >> - */ >> - this->params_remap[constant_nr] = 0; >> - } >> - } >> - >> - /* Figure out what the new numbers for the params will be. At some >> - * point when we're doing uniform array access, we're going to want >> - * to keep the distinction between .reg and .reg_offset, but for >> - * now we don't care. >> - */ >> - unsigned int new_nr_params = 0; >> - for (unsigned int i = 0; i < uniforms; i++) { >> - if (this->params_remap[i] != -1) { >> - this->params_remap[i] = new_nr_params++; >> - } >> - } >> - >> - /* Update the list of params to be uploaded to match our new >> numbering. */ >> - for (unsigned int i = 0; i < uniforms; i++) { >> - int remapped = this->params_remap[i]; >> - >> - if (remapped == -1) >> - continue; >> - >> - stage_prog_data->param[remapped] = stage_prog_data->param[i]; >> - } >> - >> - uniforms = new_nr_params; >> - } else { >> - /* This should have been generated in the SIMD8 pass already. */ >> - assert(this->params_remap); >> - } >> - >> - /* Now do the renumbering of the shader to remove unused params. */ >> - foreach_list(node, &this->instructions) { >> - fs_inst *inst = (fs_inst *)node; >> - >> - for (int i = 0; i < 3; i++) { >> - int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; >> - >> - if (inst->src[i].file != UNIFORM) >> - continue; >> - >> - /* as above alias to 0 */ >> - if (constant_nr < 0 || constant_nr >= (int)this->nr_params_remap) { >> - constant_nr = 0; >> - } >> - assert(this->params_remap[constant_nr] != -1); >> - inst->src[i].reg = this->params_remap[constant_nr]; >> - inst->src[i].reg_offset = 0; >> - } >> - } >> - >> - return true; >> -} >> - >> /* >> * Implements array access of uniforms by inserting a >> * PULL_CONSTANT_LOAD instruction. >> @@ -1872,8 +1793,7 @@ >> fs_visitor::move_uniform_array_access_to_pull_constants() >> } >> >> /** >> - * Choose accesses from the UNIFORM file to demote to using the pull >> - * constant buffer. >> + * Assign UNIFORM file registers to either push constants or pull constants. >> * >> * We allow a fragment shader to have more than the specified minimum >> * maximum number of fragment shader uniform components (64). If >> @@ -1882,24 +1802,62 @@ >> fs_visitor::move_uniform_array_access_to_pull_constants() >> * update the program to load them. >> */ >> void >> -fs_visitor::setup_pull_constants() >> +fs_visitor::assign_constant_locations() >> { >> - /* Only allow 16 registers (128 uniform components) as push constants. */ >> - unsigned int max_uniform_components = 16 * 8; >> - if (uniforms <= max_uniform_components) >> + /* Only the first compile (SIMD8 mode) gets to decide on locations. */ >> + if (dispatch_width != 8) >> return; >> >> - /* Just demote the end of the list. We could probably do better >> + /* Find which UNIFORM registers are still in use. */ >> + bool is_live[uniforms]; >> + for (unsigned int i = 0; i < uniforms; i++) { >> + is_live[i] = false; >> + } >> + >> + foreach_list(node, &this->instructions) { >> + fs_inst *inst = (fs_inst *) node; >> + >> + for (int i = 0; i < 3; i++) { >> + if (inst->src[i].file != UNIFORM) >> + continue; >> + >> + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; >> + if (constant_nr >= 0 && constant_nr < (int) uniforms) >> + is_live[constant_nr] = true; >> + } >> + } >> + >> + /* Only allow 16 registers (128 uniform components) as push constants. >> + * >> + * Just demote the end of the list. We could probably do better >> * here, demoting things that are rarely used in the program first. >> */ >> - unsigned int pull_uniform_base = max_uniform_components; >> + unsigned int max_push_components = 16 * 8; >> + unsigned int num_push_constants = 0; >> >> + push_constant_loc = ralloc_array(mem_ctx, int, uniforms); >> pull_constant_loc = ralloc_array(mem_ctx, int, uniforms); >> + for (unsigned int i = 0; i < uniforms; i++) >> + pull_constant_loc[i] = -1; >> + >> for (unsigned int i = 0; i < uniforms; i++) { >> - if (i < pull_uniform_base) { >> - pull_constant_loc[i] = -1; >> + if (!is_live[i] || pull_constant_loc[i] != -1) { > > I started wondering when the second condition fires and by inspecting code not > visible in this patch it seems that it cannot. The loop addresses elements in > the "pull_constant_loc[]" in order and never beyond current index "i".
You are correct. At this point, it should never trigger. It will be triggered after the next patch ("i965/fs: Use a single instance of the pull_constant_loc array."). I should have introduced it in that patch, rather than here. Would you like me to make that change? Also, thanks for the careful review! >> + /* This UNIFORM register is either dead, or has already been >> demoted >> + * to a pull const. Mark it as no longer living in the param[] >> array. >> + */ >> + push_constant_loc[i] = -1; >> + continue; >> + } >> + >> + if (num_push_constants < max_push_components) { >> + /* Retain as a push constant. Record the location in the params[] >> + * array. >> + */ >> + push_constant_loc[i] = num_push_constants++; >> } else { >> - pull_constant_loc[i] = -1; >> + /* Demote to a pull constant. */ >> + push_constant_loc[i] = -1; >> + >> /* If our constant is already being uploaded for reladdr purposes, >> * reuse it. >> */ > > This is the code not visible. I pasted here for clarity. > > for (unsigned int j = 0; j < stage_prog_data->nr_pull_params; > j++) { > if (stage_prog_data->pull_param[j] == > stage_prog_data->param[i]) { > pull_constant_loc[i] = j; > break; > } > } > if (pull_constant_loc[i] == -1) { > int pull_index = stage_prog_data->nr_pull_params++; > stage_prog_data->pull_param[pull_index] = > stage_prog_data->param[i]; > pull_constant_loc[i] = pull_index; > } > } > }
signature.asc
Description: OpenPGP digital signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev