Add UBO loads to the gather table, as push constants, when the const block and offset are immediate values. Otherwise, fall back to the previous method of uploading the UBO constant data to the GRF using pull constants.
Cc: kenn...@whitecape.org Cc: ja...@jlekstrand.net Signed-off-by: Abdiel Janulgue <abdiel.janul...@linux.intel.com> --- src/mesa/drivers/dri/i965/brw_fs.cpp | 17 +++++++ src/mesa/drivers/dri/i965/brw_fs.h | 6 +++ src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 68 ++++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 6 ++- 4 files changed, 96 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index e39d821..ad084af 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1865,6 +1865,7 @@ fs_visitor::assign_constant_locations() stage_prog_data->nr_pull_params = num_pull_constants; stage_prog_data->nr_params = 0; + stage_prog_data->nr_ubo_params = ubo_uniforms; unsigned const_reg_access[uniforms]; memset(const_reg_access, 0, sizeof(const_reg_access)); @@ -1899,6 +1900,20 @@ fs_visitor::assign_constant_locations() stage_prog_data->gather_table[p].channel_mask = const_reg_access[i]; } + + for (unsigned i = 0; i < this->nr_ubo_gather_table; i++) { + int p = stage_prog_data->nr_gather_table++; + stage_prog_data->gather_table[p].reg = this->ubo_gather_table[i].reg; + stage_prog_data->gather_table[p].channel_mask = + this->ubo_gather_table[i].channel_mask; + stage_prog_data->gather_table[p].const_block = + this->ubo_gather_table[i].const_block; + stage_prog_data->gather_table[p].const_offset = + this->ubo_gather_table[i].const_offset; + stage_prog_data->max_ubo_const_block = + MAX2(stage_prog_data->max_ubo_const_block, + this->ubo_gather_table[i].const_block); + } } /** @@ -5171,6 +5186,7 @@ brw_wm_fs_emit(struct brw_context *brw, fs_visitor v(brw->intelScreen->compiler, brw, mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base, prog, &fp->Base, 8, st_index8); + v.use_gather_constants = brw->fs_ubo_gather && brw->use_resource_streamer; if (!v.run_fs(false /* do_rep_send */)) { if (prog) { prog->LinkStatus = false; @@ -5187,6 +5203,7 @@ 
brw_wm_fs_emit(struct brw_context *brw, fs_visitor v2(brw->intelScreen->compiler, brw, mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base, prog, &fp->Base, 16, st_index16); + v2.use_gather_constants = v.use_gather_constants; if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) { if (!v.simd16_unsupported) { /* Try a SIMD16 compile */ diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index dd0526a..ded007a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -293,6 +293,9 @@ public: unsigned n); int implied_mrf_writes(fs_inst *inst); + bool nir_generate_ubo_gather_table(const brw::fs_builder &bld, + nir_intrinsic_instr *instr, fs_reg &dest, + bool has_indirect); virtual void dump_instructions(); virtual void dump_instructions(const char *name); @@ -316,6 +319,9 @@ public: /** Number of uniform variable components visited. */ unsigned uniforms; + /** Number of ubo uniform variable components visited. */ + unsigned ubo_uniforms; + /** Byte-offset for the next available spot in the scratch space buffer. 
*/ unsigned last_scratch; diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index a6c6a2f..9a50b99 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1488,6 +1488,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr has_indirect = true; /* fallthrough */ case nir_intrinsic_load_ubo: { + if (nir_generate_ubo_gather_table(bld, instr, dest, has_indirect)) + break; + nir_const_value *const_index = nir_src_as_const_value(instr->src[0]); fs_reg surf_index; @@ -1874,3 +1877,68 @@ fs_visitor::nir_emit_jump(const fs_builder &bld, nir_jump_instr *instr) unreachable("unknown jump"); } } + +bool +fs_visitor::nir_generate_ubo_gather_table(const brw::fs_builder &bld, + nir_intrinsic_instr *instr, + fs_reg &dest, + bool has_indirect) +{ + const nir_const_value *const_index = nir_src_as_const_value(instr->src[0]); + + if (!const_index || has_indirect || !use_gather_constants) + return false; + + /* Only allow 16 registers (128 uniform components) as push constants. + */ + static const unsigned max_push_components = 16 * 8; + const unsigned param_index = uniforms + ubo_uniforms; + if ((param_index + instr->num_components) > max_push_components) + return false; + + /* Only SIMD8 gets to assign push constant register locations and generate + * the gather table masks. 
+ */ + bool generate_gather_table = (dispatch_width == 8); + + fs_reg uniform_reg; + if (!generate_gather_table) { + for (int i = 0; i < (int) this->nr_ubo_gather_table; i++) { + if ((this->ubo_gather_table[i].const_block == + const_index->u[0]) && + (this->ubo_gather_table[i].const_offset == + (unsigned) instr->const_index[0])) { + uniform_reg = fs_reg(UNIFORM, this->ubo_gather_table[i].reg); + break; + } + } + assert(uniform_reg.file == UNIFORM); + } + + int gather = -1; + if (generate_gather_table) { + uniform_reg = fs_reg(UNIFORM, 0); + gather = this->nr_ubo_gather_table++; + + assert(instr->num_components <= 4); + this->ubo_gather_table[gather].reg = uniform_reg.reg; + this->ubo_gather_table[gather].const_block = const_index->u[0]; + this->ubo_gather_table[gather].const_offset = instr->const_index[0]; + } + + ubo_uniforms += instr->num_components; + for (unsigned j = 0; j < instr->num_components; j++) { + fs_reg src = offset(retype(uniform_reg, dest.type), bld, + j + param_index); + bld.MOV(dest, src); + dest = offset(dest, bld, 1); + + if (gather != -1) { + unsigned mask = ((instr->const_index[0] % 16) == 0) ? 
+ 1 << j : 1 << (((instr->const_index[0] % 16) / 4) + j); + this->ubo_gather_table[gather].channel_mask |= mask; + } + } + + return true; +} diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 5cb794b..59eb122 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1109,6 +1109,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, this->regs_live_at_ip = NULL; this->uniforms = 0; + this->ubo_uniforms = 0; this->last_scratch = 0; this->pull_constant_loc = NULL; this->push_constant_loc = NULL; @@ -1116,8 +1117,11 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, this->spilled_any_registers = false; this->do_dual_src = false; - if (dispatch_width == 8) + if (dispatch_width == 8) { this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params); + this->ubo_gather_table = rzalloc_array(mem_ctx, backend_shader::gather_table, + stage_prog_data->nr_params); + } } fs_visitor::~fs_visitor() -- 1.9.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev