Previously, we used UNIFORM_PULL_CONSTANT_LOAD_GEN7 only for UBO loads. This patch factors the UBO-load code out into a helper and uses it for pull constant setup as well.
This gives us the benefit of send-from-GRF for normal pull constants. Additionally, it means that the code generator only needs to handle either the pre-Gen7 or Gen7 variant of the opcode, rather than needing to handle both. Cc: Eric Anholt <e...@anholt.net> --- src/mesa/drivers/dri/i965/brw_fs.cpp | 51 ++++++++++++++++++++++------ src/mesa/drivers/dri/i965/brw_fs.h | 3 ++ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 31 +++-------------- 3 files changed, 47 insertions(+), 38 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index ae26d07..c1e7757 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -220,6 +220,42 @@ fs_visitor::CMP(fs_reg dst, fs_reg src0, fs_reg src1, uint32_t condition) } exec_list +fs_visitor::UNIFORM_PULL_CONSTANT_LOAD(fs_reg dst, + unsigned surf_index, + unsigned offset) +{ + exec_list instructions; + fs_inst *inst; + + if (intel->gen >= 7) { + fs_reg payload = fs_reg(this, glsl_type::uint_type); + + struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD); + inst = MOV(payload, fs_reg(g0)); + inst->force_writemask_all = true; + instructions.push_tail(inst); + + /* We don't need the second half of this vgrf to be filled with g1 + * in the 16-wide case, but if we use force_uncompressed then live + * variable analysis won't consider this a def! 
+ */ + inst = new(mem_ctx) fs_inst(FS_OPCODE_SET_GLOBAL_OFFSET, payload, payload, + fs_reg(offset / 16)); + instructions.push_tail(inst); + inst = new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, + dst, fs_reg(surf_index), payload); + instructions.push_tail(inst); + } else { + inst = new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, + dst, fs_reg(surf_index), fs_reg(offset)); + inst->base_mrf = 14; + inst->mlen = 1; + instructions.push_tail(inst); + } + return instructions; +} + +exec_list fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index, fs_reg offset) { @@ -1662,17 +1698,10 @@ fs_visitor::setup_pull_constants() assert(!inst->src[i].reladdr); fs_reg dst = fs_reg(this, glsl_type::float_type); - fs_reg index = fs_reg((unsigned)SURF_INDEX_FRAG_CONST_BUFFER); - fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15); - fs_inst *pull = - new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, - dst, index, offset); - pull->ir = inst->ir; - pull->annotation = inst->annotation; - pull->base_mrf = 14; - pull->mlen = 1; - - inst->insert_before(pull); + exec_list list = + UNIFORM_PULL_CONSTANT_LOAD(dst, SURF_INDEX_FRAG_CONST_BUFFER, + (pull_index * 4) & ~15); + inst->insert_before(&list); inst->src[i].file = GRF; inst->src[i].reg = dst.reg; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index b47b0d0..40cc785 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -290,6 +290,9 @@ public: fs_inst *end, fs_reg reg); + exec_list UNIFORM_PULL_CONSTANT_LOAD(fs_reg dst, + unsigned surf_index, + unsigned offset); exec_list VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index, fs_reg offset); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index e70d6bf..57895d9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -571,36 +571,13 @@ 
fs_visitor::visit(ir_expression *ir) */ ir_constant *uniform_block = ir->operands[0]->as_constant(); ir_constant *const_offset = ir->operands[1]->as_constant(); - fs_reg surf_index = fs_reg((unsigned)SURF_INDEX_WM_UBO(uniform_block->value.u[0])); + unsigned surf_index = SURF_INDEX_WM_UBO(uniform_block->value.u[0]); if (const_offset) { fs_reg packed_consts = fs_reg(this, glsl_type::float_type); packed_consts.type = result.type; - if (intel->gen >= 7) { - fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] / 16); - fs_reg payload = fs_reg(this, glsl_type::uint_type); - struct brw_reg g0 = retype(brw_vec8_grf(0, 0), - BRW_REGISTER_TYPE_UD); - fs_inst *setup = emit(MOV(payload, fs_reg(g0))); - setup->force_writemask_all = true; - /* We don't need the second half of this vgrf to be filled with g1 - * in the 16-wide case, but if we use force_uncompressed then live - * variable analysis won't consider this a def! - */ - - emit(FS_OPCODE_SET_GLOBAL_OFFSET, payload, - payload, const_offset_reg); - emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, packed_consts, - surf_index, payload); - } else { - fs_reg const_offset_reg = fs_reg(const_offset->value.u[0]); - fs_inst *pull = emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, - packed_consts, - surf_index, - const_offset_reg)); - pull->base_mrf = 14; - pull->mlen = 1; - } + emit(UNIFORM_PULL_CONSTANT_LOAD(packed_consts, surf_index, + const_offset->value.u[0])); packed_consts.smear = const_offset->value.u[0] % 16 / 4; for (int i = 0; i < ir->type->vector_elements; i++) { @@ -629,7 +606,7 @@ fs_visitor::visit(ir_expression *ir) for (int i = 0; i < ir->type->vector_elements; i++) { fs_reg offset = fs_reg(this, glsl_type::int_type); emit(ADD(offset, base_offset, fs_reg(i))); - emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index, offset)); + emit(VARYING_PULL_CONSTANT_LOAD(result, fs_reg(surf_index), offset)); if (ir->type->base_type == GLSL_TYPE_BOOL) emit(CMP(result, result, fs_reg(0), BRW_CONDITIONAL_NZ)); -- 1.8.1 
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev