The same effect can be achieved using a combination of ::stride and
::subreg_offset. Remove the less flexible ::smear to keep the data
members of fs_reg orthogonal.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp               | 23 +++++++++++++---------
 src/mesa/drivers/dri/i965/brw_fs.h                 |  4 +++-
 .../drivers/dri/i965/brw_fs_copy_propagation.cpp   |  4 +---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp     |  4 ++--
 .../drivers/dri/i965/brw_fs_live_variables.cpp     |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  |  4 ++--
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp       | 14 ++++++-------
 7 files changed, 30 insertions(+), 25 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index e4cee33..e6e32fc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -379,7 +379,6 @@ void fs_reg::init() { memset(this, 0, sizeof(*this)); - this->smear = -1; stride = 1; } @@ -445,7 +444,6 @@ fs_reg::equals(const fs_reg &r) const !reladdr && !r.reladdr && memcmp(&fixed_hw_reg, &r.fixed_hw_reg, sizeof(fixed_hw_reg)) == 0 && - smear == r.smear && stride == r.stride && imm.u == r.imm.u); } @@ -468,6 +466,15 @@ fs_reg::apply_stride(unsigned stride) return *this; } +fs_reg & +fs_reg::set_smear(unsigned subreg) +{ + assert(file != HW_REG && file != IMM); + subreg_offset = subreg * type_sz(type); + stride = 0; + return *this; +} + bool fs_reg::is_contiguous() const { @@ -545,7 +552,7 @@ fs_visitor::get_timestamp() * else that might disrupt timing) by setting smear to 2 and checking if * that field is != 0. */ - dst.smear = 0; + dst.set_smear(0); return dst; } @@ -580,7 +587,7 @@ fs_visitor::emit_shader_time_end() * were the only two timestamp reads that happened). 
*/ fs_reg reset = shader_end_time; - reset.smear = 2; + reset.set_smear(2); fs_inst *test = emit(AND(reg_null_d, reset, fs_reg(1u))); test->conditional_mod = BRW_CONDITIONAL_Z; emit(IF(BRW_PREDICATE_NORMAL)); @@ -1934,7 +1941,7 @@ fs_visitor::setup_pull_constants() inst->src[i].file = GRF; inst->src[i].reg = dst.reg; inst->src[i].reg_offset = 0; - inst->src[i].smear = pull_index & 3; + inst->src[i].set_smear(pull_index & 3); } } } @@ -2263,8 +2270,7 @@ fs_visitor::register_coalesce_2() inst->src[0].file != GRF || inst->src[0].negate || inst->src[0].abs || - inst->src[0].smear != -1 || - !inst->src[0].is_contiguous() || + !inst->src[0].is_contiguous() || inst->dst.file != GRF || inst->dst.type != inst->src[0].type || virtual_grf_sizes[inst->src[0].reg] != 1) { @@ -2356,7 +2362,6 @@ fs_visitor::register_coalesce() bool has_source_modifiers = (inst->src[0].abs || inst->src[0].negate || - inst->src[0].smear != -1 || !inst->src[0].is_contiguous() || inst->src[0].file == UNIFORM); @@ -2480,7 +2485,7 @@ fs_visitor::compute_to_mrf() inst->dst.file != MRF || inst->src[0].file != GRF || inst->dst.type != inst->src[0].type || inst->src[0].abs || inst->src[0].negate || - inst->src[0].smear != -1 || !inst->src[0].is_contiguous() || + !inst->src[0].is_contiguous() || inst->src[0].subreg_offset) continue; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index b0ce812..e36943c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -81,10 +81,12 @@ public: fs_reg retype(uint32_t type); fs_reg &apply_stride(unsigned stride); + /** Smear a channel of the reg to all channels. */ + fs_reg &set_smear(unsigned subreg); + bool negate; bool abs; bool sechalf; - int smear; /* -1, or a channel of the reg to smear to all channels. */ int subreg_offset; /**< Offset in bytes from the start of the register. 
*/ int stride; /**< Register region horizontal stride */ diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 2f2d6b6..6b67d39 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -298,7 +298,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) bool has_source_modifiers = entry->src.abs || entry->src.negate; if ((has_source_modifiers || entry->src.file == UNIFORM || - entry->src.smear != -1 || !entry->src.is_contiguous()) && + !entry->src.is_contiguous()) && !can_do_source_mods(inst)) return false; @@ -311,8 +311,6 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) inst->src[arg].file = entry->src.file; inst->src[arg].reg = entry->src.reg; inst->src[arg].reg_offset = entry->src.reg_offset; - if (entry->src.smear != -1) - inst->src[arg].smear = entry->src.smear; inst->src[arg].subreg_offset = entry->src.subreg_offset; inst->src[arg].stride *= entry->src.stride; diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 66321bd..3972fda 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -988,8 +988,8 @@ brw_reg_from_fs_reg(fs_reg *reg) switch (reg->file) { case GRF: case MRF: - if (reg->stride == 0 || reg->smear >= 0) { - brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, reg->smear); + if (reg->stride == 0) { + brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, 0); } else { brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0); brw_reg = stride(brw_reg, 8 * reg->stride, 8, reg->stride); diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp index 1a2d398..46fdd74 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp @@ -84,7 +84,7 @@ fs_live_variables::setup_one_read(bblock_t *block, fs_inst *inst, * would get stomped by the first decode as well. */ int end_ip = ip; - if (v->dispatch_width == 16 && (reg.smear != -1 || reg.stride == 0 || + if (v->dispatch_width == 16 && (reg.stride == 0 || (v->pixel_x.reg == reg.reg || v->pixel_y.reg == reg.reg))) { end_ip++; diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 3290004..c04944e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -591,7 +591,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) * loading pull constants, so spilling them is unlikely to reduce * register pressure anyhow. */ - if (inst->src[i].smear >= 0 || !inst->src[i].is_contiguous()) { + if (!inst->src[i].is_contiguous()) { no_spill[inst->src[i].reg] = true; } } @@ -600,7 +600,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) if (inst->dst.file == GRF) { spill_costs[inst->dst.reg] += inst->regs_written * loop_scale; - if (inst->dst.smear >= 0 || !inst->dst.is_contiguous()) { + if (!inst->dst.is_contiguous()) { no_spill[inst->dst.reg] = true; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 3f065e7..8a76094 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -726,8 +726,14 @@ fs_visitor::visit(ir_expression *ir) emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts, surf_index, const_offset_reg)); - packed_consts.smear = const_offset->value.u[0] % 16 / 4; for (int i = 0; i < ir->type->vector_elements; i++) { + packed_consts.set_smear(const_offset->value.u[0] % 16 / 4 + i); + + /* The std140 packing rules don't allow vectors to cross 16-byte + * boundaries, and a reg is 32 bytes. 
+ */ + assert(packed_consts.subreg_offset < 32); + /* UBO bools are any nonzero value. We consider bools to be * values with the low bit set to 1. Convert them using CMP. */ @@ -737,13 +743,7 @@ fs_visitor::visit(ir_expression *ir) emit(MOV(result, packed_consts)); } - packed_consts.smear++; result.reg_offset++; - - /* The std140 packing rules don't allow vectors to cross 16-byte - * boundaries, and a reg is 32 bytes. - */ - assert(packed_consts.smear < 8); } } else { /* Turn the byte offset into a dword offset. */ -- 1.8.3.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev