--- src/mesa/drivers/dri/i965/brw_fs.cpp | 27 +++++++++++++++++----- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 19 ++++++++------- .../drivers/dri/i965/brw_schedule_instructions.cpp | 7 +++--- 3 files changed, 36 insertions(+), 17 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 33214fd..9656081 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -65,7 +65,23 @@ fs_inst::init(enum opcode opcode, const fs_reg &dst, fs_reg *src, int sources) this->conditional_mod = BRW_CONDITIONAL_NONE; /* This will be the case for almost all instructions. */ - this->regs_written = 1; + switch (dst.file) { + case GRF: + case HW_REG: + case MRF: + this->regs_written = (dst.width * dst.stride * type_sz(dst.type) + 31) / 32; + break; + case BAD_FILE: + this->regs_written = 0; + break; + case IMM: + case UNIFORM: + unreachable("Invalid destination register file"); + break; + default: + unreachable("Invalid register file"); + break; + } this->writes_accumulator = false; } @@ -2346,7 +2362,7 @@ fs_visitor::compute_to_mrf() /* Things returning more than one register would need us to * understand coalescing out more than one MOV at a time. */ - if (scan_inst->regs_written > 1) + if (scan_inst->regs_written > scan_inst->dst.width / 8) break; /* SEND instructions can't have MRF as a destination. */ @@ -2605,8 +2621,7 @@ clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps, void fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst) { - int reg_size = dispatch_width / 8; - int write_len = inst->regs_written * reg_size; + int write_len = inst->regs_written; int first_write_grf = inst->dst.reg; bool needs_dep[BRW_MAX_MRF]; assert(write_len < (int)sizeof(needs_dep) - 1); @@ -2648,7 +2663,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst) */ if (scan_inst->dst.file == GRF) { for (int i = 0; i < scan_inst->regs_written; i++) { - int reg = scan_inst->dst.reg + i * reg_size; + int reg = scan_inst->dst.reg + i; if (reg >= first_write_grf && reg < first_write_grf + write_len && @@ -2686,7 +2701,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst) void fs_visitor::insert_gen4_post_send_dependency_workarounds(fs_inst *inst) { - int write_len = inst->regs_written * dispatch_width / 8; + int write_len = inst->regs_written; int first_write_grf = inst->dst.reg; bool needs_dep[BRW_MAX_MRF]; assert(write_len < (int)sizeof(needs_dep) - 1); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 35dc318..dae8f25 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -285,7 +285,8 @@ fs_visitor::try_emit_saturate(ir_expression *ir) * src, just set the saturate flag instead of emmitting a separate mov. */ fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src); - if (modify && modify->regs_written == 1 && modify->can_do_saturate()) { + if (modify && modify->regs_written == modify->dst.width / 8 && + modify->can_do_saturate()) { modify->saturate = true; this->result = src; return true; @@ -1429,7 +1430,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst->base_mrf = base_mrf; inst->mlen = mlen; inst->header_present = header_present; - inst->regs_written = 4; + inst->regs_written = 4 * reg_width; if (mlen > MAX_SAMPLER_MESSAGE_SIZE) { fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE) @@ -1645,7 +1646,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, else inst->mlen = length * reg_width; inst->header_present = header_present; - inst->regs_written = 4; + inst->regs_written = 4 * reg_width; if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) { fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE) @@ -1793,9 +1794,10 @@ fs_visitor::emit_mcs_fetch(ir_texture *ir, fs_reg coordinate, fs_reg sampler) inst->base_mrf = -1; inst->mlen = length * reg_width; inst->header_present = false; - inst->regs_written = 4; /* we only care about one reg of response, - * but the sampler always writes 4/8 - */ + inst->regs_written = 4 * reg_width; /* we only care about one reg of + * response, but the sampler always + * writes 4/8 + */ return dest; } @@ -1966,14 +1968,15 @@ fs_visitor::visit(ir_texture *ir) emit_math(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6)); fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written); - for (int i = 0; i < inst->regs_written; i++) { + int components = inst->regs_written / (dst.width / 8); + for (int i = 0; i < components; i++) { if (i == 2) { fixed_payload[i] = fixed_depth; } else { fixed_payload[i] = offset(dst, i); } } - emit(LOAD_PAYLOAD(dst, fixed_payload, inst->regs_written)); + emit(LOAD_PAYLOAD(dst, fixed_payload, components)); } } diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 86d16ab..a9fa6eb 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -834,7 +834,7 @@ fs_instruction_scheduler::calculate_deps() /* write-after-write deps. */ if (inst->dst.file == GRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_written * reg_width; r++) { + for (int r = 0; r < inst->regs_written; r++) { add_dep(last_grf_write[inst->dst.reg + r], n); last_grf_write[inst->dst.reg + r] = n; } @@ -964,7 +964,7 @@ fs_instruction_scheduler::calculate_deps() */ if (inst->dst.file == GRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_written * reg_width; r++) + for (int r = 0; r < inst->regs_written; r++) last_grf_write[inst->dst.reg + r] = n; } else { for (int r = 0; r < inst->regs_written; r++) { @@ -1287,7 +1287,8 @@ fs_instruction_scheduler::choose_instruction_to_schedule() * single-result send is probably actually reducing register * pressure. */ - if (inst->regs_written <= 1 && chosen_inst->regs_written > 1) { + if (inst->regs_written <= inst->dst.width / 8 && + chosen_inst->regs_written > chosen_inst->dst.width / 8) { chosen = n; continue; } else if (inst->regs_written > chosen_inst->regs_written) { -- 2.1.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev