Signed-off-by: Kenneth Graunke <kenn...@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp     |  5 +--
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 73 +++++++++++++++++++++----------
 2 files changed, 50 insertions(+), 28 deletions(-)

That was easy diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b7378de..9572f3a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1035,8 +1035,8 @@ fs_visitor::virtual_grf_alloc(int size) fs_reg fs_visitor::vgrf(int num_components) { - return fs_reg(GRF, virtual_grf_alloc(num_components), BRW_REGISTER_TYPE_F, - dispatch_width); + return fs_reg(GRF, virtual_grf_alloc(num_components * (dispatch_width / 8)), + BRW_REGISTER_TYPE_F, dispatch_width); } /** Fixed HW reg constructor. */ @@ -3759,7 +3759,6 @@ fs_visitor::run_fs() */ if (shader) { if (getenv("INTEL_USE_NIR") != NULL && !brw->use_rep_send) { - no16("Cannot do 16-wide in NIR yet"); emit_nir_code(); } else { foreach_in_list(ir_instruction, ir, shader->base.ir) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index fca03e5..5208985 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -132,12 +132,14 @@ fs_visitor::emit_nir_code() void fs_visitor::nir_setup_inputs(nir_shader *shader) { + int reg_width = dispatch_width / 8; + fs_reg varying = nir_inputs; struct hash_entry *entry; hash_table_foreach(shader->inputs, entry) { nir_variable *var = (nir_variable *) entry->data; - varying.reg_offset = var->data.driver_location; + varying.reg_offset = var->data.driver_location * reg_width; fs_reg reg; if (!strcmp(var->name, "gl_FragCoord")) { @@ -159,13 +161,15 @@ fs_visitor::nir_setup_inputs(nir_shader *shader) void fs_visitor::nir_setup_outputs(nir_shader *shader) { + int reg_width = dispatch_width / 8; + brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; fs_reg reg = nir_outputs; struct hash_entry *entry; hash_table_foreach(shader->outputs, entry) { nir_variable *var = (nir_variable *) entry->data; - reg.reg_offset = var->data.driver_location; + reg.reg_offset = var->data.driver_location * reg_width; if (var->data.index > 0) { 
assert(var->data.location == FRAG_RESULT_DATA0); @@ -195,7 +199,8 @@ fs_visitor::nir_setup_outputs(nir_shader *shader) for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) { int output = var->data.location - FRAG_RESULT_DATA0 + i; this->outputs[output] = reg; - this->outputs[output].reg_offset += vector_elements * i; + this->outputs[output].reg_offset += + (vector_elements * i) * reg_width; this->output_components[output] = vector_elements; } } @@ -1035,6 +1040,8 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) fs_reg fs_visitor::get_nir_src(nir_src src) { + int reg_width = dispatch_width / 8; + if (src.is_ssa) { assert(src.ssa->parent_instr->type == nir_instr_type_load_const); nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr); @@ -1057,7 +1064,7 @@ fs_visitor::get_nir_src(nir_src src) * this to F if they need to */ reg.type = BRW_REGISTER_TYPE_D; - reg.reg_offset = src.reg.base_offset; + reg.reg_offset = src.reg.base_offset * reg_width; if (src.reg.indirect) { reg.reladdr = new(mem_ctx) fs_reg(); *reg.reladdr = retype(get_nir_src(*src.reg.indirect), @@ -1111,13 +1118,15 @@ fs_visitor::get_nir_alu_src(nir_alu_instr *instr, unsigned src) fs_reg fs_visitor::get_nir_dest(nir_dest dest) { + int reg_width = dispatch_width / 8; + fs_reg reg; if (dest.reg.reg->is_global) reg = nir_globals[dest.reg.reg->index]; else reg = nir_locals[dest.reg.reg->index]; - reg.reg_offset = dest.reg.base_offset; + reg.reg_offset = dest.reg.base_offset * reg_width; if (dest.reg.indirect) { reg.reladdr = new(mem_ctx) fs_reg(); *reg.reladdr = retype(get_nir_src(*dest.reg.indirect), @@ -1130,15 +1139,17 @@ fs_visitor::get_nir_dest(nir_dest dest) void fs_visitor::emit_percomp(fs_inst *inst, unsigned wr_mask) { + int reg_width = dispatch_width / 8; + for (unsigned i = 0; i < 4; i++) { if (!((wr_mask >> i) & 1)) continue; fs_inst *new_inst = new(mem_ctx) fs_inst(*inst); - new_inst->dst.reg_offset += i; + new_inst->dst.reg_offset += i * reg_width; for (unsigned j 
= 0; j < new_inst->sources; j++) if (inst->src[j].file == GRF) - new_inst->src[j].reg_offset += i; + new_inst->src[j].reg_offset += i * reg_width; emit(new_inst); } @@ -1150,15 +1161,17 @@ fs_visitor::emit_percomp(enum opcode op, fs_reg dest, fs_reg src0, enum brw_predicate predicate, enum brw_conditional_mod mod) { + int reg_width = dispatch_width / 8; + for (unsigned i = 0; i < 4; i++) { if (!((wr_mask >> i) & 1)) continue; fs_inst *new_inst = new(mem_ctx) fs_inst(op, dest, src0); - new_inst->dst.reg_offset += i; + new_inst->dst.reg_offset += i * reg_width; for (unsigned j = 0; j < new_inst->sources; j++) if (new_inst->src[j].file == GRF) - new_inst->src[j].reg_offset += i; + new_inst->src[j].reg_offset += i * reg_width; new_inst->predicate = predicate; new_inst->conditional_mod = mod; @@ -1173,15 +1186,17 @@ fs_visitor::emit_percomp(enum opcode op, fs_reg dest, fs_reg src0, fs_reg src1, enum brw_predicate predicate, enum brw_conditional_mod mod) { + int reg_width = dispatch_width / 8; + for (unsigned i = 0; i < 4; i++) { if (!((wr_mask >> i) & 1)) continue; fs_inst *new_inst = new(mem_ctx) fs_inst(op, dest, src0, src1); - new_inst->dst.reg_offset += i; + new_inst->dst.reg_offset += i * reg_width; for (unsigned j = 0; j < new_inst->sources; j++) if (new_inst->src[j].file == GRF) - new_inst->src[j].reg_offset += i; + new_inst->src[j].reg_offset += i * reg_width; new_inst->predicate = predicate; new_inst->conditional_mod = mod; @@ -1194,15 +1209,17 @@ void fs_visitor::emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0, unsigned wr_mask, bool saturate) { + int reg_width = dispatch_width / 8; + for (unsigned i = 0; i < 4; i++) { if (!((wr_mask >> i) & 1)) continue; fs_reg new_dest = dest; - new_dest.reg_offset += i; + new_dest.reg_offset += i * reg_width; fs_reg new_src0 = src0; if (src0.file == GRF) - new_src0.reg_offset += i; + new_src0.reg_offset += i * reg_width; fs_inst *new_inst = emit_math(op, new_dest, new_src0); new_inst->saturate = saturate; @@ 
-1214,18 +1231,20 @@ fs_visitor::emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0, fs_reg src1, unsigned wr_mask, bool saturate) { + int reg_width = dispatch_width / 8; + for (unsigned i = 0; i < 4; i++) { if (!((wr_mask >> i) & 1)) continue; fs_reg new_dest = dest; - new_dest.reg_offset += i; + new_dest.reg_offset += i * reg_width; fs_reg new_src0 = src0; if (src0.file == GRF) - new_src0.reg_offset += i; + new_src0.reg_offset += i * reg_width; fs_reg new_src1 = src1; if (src1.file == GRF) - new_src1.reg_offset += i; + new_src1.reg_offset += i * reg_width; fs_inst *new_inst = emit_math(op, new_dest, new_src0, new_src1); new_inst->saturate = saturate; @@ -1236,9 +1255,11 @@ void fs_visitor::emit_reduction(enum opcode op, fs_reg dest, fs_reg src, unsigned num_components) { + int reg_width = dispatch_width / 8; + fs_reg src0 = src; fs_reg src1 = src; - src1.reg_offset++; + src1.reg_offset += 1 * reg_width; if (num_components == 2) { emit(op, dest, src0, src1); @@ -1250,7 +1271,7 @@ fs_visitor::emit_reduction(enum opcode op, fs_reg dest, fs_reg src, emit(op, temp1, src0, src1); fs_reg src2 = src; - src2.reg_offset += 2; + src2.reg_offset += 2 * reg_width; if (num_components == 3) { emit(op, dest, temp1, src2); @@ -1260,7 +1281,7 @@ fs_visitor::emit_reduction(enum opcode op, fs_reg dest, fs_reg src, assert(num_components == 4); fs_reg src3 = src; - src3.reg_offset += 3; + src3.reg_offset += 3 * reg_width; fs_reg temp2 = vgrf(1); temp2.type = src.type; @@ -1271,6 +1292,8 @@ fs_visitor::emit_reduction(enum opcode op, fs_reg dest, fs_reg src, void fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) { + int reg_width = dispatch_width / 8; + fs_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_dest(instr->dest); @@ -1374,7 +1397,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) index++; emit(MOV(dest, src)); - dest.reg_offset++; + dest.reg_offset += 1 * reg_width; } } break; @@ -1435,7 +1458,7 @@ 
fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) assert(packed_consts.subreg_offset < 32); emit(MOV(dest, packed_consts)); - dest.reg_offset++; + dest.reg_offset += 1 * reg_width; } } break; @@ -1448,14 +1471,14 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) for (int i = 0; i < instr->const_index[1]; i++) { for (unsigned j = 0; j < instr->num_components; j++) { fs_reg src = nir_inputs; - src.reg_offset = instr->const_index[0] + index; + src.reg_offset = (instr->const_index[0] + index) * reg_width; if (has_indirect) src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); src.type = dest.type; index++; emit(MOV(dest, src)); - dest.reg_offset++; + dest.reg_offset += 1 * reg_width; } } break; @@ -1587,13 +1610,13 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) for (int i = 0; i < instr->const_index[1]; i++) { for (unsigned j = 0; j < instr->num_components; j++) { fs_reg new_dest = nir_outputs; - new_dest.reg_offset = instr->const_index[0] + index; + new_dest.reg_offset = (instr->const_index[0] + index) * reg_width; if (has_indirect) src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1])); new_dest.type = src.type; index++; emit(MOV(new_dest, src)); - src.reg_offset++; + src.reg_offset += 1 * reg_width; } } break; -- 2.2.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev