v2: Fix fragment program fragment.color output, fix smaller-than-vec4
    dual src output codegen, use offset() a bit more.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp         |  2 +-
 src/mesa/drivers/dri/i965/brw_fs.h           |  5 +--
 src/mesa/drivers/dri/i965/brw_fs_fp.cpp      | 18 ++++----
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 64 +++++++++++++++-------------
 4 files changed, 46 insertions(+), 43 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 85a5463..972d4a8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1732,7 +1732,7 @@ fs_visitor::compact_virtual_grfs() { &pixel_y, 1 }, { &pixel_w, 1 }, { &wpos_w, 1 }, - { &dual_src_output, 1 }, + { dual_src_output, ARRAY_SIZE(dual_src_output) }, { outputs, ARRAY_SIZE(outputs) }, { delta_x, ARRAY_SIZE(delta_x) }, { delta_y, ARRAY_SIZE(delta_y) }, diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 3d21ee5..d6dfde4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -526,9 +526,8 @@ public: struct hash_table *variable_ht; fs_reg frag_depth; fs_reg sample_mask; - fs_reg outputs[BRW_MAX_DRAW_BUFFERS]; - unsigned output_components[BRW_MAX_DRAW_BUFFERS]; - fs_reg dual_src_output; + fs_reg outputs[BRW_MAX_DRAW_BUFFERS * 4]; + fs_reg dual_src_output[4]; bool do_dual_src; int first_non_payload_grf; /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */ diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp index 49eaf05..c6f063e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp @@ -646,25 +646,25 @@ fs_visitor::get_fp_dst_reg(const prog_dst_register *dst) return frag_depth; } else if (dst->Index == FRAG_RESULT_COLOR) { if (outputs[0].file == BAD_FILE) { - outputs[0] = fs_reg(this, glsl_type::vec4_type); - output_components[0] = 4; + fs_reg reg = fs_reg(this, glsl_type::vec4_type); /* Tell emit_fb_writes() to smear fragment.color across all the * color attachments. 
*/ - for (int i = 1; i < c->key.nr_color_regions; i++) { - outputs[i] = outputs[0]; - output_components[i] = output_components[0]; + for (int i = 0; i < c->key.nr_color_regions; i++) { + for (int j = 0; j < 4; j++) + outputs[i * 4 + j] = offset(reg, j); } } return outputs[0]; } else { int output_index = dst->Index - FRAG_RESULT_DATA0; - if (outputs[output_index].file == BAD_FILE) { - outputs[output_index] = fs_reg(this, glsl_type::vec4_type); + if (outputs[output_index * 4].file == BAD_FILE) { + fs_reg reg = fs_reg(this, glsl_type::vec4_type); + for (int i = 0; i < 4; i++) + outputs[output_index * 4 + i] = offset(reg, i); } - output_components[output_index] = 4; - return outputs[output_index]; + return outputs[output_index * 4]; } case PROGRAM_UNDEFINED: diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 63a0ae5..a017d55 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -70,17 +70,25 @@ fs_visitor::visit(ir_variable *ir) } else if (ir->data.mode == ir_var_shader_out) { reg = new(this->mem_ctx) fs_reg(this, ir->type); + int vector_elements = + ir->type->is_array() ? ir->type->fields.array->vector_elements + : ir->type->vector_elements; + if (ir->data.index > 0) { - assert(ir->data.location == FRAG_RESULT_DATA0); - assert(ir->data.index == 1); - this->dual_src_output = *reg; + assert(ir->data.location == FRAG_RESULT_DATA0); + assert(ir->data.index == 1); + for (unsigned i = 0; i < vector_elements; i++) + this->dual_src_output[i + ir->data.location_frac] = offset(*reg, i); this->do_dual_src = true; } else if (ir->data.location == FRAG_RESULT_COLOR) { + fs_reg chan = *reg; /* Writing gl_FragColor outputs to all color regions. 
*/ - for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) { - this->outputs[i] = *reg; - this->output_components[i] = 4; - } + for (int j = 0; j < vector_elements; j++) { + for (unsigned i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) { + this->outputs[i * 4 + j + ir->data.location_frac] = chan; + } + chan.reg_offset++; + } } else if (ir->data.location == FRAG_RESULT_DEPTH) { this->frag_depth = *reg; } else if (ir->data.location == FRAG_RESULT_SAMPLE_MASK) { @@ -90,16 +98,16 @@ fs_visitor::visit(ir_variable *ir) assert(ir->data.location >= FRAG_RESULT_DATA0 && ir->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS); - int vector_elements = - ir->type->is_array() ? ir->type->fields.array->vector_elements - : ir->type->vector_elements; - /* General color output. */ for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) { int output = ir->data.location - FRAG_RESULT_DATA0 + i; - this->outputs[output] = *reg; - this->outputs[output].reg_offset += vector_elements * i; - this->output_components[output] = vector_elements; + fs_reg out = *reg; + out.reg_offset += vector_elements * i; + + for (int j = 0; j < vector_elements; j++) { + this->outputs[4 * output + j + ir->data.location_frac] = out; + out.reg_offset++; + } } } } else if (ir->data.mode == ir_var_uniform) { @@ -2600,15 +2608,13 @@ fs_visitor::emit_color_write(int target, int index, int first_color_mrf) { int reg_width = dispatch_width / 8; fs_inst *inst; - fs_reg color = outputs[target]; + fs_reg color = outputs[target * 4 + index]; fs_reg mrf; /* If there's no color data to be written, skip it. 
*/ if (color.file == BAD_FILE) return; - color.reg_offset += index; - if (dispatch_width == 8 || brw->gen >= 6) { /* SIMD8 write looks like: * m + 0: r0 @@ -2709,8 +2715,7 @@ fs_visitor::emit_alpha_test() BRW_CONDITIONAL_NEQ)); } else { /* RT0 alpha */ - fs_reg color = outputs[0]; - color.reg_offset += 3; + fs_reg color = outputs[3]; /* f0.1 &= func(color, ref) */ cmp = emit(CMP(reg_null_f, color, fs_reg(c->key.alpha_test_ref), @@ -2815,23 +2820,23 @@ fs_visitor::emit_fb_writes() } if (do_dual_src) { - fs_reg src0 = this->outputs[0]; - fs_reg src1 = this->dual_src_output; - this->current_annotation = ralloc_asprintf(this->mem_ctx, "FB write src0"); for (int i = 0; i < 4; i++) { - fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + i, src0.type), src0)); - src0.reg_offset++; - inst->saturate = c->key.clamp_fragment_color; + fs_reg src0 = this->outputs[0 * 4 + i]; + if (src0.file != BAD_FILE) { + fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + i, src0.type), + src0)); + inst->saturate = c->key.clamp_fragment_color; + } } this->current_annotation = ralloc_asprintf(this->mem_ctx, "FB write src1"); for (int i = 0; i < 4; i++) { + fs_reg src1 = this->dual_src_output[i]; fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + 4 + i, src1.type), src1)); - src1.reg_offset++; inst->saturate = c->key.clamp_fragment_color; } @@ -2864,8 +2869,7 @@ fs_visitor::emit_fb_writes() int write_color_mrf = color_mrf; if (src0_alpha_to_render_target && target != 0) { fs_inst *inst; - fs_reg color = outputs[0]; - color.reg_offset += 3; + fs_reg color = outputs[3]; inst = emit(MOV(fs_reg(MRF, write_color_mrf, color.type), color)); @@ -2873,7 +2877,7 @@ fs_visitor::emit_fb_writes() write_color_mrf = color_mrf + reg_width; } - for (unsigned i = 0; i < this->output_components[target]; i++) + for (unsigned i = 0; i < 4; i++) emit_color_write(target, i, write_color_mrf); bool eot = false; @@ -2966,7 +2970,7 @@ fs_visitor::fs_visitor(struct brw_context *brw, hash_table_pointer_compare); 
memset(this->outputs, 0, sizeof(this->outputs)); - memset(this->output_components, 0, sizeof(this->output_components)); + memset(this->dual_src_output, 0, sizeof(this->dual_src_output)); this->first_non_payload_grf = 0; this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; -- 1.9.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev