On 03/26/2014 02:23 PM, Eric Anholt wrote: > Basically, replace the output_components[] array with per-channel tracking > of the register storing that channel, or a BAD_FILE undefined reg. > > Right now var->data.location_frac is always 0, but I'm going to use that > in vector_splitting next. > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- > src/mesa/drivers/dri/i965/brw_fs.h | 5 +-- > src/mesa/drivers/dri/i965/brw_fs_fp.cpp | 18 +++++---- > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 55 > ++++++++++++++-------------- > 4 files changed, 40 insertions(+), 40 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 0d24f59..eee0c8a 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -1732,7 +1732,7 @@ fs_visitor::compact_virtual_grfs() > { &pixel_y, 1 }, > { &pixel_w, 1 }, > { &wpos_w, 1 }, > - { &dual_src_output, 1 }, > + { dual_src_output, ARRAY_SIZE(dual_src_output) }, > { outputs, ARRAY_SIZE(outputs) }, > { delta_x, ARRAY_SIZE(delta_x) }, > { delta_y, ARRAY_SIZE(delta_y) }, > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > b/src/mesa/drivers/dri/i965/brw_fs.h > index f410733..d47bc28 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > @@ -526,9 +526,8 @@ public: > struct hash_table *variable_ht; > fs_reg frag_depth; > fs_reg sample_mask; > - fs_reg outputs[BRW_MAX_DRAW_BUFFERS]; > - unsigned output_components[BRW_MAX_DRAW_BUFFERS]; > - fs_reg dual_src_output; > + fs_reg outputs[BRW_MAX_DRAW_BUFFERS * 4]; > + fs_reg dual_src_output[4]; > bool do_dual_src; > int first_non_payload_grf; > /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */ > diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp > b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp > index 49eaf05..19483e3 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp > @@ -646,25 +646,27 @@ fs_visitor::get_fp_dst_reg(const prog_dst_register *dst) > return frag_depth; > } else if (dst->Index == FRAG_RESULT_COLOR) { > if (outputs[0].file == BAD_FILE) { > - outputs[0] = fs_reg(this, glsl_type::vec4_type); > - output_components[0] = 4; > + fs_reg reg = fs_reg(this, glsl_type::vec4_type); > > /* Tell emit_fb_writes() to smear fragment.color across all the > * color attachments. > */ > for (int i = 1; i < c->key.nr_color_regions; i++) { > - outputs[i] = outputs[0]; > - output_components[i] = output_components[0]; > + for (int j = 0; j < 4; j++) { > + outputs[i * 4 + j] = offset(reg, j); > + } > } > } > return outputs[0];
This sure doesn't look right. We check if outputs[0].file == BAD_FILE, and then proceed to initialize outputs[4], outputs[5], etc...but never outputs[0] through outputs[3]. Then we return outputs[0], giving them a bogus register... Did you mean to change the loop condition to 'int i = 0'? > } else { I'm pretty the above bug means this else case will never happen... The GLSL code looks mostly reasonable. > int output_index = dst->Index - FRAG_RESULT_DATA0; > - if (outputs[output_index].file == BAD_FILE) { > - outputs[output_index] = fs_reg(this, glsl_type::vec4_type); > + if (outputs[output_index * 4].file == BAD_FILE) { > + fs_reg reg = fs_reg(this, glsl_type::vec4_type); > + for (int i = 0; i < 4; i++) { > + outputs[output_index * 4 + i] = offset(reg, i); > + } > } > - output_components[output_index] = 4; > - return outputs[output_index]; > + return outputs[output_index * 4]; > } > > case PROGRAM_UNDEFINED: > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > index 047ec21..d9bb4de 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > @@ -70,17 +70,25 @@ fs_visitor::visit(ir_variable *ir) > } else if (ir->data.mode == ir_var_shader_out) { > reg = new(this->mem_ctx) fs_reg(this, ir->type); > > + int vector_elements = > + ir->type->is_array() ? ir->type->fields.array->vector_elements > + : ir->type->vector_elements; > + > if (ir->data.index > 0) { > - assert(ir->data.location == FRAG_RESULT_DATA0); > - assert(ir->data.index == 1); > - this->dual_src_output = *reg; > + assert(ir->data.location == FRAG_RESULT_DATA0); > + assert(ir->data.index == 1); > + for (unsigned i = 0; i < vector_elements; i++) { > + this->dual_src_output[i + ir->data.location_frac] = offset(*reg, > i); > + } > this->do_dual_src = true; > } else if (ir->data.location == FRAG_RESULT_COLOR) { > /* Writing gl_FragColor outputs to all color regions. */ > - for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) { > - this->outputs[i] = *reg; > - this->output_components[i] = 4; > - } > + for (unsigned i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) { > + for (int j = 0; j < vector_elements; j++) { > + this->outputs[i * 4 + j + ir->data.location_frac] = > offset(*reg, > + j); > + } > + } > } else if (ir->data.location == FRAG_RESULT_DEPTH) { > this->frag_depth = *reg; > } else if (ir->data.location == FRAG_RESULT_SAMPLE_MASK) { > @@ -90,16 +98,14 @@ fs_visitor::visit(ir_variable *ir) > assert(ir->data.location >= FRAG_RESULT_DATA0 && > ir->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS); > > - int vector_elements = > - ir->type->is_array() ? ir->type->fields.array->vector_elements > - : ir->type->vector_elements; > - > /* General color output. */ > for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) { > int output = ir->data.location - FRAG_RESULT_DATA0 + i; > - this->outputs[output] = *reg; > - this->outputs[output].reg_offset += vector_elements * i; > - this->output_components[output] = vector_elements; > + > + for (int j = 0; j < vector_elements; j++) { > + this->outputs[4 * output + j + ir->data.location_frac] = > + offset(*reg, vector_elements * i + j); > + } > } > } > } else if (ir->data.mode == ir_var_uniform) { > @@ -2591,15 +2597,13 @@ fs_visitor::emit_color_write(int target, int index, > int first_color_mrf) > { > int reg_width = dispatch_width / 8; > fs_inst *inst; > - fs_reg color = outputs[target]; > + fs_reg color = outputs[target * 4 + index]; > fs_reg mrf; > > /* If there's no color data to be written, skip it. */ > if (color.file == BAD_FILE) > return; > > - color.reg_offset += index; > - > if (dispatch_width == 8 || brw->gen >= 6) { > /* SIMD8 write looks like: > * m + 0: r0 > @@ -2700,8 +2704,7 @@ fs_visitor::emit_alpha_test() > BRW_CONDITIONAL_NEQ)); > } else { > /* RT0 alpha */ > - fs_reg color = outputs[0]; > - color.reg_offset += 3; > + fs_reg color = outputs[3]; > > /* f0.1 &= func(color, ref) */ > cmp = emit(CMP(reg_null_f, color, fs_reg(c->key.alpha_test_ref), > @@ -2806,23 +2809,20 @@ fs_visitor::emit_fb_writes() > } > > if (do_dual_src) { > - fs_reg src0 = this->outputs[0]; > - fs_reg src1 = this->dual_src_output; > - > this->current_annotation = ralloc_asprintf(this->mem_ctx, > "FB write src0"); > for (int i = 0; i < 4; i++) { > + fs_reg src0 = this->outputs[i]; Do we need to check for BAD_FILE here? Say, dual source blending on vec3 outputs (rather than vec4)? (Or, just use emit_color_write?) > fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + i, src0.type), src0)); > - src0.reg_offset++; > inst->saturate = c->key.clamp_fragment_color; > } > > this->current_annotation = ralloc_asprintf(this->mem_ctx, > "FB write src1"); > for (int i = 0; i < 4; i++) { > + fs_reg src1 = this->dual_src_output[i]; > fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + 4 + i, src1.type), > src1)); > - src1.reg_offset++; > inst->saturate = c->key.clamp_fragment_color; > } > > @@ -2855,8 +2855,7 @@ fs_visitor::emit_fb_writes() > int write_color_mrf = color_mrf; > if (src0_alpha_to_render_target && target != 0) { > fs_inst *inst; > - fs_reg color = outputs[0]; > - color.reg_offset += 3; > + fs_reg color = outputs[3]; > > inst = emit(MOV(fs_reg(MRF, write_color_mrf, color.type), > color)); > @@ -2864,7 +2863,7 @@ fs_visitor::emit_fb_writes() > write_color_mrf = color_mrf + reg_width; > } > > - for (unsigned i = 0; i < this->output_components[target]; i++) > + for (unsigned i = 0; i < 4; i++) > emit_color_write(target, i, write_color_mrf); > > bool eot = false; > @@ -2957,7 +2956,7 @@ fs_visitor::fs_visitor(struct brw_context *brw, > hash_table_pointer_compare); > > memset(this->outputs, 0, sizeof(this->outputs)); > - memset(this->output_components, 0, sizeof(this->output_components)); > + memset(this->dual_src_output, 0, sizeof(this->dual_src_output)); > this->first_non_payload_grf = 0; > this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; > >
signature.asc
Description: OpenPGP digital signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev