Paul Berry <stereotype...@gmail.com> writes: > On 15 September 2013 00:19, Francisco Jerez <curroje...@riseup.net> wrote: > >>[...] >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> index 762832a..412d27a 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> @@ -2112,8 +2112,91 @@ fs_visitor::visit(ir_end_primitive *) >> } >> >> void >> +fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, >> + unsigned offset, fs_reg dst, fs_reg src0, >> + fs_reg src1) >> +{ >> + const unsigned operand_len = dispatch_width / 8; >> + unsigned mlen = 0; >> + >> + /* Initialize the sample mask in the message header. */ >> + emit(MOV(brw_uvec_mrf(8, mlen, 0), brw_imm_ud(0))) >> + ->force_writemask_all = true; >> + emit(MOV(brw_uvec_mrf(1, mlen, 7), >> + retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD))) >> + ->force_writemask_all = true; >> > > For fragment shaders that don't use discard (fp->UsesKill == false) this > will do the right thing. > > For fragment shaders that do use discard, we store the current pixel mask > in the f0.1 register, so you'll need to do something like this: > > emit(MOV(brw_uvec_mrf(1, mlen, 7), brw_flag_reg(0, 1))->force_writemask_all > = true; > > Otherwise pixels that have been discarded will erroneously get the atomic > operation applied to them. >
Ugh. Thanks for pointing this out, it should be fixed now. :) > Note: if all the pixels within a 2x2 subspan get discarded, we disable that > subspan in the execution mask. So in order to test this effectively, > you'll probably need to write a piglit test that discards only some of the > pixels within a subspan, and not others. > > >> + mlen++; >> + >> + /* Set the atomic operation offset. */ >> + emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), brw_imm_ud(offset))); >> + mlen += operand_len; >> + >> + /* Set the atomic operation arguments. */ >> + if (src0.file != BAD_FILE) { >> + emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), src0)); >> + mlen += operand_len; >> + } >> + >> + if (src1.file != BAD_FILE) { >> + emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), src1)); >> + mlen += operand_len; >> + } >> > > src0 is address and src1 is write data, right? It would be nice to have > that in a comment so that readers don't have to cross reference to the > bspec. > Nope, both are arguments as the comment says, the address is set by the MRF write right before those. > >> + >> + /* Emit the instruction. */ >> + fs_inst inst(SHADER_OPCODE_UNTYPED_ATOMIC, dst, >> + fs_reg(atomic_op), fs_reg(surf_index)); >> + inst.base_mrf = 0; >> + inst.mlen = mlen; >> + emit(inst); >> +} >> + >> +void >> +fs_visitor::emit_untyped_surface_read(unsigned surf_index, unsigned >> offset, >> + fs_reg dst) >> +{ >> + const unsigned operand_len = dispatch_width / 8; >> + unsigned mlen = 0; >> + >> + /* Initialize the sample mask in the message header. */ >> + emit(MOV(brw_uvec_mrf(8, mlen, 0), brw_imm_ud(0))) >> + ->force_writemask_all = true; >> + emit(MOV(brw_uvec_mrf(1, mlen, 7), >> + retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD))) >> + ->force_writemask_all = true; >> > > Same comment about discard applies here, although it's less critical > because this is a read operation (I suspect the only effect will be a tiny > performance penalty). > Fixed, thanks. > >> + mlen++; >> + >> + /* Set the surface read offset. */ >> + emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), brw_imm_ud(offset))); >> + mlen += operand_len; >> + >> + /* Emit the instruction. */ >> + fs_inst inst(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, >> fs_reg(surf_index)); >> + inst.base_mrf = 0; >> + inst.mlen = mlen; >> + emit(inst); >> +} >> + >> +void >> fs_visitor::visit(ir_atomic *ir) >> { >> + ir_variable *loc = ir->location->variable_referenced(); >> + unsigned surf_index = SURF_INDEX_WM_ABO(loc->atomic.buffer_index); >> + >> + result = fs_reg(this, ir->type); >> + >> + switch (ir->op) { >> + case ir_atomic_read: >> + emit_untyped_surface_read(surf_index, loc->atomic.offset, result); >> + break; >> + case ir_atomic_inc: >> + emit_untyped_atomic(BRW_AOP_INC, surf_index, loc->atomic.offset, >> + result, fs_reg(), fs_reg()); >> + break; >> + case ir_atomic_dec: >> + emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, loc->atomic.offset, >> + result, fs_reg(), fs_reg()); >> > > These calls to fs_reg() don't look right to me. Don't we need to pass the > address and the increment/decrement amount to emit_untyped_atomic()? > The address *is* passed through the 'loc->atomic.offset' argument. Both registers are empty because the increment/decrement ops take no arguments, the increment amount is always +/-1. Thanks.
pgpKOrA2Ss71O.pgp
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev