On 29 October 2013 16:37, Francisco Jerez <curroje...@riseup.net> wrote:
> This can deal with all the 15 32-bit untyped atomic operations the > hardware supports, but only INC and PREDEC are going to be exposed > through the API for now. > > v2: Represent atomics as GLSL intrinsics. Add support for variably > indexed atomic counter arrays. Fix interaction with fragment > discard. > --- > src/mesa/drivers/dri/i965/brw_fs.h | 9 ++ > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 131 > ++++++++++++++++++++++++++- > 2 files changed, 138 insertions(+), 2 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > b/src/mesa/drivers/dri/i965/brw_fs.h > index 5b78313..081f8a3 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > @@ -391,6 +391,13 @@ public: > void emit_shader_time_write(enum shader_time_shader_type type, > fs_reg value); > > + void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, > + fs_reg dst, fs_reg offset, fs_reg src0, > + fs_reg src1); > + > + void emit_untyped_surface_read(unsigned surf_index, fs_reg dst, > + fs_reg offset); > + > bool try_rewrite_rhs_to_dst(ir_assignment *ir, > fs_reg dst, > fs_reg src, > @@ -410,6 +417,8 @@ public: > > void dump_instruction(backend_instruction *inst); > > + void visit_atomic_counter_intrinsic(ir_call *ir); > + > struct gl_fragment_program *fp; > struct brw_wm_compile *c; > unsigned int sanity_param_count; > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > index 71b4bf9..b6361d5 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > @@ -106,7 +106,7 @@ fs_visitor::visit(ir_variable *ir) > * ir_binop_ubo_load expressions and not ir_dereference_variable > for UBO > * variables, so no need for them to be in variable_ht. 
> */ > - if (ir->is_in_uniform_block()) > + if (ir->is_in_uniform_block() || ir->type->contains_atomic()) >

The comment above this "if" statement should be updated to explain why it's ok to return when we encounter a type that contains an atomic. With that change, the patch is:

Reviewed-by: Paul Berry <stereotype...@gmail.com>

> return; > > if (dispatch_width == 16) { > @@ -2187,9 +2187,58 @@ fs_visitor::visit(ir_loop_jump *ir) > } > > void > +fs_visitor::visit_atomic_counter_intrinsic(ir_call *ir) > +{ > + ir_dereference *deref = static_cast<ir_dereference *>( > + ir->actual_parameters.get_head()); > + ir_variable *location = deref->variable_referenced(); > + unsigned surf_index = (c->prog_data.base.binding_table.abo_start + > + location->atomic.buffer_index); > + > + /* Calculate the surface offset */ > + fs_reg offset(this, glsl_type::uint_type); > + ir_dereference_array *deref_array = deref->as_dereference_array(); > + > + if (deref_array) { > + deref_array->array_index->accept(this); > + > + fs_reg tmp(this, glsl_type::uint_type); > + emit(MUL(tmp, this->result, ATOMIC_COUNTER_SIZE)); > + emit(ADD(offset, tmp, location->atomic.offset)); > + } else { > + offset = location->atomic.offset; > + } > + > + /* Emit the appropriate machine instruction */ > + const char *callee = ir->callee->function_name(); > + ir->return_deref->accept(this); > + fs_reg dst = this->result; > + > + if (!strcmp("__intrinsic_atomic_read", callee)) { > + emit_untyped_surface_read(surf_index, dst, offset); > + > + } else if (!strcmp("__intrinsic_atomic_increment", callee)) { > + emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset, > + fs_reg(), fs_reg()); > + > + } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) { > + emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset, > + fs_reg(), fs_reg()); > + } > +} > + > +void > fs_visitor::visit(ir_call *ir) > { > - assert(!"FINISHME"); > + const char *callee = ir->callee->function_name(); > + > + if 
(!strcmp("__intrinsic_atomic_read", callee) || > + !strcmp("__intrinsic_atomic_increment", callee) || > + !strcmp("__intrinsic_atomic_predecrement", callee)) { > + visit_atomic_counter_intrinsic(ir); > + } else { > + assert(!"Unsupported intrinsic."); > + } > } > > void > @@ -2240,6 +2289,84 @@ fs_visitor::visit(ir_end_primitive *) > assert(!"not reached"); > } > > +void > +fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, > + fs_reg dst, fs_reg offset, fs_reg src0, > + fs_reg src1) > +{ > + const unsigned operand_len = dispatch_width / 8; > + unsigned mlen = 0; > + > + /* Initialize the sample mask in the message header. */ > + emit(MOV(brw_uvec_mrf(8, mlen, 0), brw_imm_ud(0))) > + ->force_writemask_all = true; > + > + if (fp->UsesKill) { > + emit(MOV(brw_uvec_mrf(1, mlen, 7), brw_flag_reg(0, 1))) > + ->force_writemask_all = true; > + } else { > + emit(MOV(brw_uvec_mrf(1, mlen, 7), > + retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD))) > + ->force_writemask_all = true; > + } > + > + mlen++; > + > + /* Set the atomic operation offset. */ > + emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), offset)); > + mlen += operand_len; > + > + /* Set the atomic operation arguments. */ > + if (src0.file != BAD_FILE) { > + emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), src0)); > + mlen += operand_len; > + } > + > + if (src1.file != BAD_FILE) { > + emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), src1)); > + mlen += operand_len; > + } > + > + /* Emit the instruction. */ > + fs_inst inst(SHADER_OPCODE_UNTYPED_ATOMIC, dst, atomic_op, surf_index); > + inst.base_mrf = 0; > + inst.mlen = mlen; > + emit(inst); > +} > + > +void > +fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst, > + fs_reg offset) > +{ > + const unsigned operand_len = dispatch_width / 8; > + unsigned mlen = 0; > + > + /* Initialize the sample mask in the message header. 
*/ > + emit(MOV(brw_uvec_mrf(8, mlen, 0), brw_imm_ud(0))) > + ->force_writemask_all = true; > + > + if (fp->UsesKill) { > + emit(MOV(brw_uvec_mrf(1, mlen, 7), brw_flag_reg(0, 1))) > + ->force_writemask_all = true; > + } else { > + emit(MOV(brw_uvec_mrf(1, mlen, 7), > + retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD))) > + ->force_writemask_all = true; > + } > + > + mlen++; > + > + /* Set the surface read offset. */ > + emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), offset)); > + mlen += operand_len; > + > + /* Emit the instruction. */ > + fs_inst inst(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, surf_index); > + inst.base_mrf = 0; > + inst.mlen = mlen; > + emit(inst); > +} > + > fs_inst * > fs_visitor::emit(fs_inst inst) > { > -- > 1.8.3.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev