radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); >> + radeon_emit(cs, reloc); >> + } >> + ++rctx->append_fence_id; >> + reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, >> + >> r600_resource(rctx->append_fence), >> + RADEON_USAGE_READWRITE, >> + RADEON_PRIO_SHADER_RW_BUFFER); >> + dst_offset = r600_resource(rctx->append_fence)->gpu_address; >> + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, 0) | pkt_flags); >> + radeon_emit(cs, EVENT_TYPE(event) | EVENT_INDEX(6)); >> + radeon_emit(cs, dst_offset & 0xffffffff); >> + radeon_emit(cs, (2 << 29) | ((dst_offset >> 32) & 0xff)); >> + radeon_emit(cs, rctx->append_fence_id); >> + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); >> + radeon_emit(cs, reloc); >> + >> + radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0) | pkt_flags); >> + radeon_emit(cs, WAIT_REG_MEM_GEQUAL | WAIT_REG_MEM_MEMORY | (1 << >> 8)); >> + radeon_emit(cs, dst_offset & 0xffffffff); >> + radeon_emit(cs, ((dst_offset >> 32) & 0xff)); >> + radeon_emit(cs, rctx->append_fence_id); >> + radeon_emit(cs, 0xffffffff); >> + radeon_emit(cs, 0xa); >> + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); >> + radeon_emit(cs, reloc); > > > This is pretty pessimistic, did you benchmark it?
I actually don't know of any app that uses atomic counters; do we have one? Until I do, I'd rather stay correct and simple, and if someone finds a workload then I can care more.

>
> There's an ongoing discussion in the OpenGL WG about what exactly the
> semantics for ARB_shader_atomic_counters should be, because the original
> extension language and the language after inclusion into core seem a bit
> contradictory.
>
> Certainly, one of the CTS tests for ARB_shader_atomic_counters inserts
> glMemoryBarrier() calls between back-to-back draws involving counters...
>
> So it seems to me you should probably be able to get away with waiting for
> the append fence ID in pipe_context::memory_barrier and/or when the atomic
> counter buffers are unbound.
>
> The other thing is that the wait may have to be in PFP (or rather, the PFP
> may have to be synced with the ME) in order to get index buffer and indirect
> draw loads correct.
>
> Bottom line is, nobody seems to have good and definitive tests for the
> various read-after-write combinations that are possible with atomic
> counters.
>
> I guess it's fine for r600 to land this as-is, but I do want to implement
> GDS atomics in radeonsi as well, and then we may want to revisit :)
>

Yeah, I've sort of followed it. I'm not sure the fglrx driver even got stuff right; it fails a few of the piglit tests. For radeonsi, hopefully this code won't affect it too much.

Dave.
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev