On Fri, Nov 3, 2017 at 8:24 AM, Dave Airlie <airl...@gmail.com> wrote: > From: Dave Airlie <airl...@redhat.com> > > This adds support for a hw atomic counters to TGSI. > > A new register file for storing atomic counters is added, > along with a new atomic counter semantic, along with docs > for both. > > v2: drop semantic, move hw counter to backend, > Ilia pointed out SSO would have busted my plan, and he > was right. > > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > src/gallium/auxiliary/tgsi/tgsi_strings.c | 1 + > src/gallium/auxiliary/tgsi/tgsi_ureg.c | 79 > ++++++++++++++++++++++++++++++ > src/gallium/auxiliary/tgsi/tgsi_ureg.h | 7 +++ > src/gallium/docs/source/tgsi.rst | 37 ++++++++++++-- > src/gallium/include/pipe/p_shader_tokens.h | 1 + > src/gallium/include/pipe/p_state.h | 1 + > 6 files changed, 122 insertions(+), 4 deletions(-) > > diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c > b/src/gallium/auxiliary/tgsi/tgsi_strings.c > index 0872db9..4f28b49 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c > +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c > @@ -58,6 +58,7 @@ static const char *tgsi_file_names[] = > "BUFFER", > "MEMORY", > "CONSTBUF", > + "HWATOMIC", > }; > > const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] = > diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c > b/src/gallium/auxiliary/tgsi/tgsi_ureg.c > index b26434c..7e88f9b 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c > +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c > @@ -80,6 +80,7 @@ struct ureg_tokens { > #define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS > #define UREG_MAX_OUTPUT (4 * PIPE_MAX_SHADER_OUTPUTS) > #define UREG_MAX_CONSTANT_RANGE 32 > +#define UREG_MAX_HW_ATOMIC_RANGE 32 > #define UREG_MAX_IMMEDIATE 4096 > #define UREG_MAX_ADDR 3 > #define UREG_MAX_ARRAY_TEMPS 256 > @@ -92,6 +93,15 @@ struct const_decl { > unsigned nr_constant_ranges; > }; > > +struct hw_atomic_decl { > + struct { > + unsigned first; > + unsigned last; > + unsigned array_id; > + 
} hw_atomic_range[UREG_MAX_HW_ATOMIC_RANGE]; > + unsigned nr_hw_atomic_ranges; > +}; > + > #define DOMAIN_DECL 0 > #define DOMAIN_INSN 1 > > @@ -182,6 +192,8 @@ struct ureg_program > > struct const_decl const_decls[PIPE_MAX_CONSTANT_BUFFERS]; > > + struct hw_atomic_decl hw_atomic_decls[PIPE_MAX_HW_ATOMIC_BUFFERS]; > + > unsigned properties[TGSI_PROPERTY_COUNT]; > > unsigned nr_addrs; > @@ -583,6 +595,28 @@ out: > return ureg_src_dimension(src, 0); > } > > + > +/* Returns a new hw atomic register. Keep track of which have been > + * referred to so that we can emit decls later. > + */ > +void > +ureg_DECL_hw_atomic(struct ureg_program *ureg, > + unsigned first, > + unsigned last, > + unsigned buffer_id, > + unsigned array_id) > +{ > + struct hw_atomic_decl *decl = &ureg->hw_atomic_decls[buffer_id]; > + > + if (decl->nr_hw_atomic_ranges < UREG_MAX_HW_ATOMIC_RANGE) { > + uint i = decl->nr_hw_atomic_ranges++; > + > + decl->hw_atomic_range[i].first = first; > + decl->hw_atomic_range[i].last = last; > + decl->hw_atomic_range[i].array_id = array_id; > + } > +} > + > static struct ureg_dst alloc_temporary( struct ureg_program *ureg, > boolean local ) > { > @@ -1501,6 +1535,35 @@ emit_decl_semantic(struct ureg_program *ureg, > } > } > > +static void > +emit_decl_atomic_2d(struct ureg_program *ureg, > + unsigned first, > + unsigned last, > + unsigned index2D, > + unsigned array_id) > +{ > + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 
4 : > 3); > + > + out[0].value = 0; > + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; > + out[0].decl.NrTokens = 3; > + out[0].decl.File = TGSI_FILE_HW_ATOMIC; > + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; > + out[0].decl.Dimension = 1; > + out[0].decl.Array = array_id != 0; > + > + out[1].value = 0; > + out[1].decl_range.First = first; > + out[1].decl_range.Last = last; > + > + out[2].value = 0; > + out[2].decl_dim.Index2D = index2D; > + > + if (array_id) { > + out[3].value = 0; > + out[3].array.ArrayID = array_id; > + } > +} > > static void > emit_decl_fs(struct ureg_program *ureg, > @@ -1908,6 +1971,22 @@ static void emit_decls( struct ureg_program *ureg ) > } > } > > + for (i = 0; i < PIPE_MAX_HW_ATOMIC_BUFFERS; i++) { > + struct hw_atomic_decl *decl = &ureg->hw_atomic_decls[i]; > + > + if (decl->nr_hw_atomic_ranges) { > + uint j; > + > + for (j = 0; j < decl->nr_hw_atomic_ranges; j++) { > + emit_decl_atomic_2d(ureg, > + decl->hw_atomic_range[j].first, > + decl->hw_atomic_range[j].last, > + i, > + decl->hw_atomic_range[j].array_id); > + } > + } > + } > + > if (ureg->nr_temps) { > unsigned array = 0; > for (i = 0; i < ureg->nr_temps;) { > diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h > b/src/gallium/auxiliary/tgsi/tgsi_ureg.h > index e88c2c1..96aef25 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h > +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h > @@ -316,6 +316,13 @@ struct ureg_src > ureg_DECL_constant( struct ureg_program *, > unsigned index ); > > +void > +ureg_DECL_hw_atomic(struct ureg_program *ureg, > + unsigned first, > + unsigned last, > + unsigned buffer_id, > + unsigned array_id); > + > struct ureg_dst > ureg_DECL_temporary( struct ureg_program * ); > > diff --git a/src/gallium/docs/source/tgsi.rst > b/src/gallium/docs/source/tgsi.rst > index 1a51fe9..9aace1a 100644 > --- a/src/gallium/docs/source/tgsi.rst > +++ b/src/gallium/docs/source/tgsi.rst > @@ -2638,9 +2638,11 @@ logical operations. 
In this context atomicity means > that another > concurrent memory access operation that affects the same memory > location is guaranteed to be performed strictly before or after the > entire execution of the atomic operation. The resource may be a BUFFER, > -IMAGE, or MEMORY. In the case of an image, the offset works the same as for > -``LOAD`` and ``STORE``, specified above. These atomic operations may > -only be used with 32-bit integer image formats. > +IMAGE, ATOMIC, or MEMORY. In the case of an image, the offset works > +the same as for ``LOAD`` and ``STORE``, specified above. For atomic > +counters, the offset is an immediate index to the base hw atomic > +counter for this operation. > +These atomic operations may only be used with 32-bit integer image formats. > > .. opcode:: ATOMUADD - Atomic integer addition > > @@ -3440,7 +3442,6 @@ TGSI_SEMANTIC_SUBGROUP_LT_MASK > A bit mask of ``bit index < TGSI_SEMANTIC_SUBGROUP_INVOCATION``, i.e. > ``(1 << subgroup_invocation) - 1`` in arbitrary precision arithmetic. > > - > Declaration Interpolate > ^^^^^^^^^^^^^^^^^^^^^^^ > > @@ -3517,6 +3518,34 @@ accessing a misaligned address is undefined. > Usage of the STORE opcode is only allowed if the WR (writable) flag > is set. > > +Hardware Atomic Register File > +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > + > +Hardware atomics are declared as a 2D array with an optional array id. > + > +The first member of the dimension is the buffer resource the atomic > +is located in. > +The second member is a range into the buffer resource, either for > +one or multiple counters. If this is an array, the declaration will have > +a unique array id. > + > +Each counter is 4 bytes in size, and index and ranges are in counters, not > bytes. > +DCL BUFFER[0], ATOMIC > +DCL ATOMIC[0][0] > +DCL ATOMIC[0][1] > + > +This declares two atomics, one at the start of the buffer and one in the > +second 4 bytes. > + > +DCL BUFFER[0], ATOMIC > +DCL BUFFER[1], ATOMIC
Why are there BUFFER declarations? Marek > +DCL ATOMIC[0][0] > +DCL ATOMIC[1][0] > +DCL ATOMIC[1][1..3], ARRAY(1) > + > +This declares 5 atomics, one in buffer 0 at 0, > +one in buffer 1 at 0, and an array of 3 atomics in > +the buffer 1, starting at 1. > > Properties > ^^^^^^^^^^^^^^^^^^^^^^^^ > diff --git a/src/gallium/include/pipe/p_shader_tokens.h > b/src/gallium/include/pipe/p_shader_tokens.h > index 97deef7..4e95789 100644 > --- a/src/gallium/include/pipe/p_shader_tokens.h > +++ b/src/gallium/include/pipe/p_shader_tokens.h > @@ -75,6 +75,7 @@ enum tgsi_file_type { > TGSI_FILE_BUFFER, > TGSI_FILE_MEMORY, > TGSI_FILE_CONSTBUF, > + TGSI_FILE_HW_ATOMIC, > TGSI_FILE_COUNT, /**< how many TGSI_FILE_ types */ > }; > > diff --git a/src/gallium/include/pipe/p_state.h > b/src/gallium/include/pipe/p_state.h > index 90dc561..10d21db 100644 > --- a/src/gallium/include/pipe/p_state.h > +++ b/src/gallium/include/pipe/p_state.h > @@ -75,6 +75,7 @@ extern "C" { > #define PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT 2 > #define PIPE_MAX_WINDOW_RECTANGLES 8 > > +#define PIPE_MAX_HW_ATOMIC_BUFFERS 32 > > struct pipe_reference > { > -- > 2.9.5 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev