On Tue, Apr 4, 2017 at 10:41 AM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: > From: Ilia Mirkin <imir...@alum.mit.edu> > > v2 (Nicolai): > - BALLOT isn't per-channel > - expand the documentation (also for VOTE_*) > > v3: > - only BALLOT returns a 64-bit lanemask (Boyan) > - relax the requirement on READ_INVOC: the invocation number to read > from must be uniform within a sub-group. This matches the > GL_ARB_shader_ballot spect (and the v_readlane instruction of AMD > GCN) > > Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> > Signed-off-by: Nicolai Hähnle <nicolai.haeh...@amd.com> > --- > src/gallium/auxiliary/tgsi/tgsi_info.c | 6 +-- > src/gallium/docs/source/tgsi.rst | 68 > +++++++++++++++++++++++++----- > src/gallium/include/pipe/p_shader_tokens.h | 6 +-- > 3 files changed, 63 insertions(+), 17 deletions(-) > > diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c > b/src/gallium/auxiliary/tgsi/tgsi_info.c > index 5a6a9bc..30bad6d 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_info.c > +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c > @@ -106,51 +106,51 @@ static const struct tgsi_opcode_info > opcode_info[TGSI_OPCODE_LAST] = > { 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP }, > { 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS }, > { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB }, > { 1, 1, 0, 0, 0, 0, 0, OTHR, "FBFETCH", TGSI_OPCODE_FBFETCH }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV }, > { 1, 2, 0, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 }, > { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL }, > { 0, 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK }, > { 0, 1, 0, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF }, > { 0, 1, 0, 0, 1, 0, 1, NONE, "UIF", TGSI_OPCODE_UIF }, > - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 76 }, /* removed */ > + { 1, 2, 0, 0, 0, 0, 0, COMP, "READ_INVOC", TGSI_OPCODE_READ_INVOC }, > { 0, 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE }, > { 0, 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF }, > { 1, 1, 0, 0, 0, 0, 0, COMP, 
"DDX_FINE", TGSI_OPCODE_DDX_FINE }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE }, > { 0, 1, 0, 0, 0, 0, 0, NONE, "PUSHA", TGSI_OPCODE_PUSHA }, > { 1, 0, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "I2F", TGSI_OPCODE_I2F }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "NOT", TGSI_OPCODE_NOT }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "TRUNC", TGSI_OPCODE_TRUNC }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "SHL", TGSI_OPCODE_SHL }, > - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 88 }, /* removed */ > + { 1, 1, 0, 0, 0, 0, 0, OTHR, "BALLOT", TGSI_OPCODE_BALLOT }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "AND", TGSI_OPCODE_AND }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "OR", TGSI_OPCODE_OR }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "MOD", TGSI_OPCODE_MOD }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "XOR", TGSI_OPCODE_XOR }, > { 1, 3, 0, 0, 0, 0, 0, COMP, "SAD", TGSI_OPCODE_SAD }, > { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF }, > { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ }, > { 0, 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT }, > { 0, 1, 0, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT }, > { 0, 1, 0, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, > { 0, 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, > { 0, 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB }, > { 0, 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, > { 0, 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB }, > { 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ }, > { 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS }, > { 1, 1, 0, 0, 0, 0, 0, OTHR, "RESQ", TGSI_OPCODE_RESQ }, > - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */ > + { 1, 1, 0, 0, 0, 0, 0, COMP, "READ_FIRST", TGSI_OPCODE_READ_FIRST }, > { 0, 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "FSEQ", TGSI_OPCODE_FSEQ }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", 
TGSI_OPCODE_FSLT }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE }, > { 0, 1, 0, 0, 0, 0, 0, OTHR, "MEMBAR", TGSI_OPCODE_MEMBAR }, > { 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ }, > { 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */ > { 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC }, > { 0, 1, 0, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF }, > diff --git a/src/gallium/docs/source/tgsi.rst > b/src/gallium/docs/source/tgsi.rst > index 05b06ce..4c68062 100644 > --- a/src/gallium/docs/source/tgsi.rst > +++ b/src/gallium/docs/source/tgsi.rst > @@ -2852,36 +2852,82 @@ only be used with 32-bit integer image formats. > > The following operation is performed atomically: > > .. math:: > > dst_x = resource[offset] > > resource[offset] = (dst_x > src_x ? dst_x : src_x) > > > -.. _voteopcodes: > +.. _interlaneopcodes: > + > +Inter-lane opcodes > +^^^^^^^^^^^^^^^^^^ > + > +These opcodes reduce the given value across the shader invocations > +running in the current SIMD group. Every thread in the subgroup will receive > +the same result. The BALLOT operations accept a single-channel argument that > +is treated as a boolean and produce a 64-bit value. > + > +.. opcode:: VOTE_ANY - Value is set in any of the active invocations > + > + Syntax: ``VOTE_ANY dst, value`` > + > + Example: ``VOTE_ANY TEMP[0].xy, TEMP[1].x`` > + > + > +.. opcode:: VOTE_ALL - Value is set in all of the active invocations > + > + Syntax: ``VOTE_ALL dst, value`` > + > + Example: ``VOTE_ALL TEMP[0].xy, TEMP[1].x`` > + > + > +.. opcode:: VOTE_EQ - Value is the same in all of the active invocations > + > + Syntax: ``VOTE_EQ dst, value`` > + > + Example: ``VOTE_EQ TEMP[0].xy, TEMP[1].x``
All of the VOTE_* examples above (VOTE_ANY, VOTE_ALL and VOTE_EQ) should use TEMP[0].x as the dst. > + > + > +.. opcode:: BALLOT - Lanemask of whether the value is set in each active > + invocation > + > + Syntax: ``BALLOT dst, value`` > + > + Example: ``BALLOT TEMP[0].xy, TEMP[1].x`` > + > + When the argument is a constant true, this produces a bitmask of active > + invocations. In fragment shaders, this can include helper invocations > + (invocations whose outputs and writes to memory are discarded, but which > + are used to compute derivatives). > + > + > +.. opcode:: READ_FIRST - Broadcast the value from the first active > + invocation to all active lanes > + > + Syntax: ``READ_FIRST dst, value`` > + > + Example: ``READ_FIRST TEMP[0], TEMP[1]`` > > -Vote opcodes > -^^^^^^^^^^^^ > > -These opcodes compare the given value across the shader invocations > -running in the current SIMD group. The details of exactly which > -invocations get compared are implementation-defined, and it would be a > -correct implementation to only ever consider the current thread's > -value. (i.e. SIMD group of 1). The argument is treated as a boolean. > +.. opcode:: READ_INVOC - Retrieve the value from the given invocation > + (need not be uniform) > > -.. opcode:: VOTE_ANY - Value is set in any of the current invocations > + Syntax: ``READ_INVOC dst, value, invocation`` > > -.. opcode:: VOTE_ALL - Value is set in all of the current invocations > + Example: ``READ_INVOC TEMP[0].xy, TEMP[1].xy, TEMP[2].x`` > > -.. opcode:: VOTE_EQ - Value is the same in all of the current invocations > + invocation.x controls the invocation number to read from for all channels. > + The invocation number must be the same across all active invocations in a > + sub-group; otherwise, the results are undefined. 
> > > Explanation of symbols used > ------------------------------ > > > Functions > ^^^^^^^^^^^^^^ > > > diff --git a/src/gallium/include/pipe/p_shader_tokens.h > b/src/gallium/include/pipe/p_shader_tokens.h > index 8c08f27..d461f78 100644 > --- a/src/gallium/include/pipe/p_shader_tokens.h > +++ b/src/gallium/include/pipe/p_shader_tokens.h > @@ -398,53 +398,53 @@ struct tgsi_property_data { > #define TGSI_OPCODE_CMP 66 > #define TGSI_OPCODE_SCS 67 > #define TGSI_OPCODE_TXB 68 > #define TGSI_OPCODE_FBFETCH 69 > #define TGSI_OPCODE_DIV 70 > #define TGSI_OPCODE_DP2 71 > #define TGSI_OPCODE_TXL 72 > #define TGSI_OPCODE_BRK 73 > #define TGSI_OPCODE_IF 74 > #define TGSI_OPCODE_UIF 75 > - /* gap */ > +#define TGSI_OPCODE_READ_INVOC 76 > #define TGSI_OPCODE_ELSE 77 > #define TGSI_OPCODE_ENDIF 78 > > #define TGSI_OPCODE_DDX_FINE 79 > #define TGSI_OPCODE_DDY_FINE 80 > > #define TGSI_OPCODE_PUSHA 81 > #define TGSI_OPCODE_POPA 82 > #define TGSI_OPCODE_CEIL 83 > #define TGSI_OPCODE_I2F 84 > #define TGSI_OPCODE_NOT 85 > #define TGSI_OPCODE_TRUNC 86 > #define TGSI_OPCODE_SHL 87 > - /* gap */ > +#define TGSI_OPCODE_BALLOT 88 > #define TGSI_OPCODE_AND 89 > #define TGSI_OPCODE_OR 90 > #define TGSI_OPCODE_MOD 91 > #define TGSI_OPCODE_XOR 92 > #define TGSI_OPCODE_SAD 93 > #define TGSI_OPCODE_TXF 94 > #define TGSI_OPCODE_TXQ 95 > #define TGSI_OPCODE_CONT 96 > #define TGSI_OPCODE_EMIT 97 > #define TGSI_OPCODE_ENDPRIM 98 > #define TGSI_OPCODE_BGNLOOP 99 > #define TGSI_OPCODE_BGNSUB 100 > #define TGSI_OPCODE_ENDLOOP 101 > #define TGSI_OPCODE_ENDSUB 102 > #define TGSI_OPCODE_TXQ_LZ 103 /* TXQ for mipmap level 0 */ > #define TGSI_OPCODE_TXQS 104 > #define TGSI_OPCODE_RESQ 105 > - /* gap */ > +#define TGSI_OPCODE_READ_FIRST 106 > #define TGSI_OPCODE_NOP 107 > > #define TGSI_OPCODE_FSEQ 108 > #define TGSI_OPCODE_FSGE 109 > #define TGSI_OPCODE_FSLT 110 > #define TGSI_OPCODE_FSNE 111 > > #define TGSI_OPCODE_MEMBAR 112 > #define TGSI_OPCODE_CALLNZ 113 > /* gap */ > -- > 2.9.3 > 
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev