From: Ilia Mirkin <imir...@alum.mit.edu> v2 (Nicolai): - BALLOT isn't per-channel - expand the documentation (also for VOTE_*)
v3: - only BALLOT returns a 64-bit lanemask (Boyan) - relax the requirement on READ_INVOC: the invocation number to read from must be uniform within a sub-group. This matches the GL_ARB_shader_ballot spec (and the v_readlane instruction of AMD GCN) v4: - hopefully really fix the doc of VOTE_* returns (Ilia) Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> Signed-off-by: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/gallium/auxiliary/tgsi/tgsi_info.c | 6 +-- src/gallium/docs/source/tgsi.rst | 68 +++++++++++++++++++++++++----- src/gallium/include/pipe/p_shader_tokens.h | 6 +-- 3 files changed, 63 insertions(+), 17 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 5a6a9bc..30bad6d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -106,51 +106,51 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP }, { 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS }, { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB }, { 1, 1, 0, 0, 0, 0, 0, OTHR, "FBFETCH", TGSI_OPCODE_FBFETCH }, { 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV }, { 1, 2, 0, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 }, { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL }, { 0, 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK }, { 0, 1, 0, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF }, { 0, 1, 0, 0, 1, 0, 1, NONE, "UIF", TGSI_OPCODE_UIF }, - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 76 }, /* removed */ + { 1, 2, 0, 0, 0, 0, 0, COMP, "READ_INVOC", TGSI_OPCODE_READ_INVOC }, { 0, 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE }, { 0, 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF }, { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE }, { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE }, { 0, 1, 0, 0, 0, 0, 0, NONE, "PUSHA", TGSI_OPCODE_PUSHA }, { 1, 0, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA }, { 1, 1, 0, 0, 
0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL }, { 1, 1, 0, 0, 0, 0, 0, COMP, "I2F", TGSI_OPCODE_I2F }, { 1, 1, 0, 0, 0, 0, 0, COMP, "NOT", TGSI_OPCODE_NOT }, { 1, 1, 0, 0, 0, 0, 0, COMP, "TRUNC", TGSI_OPCODE_TRUNC }, { 1, 2, 0, 0, 0, 0, 0, COMP, "SHL", TGSI_OPCODE_SHL }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 88 }, /* removed */ + { 1, 1, 0, 0, 0, 0, 0, OTHR, "BALLOT", TGSI_OPCODE_BALLOT }, { 1, 2, 0, 0, 0, 0, 0, COMP, "AND", TGSI_OPCODE_AND }, { 1, 2, 0, 0, 0, 0, 0, COMP, "OR", TGSI_OPCODE_OR }, { 1, 2, 0, 0, 0, 0, 0, COMP, "MOD", TGSI_OPCODE_MOD }, { 1, 2, 0, 0, 0, 0, 0, COMP, "XOR", TGSI_OPCODE_XOR }, { 1, 3, 0, 0, 0, 0, 0, COMP, "SAD", TGSI_OPCODE_SAD }, { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF }, { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ }, { 0, 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT }, { 0, 1, 0, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT }, { 0, 1, 0, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, { 0, 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, { 0, 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB }, { 0, 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, { 0, 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB }, { 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ }, { 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS }, { 1, 1, 0, 0, 0, 0, 0, OTHR, "RESQ", TGSI_OPCODE_RESQ }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */ + { 1, 1, 0, 0, 0, 0, 0, COMP, "READ_FIRST", TGSI_OPCODE_READ_FIRST }, { 0, 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP }, { 1, 2, 0, 0, 0, 0, 0, COMP, "FSEQ", TGSI_OPCODE_FSEQ }, { 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE }, { 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT }, { 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE }, { 0, 1, 0, 0, 0, 0, 0, OTHR, "MEMBAR", TGSI_OPCODE_MEMBAR }, { 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ }, { 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */ { 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", 
TGSI_OPCODE_BREAKC }, { 0, 1, 0, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF }, diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 05b06ce..9362d43 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -2852,36 +2852,82 @@ only be used with 32-bit integer image formats. The following operation is performed atomically: .. math:: dst_x = resource[offset] resource[offset] = (dst_x > src_x ? dst_x : src_x) -.. _voteopcodes: +.. _interlaneopcodes: + +Inter-lane opcodes +^^^^^^^^^^^^^^^^^^ + +These opcodes reduce the given value across the shader invocations +running in the current SIMD group. Every thread in the subgroup will receive +the same result. The BALLOT operations accept a single-channel argument that +is treated as a boolean and produce a 64-bit value. + +.. opcode:: VOTE_ANY - Value is set in any of the active invocations + + Syntax: ``VOTE_ANY dst, value`` + + Example: ``VOTE_ANY TEMP[0].x, TEMP[1].x`` + + +.. opcode:: VOTE_ALL - Value is set in all of the active invocations + + Syntax: ``VOTE_ALL dst, value`` + + Example: ``VOTE_ALL TEMP[0].x, TEMP[1].x`` + + +.. opcode:: VOTE_EQ - Value is the same in all of the active invocations + + Syntax: ``VOTE_EQ dst, value`` + + Example: ``VOTE_EQ TEMP[0].x, TEMP[1].x`` + + +.. opcode:: BALLOT - Lanemask of whether the value is set in each active + invocation + + Syntax: ``BALLOT dst, value`` + + Example: ``BALLOT TEMP[0].xy, TEMP[1].x`` + + When the argument is a constant true, this produces a bitmask of active + invocations. In fragment shaders, this can include helper invocations + (invocations whose outputs and writes to memory are discarded, but which + are used to compute derivatives). + + +.. 
opcode:: READ_FIRST - Broadcast the value from the first active + invocation to all active lanes + + Syntax: ``READ_FIRST dst, value`` + + Example: ``READ_FIRST TEMP[0], TEMP[1]`` -Vote opcodes -^^^^^^^^^^^^ -These opcodes compare the given value across the shader invocations -running in the current SIMD group. The details of exactly which -invocations get compared are implementation-defined, and it would be a -correct implementation to only ever consider the current thread's -value. (i.e. SIMD group of 1). The argument is treated as a boolean. +.. opcode:: READ_INVOC - Retrieve the value from the given invocation + (need not be uniform) -.. opcode:: VOTE_ANY - Value is set in any of the current invocations + Syntax: ``READ_INVOC dst, value, invocation`` -.. opcode:: VOTE_ALL - Value is set in all of the current invocations + Example: ``READ_INVOC TEMP[0].xy, TEMP[1].xy, TEMP[2].x`` -.. opcode:: VOTE_EQ - Value is the same in all of the current invocations + invocation.x controls the invocation number to read from for all channels. + The invocation number must be the same across all active invocations in a + sub-group; otherwise, the results are undefined. 
Explanation of symbols used ------------------------------ Functions ^^^^^^^^^^^^^^ diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 8c08f27..d461f78 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -398,53 +398,53 @@ struct tgsi_property_data { #define TGSI_OPCODE_CMP 66 #define TGSI_OPCODE_SCS 67 #define TGSI_OPCODE_TXB 68 #define TGSI_OPCODE_FBFETCH 69 #define TGSI_OPCODE_DIV 70 #define TGSI_OPCODE_DP2 71 #define TGSI_OPCODE_TXL 72 #define TGSI_OPCODE_BRK 73 #define TGSI_OPCODE_IF 74 #define TGSI_OPCODE_UIF 75 - /* gap */ +#define TGSI_OPCODE_READ_INVOC 76 #define TGSI_OPCODE_ELSE 77 #define TGSI_OPCODE_ENDIF 78 #define TGSI_OPCODE_DDX_FINE 79 #define TGSI_OPCODE_DDY_FINE 80 #define TGSI_OPCODE_PUSHA 81 #define TGSI_OPCODE_POPA 82 #define TGSI_OPCODE_CEIL 83 #define TGSI_OPCODE_I2F 84 #define TGSI_OPCODE_NOT 85 #define TGSI_OPCODE_TRUNC 86 #define TGSI_OPCODE_SHL 87 - /* gap */ +#define TGSI_OPCODE_BALLOT 88 #define TGSI_OPCODE_AND 89 #define TGSI_OPCODE_OR 90 #define TGSI_OPCODE_MOD 91 #define TGSI_OPCODE_XOR 92 #define TGSI_OPCODE_SAD 93 #define TGSI_OPCODE_TXF 94 #define TGSI_OPCODE_TXQ 95 #define TGSI_OPCODE_CONT 96 #define TGSI_OPCODE_EMIT 97 #define TGSI_OPCODE_ENDPRIM 98 #define TGSI_OPCODE_BGNLOOP 99 #define TGSI_OPCODE_BGNSUB 100 #define TGSI_OPCODE_ENDLOOP 101 #define TGSI_OPCODE_ENDSUB 102 #define TGSI_OPCODE_TXQ_LZ 103 /* TXQ for mipmap level 0 */ #define TGSI_OPCODE_TXQS 104 #define TGSI_OPCODE_RESQ 105 - /* gap */ +#define TGSI_OPCODE_READ_FIRST 106 #define TGSI_OPCODE_NOP 107 #define TGSI_OPCODE_FSEQ 108 #define TGSI_OPCODE_FSGE 109 #define TGSI_OPCODE_FSLT 110 #define TGSI_OPCODE_FSNE 111 #define TGSI_OPCODE_MEMBAR 112 #define TGSI_OPCODE_CALLNZ 113 /* gap */ -- 2.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev