On Mon, Apr 3, 2017 at 10:01 PM, Boyan Ding <boyan.j.d...@gmail.com> wrote: > 2017-04-01 1:14 GMT+08:00 Nicolai Hähnle <nhaeh...@gmail.com>: >> From: Ilia Mirkin <imir...@alum.mit.edu> >> >> v2 (Nicolai): >> - BALLOT isn't per-channel >> - expand the documentation (also for VOTE_*) >> >> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> >> Signed-off-by: Nicolai Hähnle <nicolai.haeh...@amd.com> >> --- >> src/gallium/auxiliary/tgsi/tgsi_info.c | 6 +-- >> src/gallium/docs/source/tgsi.rst | 67 >> +++++++++++++++++++++++++----- >> src/gallium/include/pipe/p_shader_tokens.h | 6 +-- >> 3 files changed, 62 insertions(+), 17 deletions(-) >> >> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c >> b/src/gallium/auxiliary/tgsi/tgsi_info.c >> index 5a6a9bc..30bad6d 100644 >> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c >> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c >> @@ -106,51 +106,51 @@ static const struct tgsi_opcode_info >> opcode_info[TGSI_OPCODE_LAST] = >> { 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP }, >> { 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS }, >> { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB }, >> { 1, 1, 0, 0, 0, 0, 0, OTHR, "FBFETCH", TGSI_OPCODE_FBFETCH }, >> { 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV }, >> { 1, 2, 0, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 }, >> { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL }, >> { 0, 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK }, >> { 0, 1, 0, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF }, >> { 0, 1, 0, 0, 1, 0, 1, NONE, "UIF", TGSI_OPCODE_UIF }, >> - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 76 }, /* removed */ >> + { 1, 2, 0, 0, 0, 0, 0, COMP, "READ_INVOC", TGSI_OPCODE_READ_INVOC }, >> { 0, 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE }, >> { 0, 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF }, >> { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE }, >> { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE }, >> { 0, 1, 0, 0, 0, 0, 0, NONE, "PUSHA", 
TGSI_OPCODE_PUSHA }, >> { 1, 0, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA }, >> { 1, 1, 0, 0, 0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL }, >> { 1, 1, 0, 0, 0, 0, 0, COMP, "I2F", TGSI_OPCODE_I2F }, >> { 1, 1, 0, 0, 0, 0, 0, COMP, "NOT", TGSI_OPCODE_NOT }, >> { 1, 1, 0, 0, 0, 0, 0, COMP, "TRUNC", TGSI_OPCODE_TRUNC }, >> { 1, 2, 0, 0, 0, 0, 0, COMP, "SHL", TGSI_OPCODE_SHL }, >> - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 88 }, /* removed */ >> + { 1, 1, 0, 0, 0, 0, 0, OTHR, "BALLOT", TGSI_OPCODE_BALLOT }, >> { 1, 2, 0, 0, 0, 0, 0, COMP, "AND", TGSI_OPCODE_AND }, >> { 1, 2, 0, 0, 0, 0, 0, COMP, "OR", TGSI_OPCODE_OR }, >> { 1, 2, 0, 0, 0, 0, 0, COMP, "MOD", TGSI_OPCODE_MOD }, >> { 1, 2, 0, 0, 0, 0, 0, COMP, "XOR", TGSI_OPCODE_XOR }, >> { 1, 3, 0, 0, 0, 0, 0, COMP, "SAD", TGSI_OPCODE_SAD }, >> { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF }, >> { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ }, >> { 0, 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT }, >> { 0, 1, 0, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT }, >> { 0, 1, 0, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, >> { 0, 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, >> { 0, 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB }, >> { 0, 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, >> { 0, 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB }, >> { 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ }, >> { 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS }, >> { 1, 1, 0, 0, 0, 0, 0, OTHR, "RESQ", TGSI_OPCODE_RESQ }, >> - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */ >> + { 1, 1, 0, 0, 0, 0, 0, COMP, "READ_FIRST", TGSI_OPCODE_READ_FIRST }, >> { 0, 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP }, >> { 1, 2, 0, 0, 0, 0, 0, COMP, "FSEQ", TGSI_OPCODE_FSEQ }, >> { 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE }, >> { 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT }, >> { 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE }, >> { 0, 1, 0, 0, 0, 0, 0, OTHR, 
"MEMBAR", TGSI_OPCODE_MEMBAR }, >> { 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ }, >> { 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */ >> { 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC }, >> { 0, 1, 0, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF }, >> diff --git a/src/gallium/docs/source/tgsi.rst >> b/src/gallium/docs/source/tgsi.rst >> index 05b06ce..7e9b47c 100644 >> --- a/src/gallium/docs/source/tgsi.rst >> +++ b/src/gallium/docs/source/tgsi.rst >> @@ -2852,36 +2852,81 @@ only be used with 32-bit integer image formats. >> >> The following operation is performed atomically: >> >> .. math:: >> >> dst_x = resource[offset] >> >> resource[offset] = (dst_x > src_x ? dst_x : src_x) >> >> >> -.. _voteopcodes: >> +.. _interlaneopcodes: >> + >> +Inter-lane opcodes >> +^^^^^^^^^^^^^^^^^^ >> + >> +These opcodes reduce the given value across the shader invocations >> +running in the current SIMD group. For all but the READ_INVOC operations, >> +every thread in the subgroup will receive the same result. The VOTE_* and >> +BALLOT operations accept a single-channel argument that is treated as a >> +boolean and produce a 64-bit value. > > Really? Aren't VOTE_* opcodes producing bool (32-bit) values?
Yeah, they should be producing (32-bit) bools. At least the way they're implemented in nouveau, they are also per-channel, i.e. case TGSI_OPCODE_VOTE_ALL: case TGSI_OPCODE_VOTE_ANY: case TGSI_OPCODE_VOTE_EQ: val0 = new_LValue(func, FILE_PREDICATE); FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, c), zero); mkOp1(op, dstTy, val0, val0) ->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); mkCvt(OP_CVT, TYPE_U32, dst0[c], TYPE_U8, val0); } break; although that per-channel-ness isn't used by st/mesa. -ilia _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev