Am 19.09.2016 um 15:08 schrieb Nicolai Hähnle: > From: Dave Airlie <airl...@redhat.com> > > This just adds the basic support for 64-bit opcodes, > and the new types. > > v2: add conversion opcodes. > add documentation. > v3: > - make docs more consistent > - change TGSI_OPCODE_I2U64 to TGSI_OPCODE_U2I64 > > Reviewed-by: Marek Olšák <marek.ol...@amd.com> (v2) > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > src/gallium/auxiliary/tgsi/tgsi_info.c | 92 +++++++++-- > src/gallium/auxiliary/tgsi/tgsi_info.h | 4 +- > src/gallium/docs/source/tgsi.rst | 240 > +++++++++++++++++++++++++++++ > src/gallium/include/pipe/p_shader_tokens.h | 46 ++++-- > 4 files changed, 362 insertions(+), 20 deletions(-) > > diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c > b/src/gallium/auxiliary/tgsi/tgsi_info.c > index 60e0f2c..18e1bc8 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_info.c > +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c > @@ -52,61 +52,61 @@ static const struct tgsi_opcode_info > opcode_info[TGSI_OPCODE_LAST] = > { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE }, > { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB }, > { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, > { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, > { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, > { 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A }, > - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 22 }, /* removed */ > - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 23 }, /* removed */ > + { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 }, > + { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC }, > { 1, 3, 0, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", 
TGSI_OPCODE_FLR }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND }, > { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 }, > { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 }, > { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD }, > - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 32 }, /* removed */ > + { 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS }, > - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 34 }, /* removed */ > + { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 }, > { 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH }, > { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY }, > { 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL }, > { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H }, > { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US }, > { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B }, > { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB }, > - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 44 }, /* removed */ > + { 1, 1, 0, 0, 0, 0, 1, COMP, "D2U64", TGSI_OPCODE_D2U64 }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ }, > - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 46 }, /* removed */ > + { 1, 1, 0, 0, 0, 0, 1, COMP, "D2I64", TGSI_OPCODE_D2I64 }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT }, > { 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE }, > - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 51 }, /* removed */ > + { 1, 1, 0, 0, 0, 0, 1, COMP, "U642D", TGSI_OPCODE_U642D }, > { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX }, > { 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD }, > { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP }, > { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", 
TGSI_OPCODE_UP2H }, > { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US }, > { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B }, > { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB }, > - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 59 }, /* removed */ > - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 60 }, /* removed */ > + { 1, 1, 0, 0, 0, 0, 1, COMP, "U642F", TGSI_OPCODE_U642F }, > + { 1, 1, 0, 0, 0, 0, 1, COMP, "I642F", TGSI_OPCODE_I642F }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR }, > - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 62 }, /* removed */ > + { 1, 1, 0, 0, 0, 0, 1, COMP, "I642D", TGSI_OPCODE_I642D }, > { 0, 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL }, > { 0, 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG }, > { 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP }, > { 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS }, > { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB }, > { 0, 1, 0, 0, 0, 0, 1, NONE, "", 69 }, /* removed */ > { 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV }, > { 1, 2, 0, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 }, > { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL }, > @@ -258,20 +258,42 @@ static const struct tgsi_opcode_info > opcode_info[TGSI_OPCODE_LAST] = > { 1, 1, 0, 0, 0, 0, 0, COMP, "U2D", TGSI_OPCODE_U2D }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "DRSQ", TGSI_OPCODE_DRSQ }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "DTRUNC", TGSI_OPCODE_DTRUNC }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "DCEIL", TGSI_OPCODE_DCEIL }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "DFLR", TGSI_OPCODE_DFLR }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "DROUND", TGSI_OPCODE_DROUND }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "DSSG", TGSI_OPCODE_DSSG }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ANY", TGSI_OPCODE_VOTE_ANY }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ALL", TGSI_OPCODE_VOTE_ALL }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_EQ", TGSI_OPCODE_VOTE_EQ }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SEQ", TGSI_OPCODE_U64SEQ }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, 
"U64SNE", TGSI_OPCODE_U64SNE }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SLT", TGSI_OPCODE_I64SLT }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SLT", TGSI_OPCODE_U64SLT }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SGE", TGSI_OPCODE_I64SGE }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SGE", TGSI_OPCODE_U64SGE }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MIN", TGSI_OPCODE_I64MIN }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MIN", TGSI_OPCODE_U64MIN }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MAX", TGSI_OPCODE_I64MAX }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MAX", TGSI_OPCODE_U64MAX }, > + { 1, 1, 0, 0, 0, 0, 0, COMP, "I64ABS", TGSI_OPCODE_I64ABS }, > + { 1, 1, 0, 0, 0, 0, 0, COMP, "I64SSG", TGSI_OPCODE_I64SSG }, > + { 1, 1, 0, 0, 0, 0, 0, COMP, "I64NEG", TGSI_OPCODE_I64NEG }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64ADD", TGSI_OPCODE_U64ADD }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MUL", TGSI_OPCODE_U64MUL }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHL", TGSI_OPCODE_U64SHL }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SHR", TGSI_OPCODE_I64SHR }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHR", TGSI_OPCODE_U64SHR }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64DIV", TGSI_OPCODE_I64DIV }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64DIV", TGSI_OPCODE_U64DIV }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MOD", TGSI_OPCODE_I64MOD }, > + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MOD", TGSI_OPCODE_U64MOD }, > }; > > const struct tgsi_opcode_info * > tgsi_get_opcode_info( uint opcode ) > { > static boolean firsttime = 1; > > if (firsttime) { > unsigned i; > firsttime = 0; > @@ -375,20 +397,26 @@ tgsi_opcode_infer_type( uint opcode ) > case TGSI_OPCODE_UARL: > case TGSI_OPCODE_IABS: > case TGSI_OPCODE_ISSG: > case TGSI_OPCODE_IMUL_HI: > case TGSI_OPCODE_IBFE: > case TGSI_OPCODE_IMSB: > case TGSI_OPCODE_DSEQ: > case TGSI_OPCODE_DSGE: > case TGSI_OPCODE_DSLT: > case TGSI_OPCODE_DSNE: > + case TGSI_OPCODE_U64SEQ: > + case TGSI_OPCODE_U64SNE: > + case TGSI_OPCODE_U64SLT: > + case TGSI_OPCODE_U64SGE: > + case TGSI_OPCODE_I64SLT: > + case TGSI_OPCODE_I64SGE: > return 
TGSI_TYPE_SIGNED; > case TGSI_OPCODE_DADD: > case TGSI_OPCODE_DABS: > case TGSI_OPCODE_DFMA: > case TGSI_OPCODE_DNEG: > case TGSI_OPCODE_DMUL: > case TGSI_OPCODE_DMAX: > case TGSI_OPCODE_DMIN: > case TGSI_OPCODE_DRCP: > case TGSI_OPCODE_DSQRT: > @@ -398,21 +426,47 @@ tgsi_opcode_infer_type( uint opcode ) > case TGSI_OPCODE_DFRAC: > case TGSI_OPCODE_DRSQ: > case TGSI_OPCODE_DTRUNC: > case TGSI_OPCODE_DCEIL: > case TGSI_OPCODE_DFLR: > case TGSI_OPCODE_DROUND: > case TGSI_OPCODE_DSSG: > case TGSI_OPCODE_F2D: > case TGSI_OPCODE_I2D: > case TGSI_OPCODE_U2D: > + case TGSI_OPCODE_U642D: > + case TGSI_OPCODE_I642D: > return TGSI_TYPE_DOUBLE; > + case TGSI_OPCODE_U64MAX: > + case TGSI_OPCODE_U64MIN: > + case TGSI_OPCODE_U64ADD: > + case TGSI_OPCODE_U64MUL: > + case TGSI_OPCODE_U64DIV: > + case TGSI_OPCODE_U64MOD: > + case TGSI_OPCODE_U64SHL: > + case TGSI_OPCODE_U64SHR: > + case TGSI_OPCODE_F2U64: > + case TGSI_OPCODE_D2U64: > + return TGSI_TYPE_UNSIGNED64; > + case TGSI_OPCODE_I64MAX: > + case TGSI_OPCODE_I64MIN: > + case TGSI_OPCODE_I64ABS: > + case TGSI_OPCODE_I64SSG: > + case TGSI_OPCODE_I64NEG: > + case TGSI_OPCODE_I64SHR: > + case TGSI_OPCODE_I64DIV: > + case TGSI_OPCODE_I64MOD: > + case TGSI_OPCODE_F2I64: > + case TGSI_OPCODE_U2I64: > + case TGSI_OPCODE_I2I64: > + case TGSI_OPCODE_D2I64: > + return TGSI_TYPE_SIGNED64; > default: > return TGSI_TYPE_FLOAT; > } > } > > /* > * infer the source type of a TGSI opcode. 
> */ > enum tgsi_opcode_type > tgsi_opcode_infer_src_type( uint opcode ) > @@ -423,45 +477,63 @@ tgsi_opcode_infer_src_type( uint opcode ) > case TGSI_OPCODE_BREAKC: > case TGSI_OPCODE_U2F: > case TGSI_OPCODE_U2D: > case TGSI_OPCODE_UADD: > case TGSI_OPCODE_SWITCH: > case TGSI_OPCODE_CASE: > case TGSI_OPCODE_SAMPLE_I: > case TGSI_OPCODE_SAMPLE_I_MS: > case TGSI_OPCODE_UMUL_HI: > case TGSI_OPCODE_UP2H: > + case TGSI_OPCODE_U2I64: > return TGSI_TYPE_UNSIGNED; > case TGSI_OPCODE_IMUL_HI: > case TGSI_OPCODE_I2F: > case TGSI_OPCODE_I2D: > + case TGSI_OPCODE_I2I64: > return TGSI_TYPE_SIGNED; > case TGSI_OPCODE_ARL: > case TGSI_OPCODE_ARR: > case TGSI_OPCODE_TXQ_LZ: > case TGSI_OPCODE_F2D: > case TGSI_OPCODE_F2I: > case TGSI_OPCODE_F2U: > case TGSI_OPCODE_FSEQ: > case TGSI_OPCODE_FSGE: > case TGSI_OPCODE_FSLT: > case TGSI_OPCODE_FSNE: > case TGSI_OPCODE_UCMP: > + case TGSI_OPCODE_F2U64: > + case TGSI_OPCODE_F2I64: > return TGSI_TYPE_FLOAT; > case TGSI_OPCODE_D2F: > case TGSI_OPCODE_D2U: > case TGSI_OPCODE_D2I: > case TGSI_OPCODE_DSEQ: > case TGSI_OPCODE_DSGE: > case TGSI_OPCODE_DSLT: > case TGSI_OPCODE_DSNE: > + case TGSI_OPCODE_D2U64: > + case TGSI_OPCODE_D2I64: > return TGSI_TYPE_DOUBLE; > + case TGSI_OPCODE_U64SEQ: > + case TGSI_OPCODE_U64SNE: > + case TGSI_OPCODE_U64SLT: > + case TGSI_OPCODE_U64SGE: > + case TGSI_OPCODE_U642F: > + case TGSI_OPCODE_U642D: > + return TGSI_TYPE_UNSIGNED64; > + case TGSI_OPCODE_I64SLT: > + case TGSI_OPCODE_I64SGE: > + case TGSI_OPCODE_I642F: > + case TGSI_OPCODE_I642D: > + return TGSI_TYPE_SIGNED64; > default: > return tgsi_opcode_infer_type(opcode); > } > } > > /* > * infer the destination type of a TGSI opcode. 
> */ > enum tgsi_opcode_type > tgsi_opcode_infer_dst_type( uint opcode ) > diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h > b/src/gallium/auxiliary/tgsi/tgsi_info.h > index c43bdfd..8830f5a 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_info.h > +++ b/src/gallium/auxiliary/tgsi/tgsi_info.h > @@ -91,21 +91,23 @@ tgsi_get_opcode_name( uint opcode ); > > const char * > tgsi_get_processor_name( uint processor ); > > enum tgsi_opcode_type { > TGSI_TYPE_UNTYPED, /* for MOV */ > TGSI_TYPE_VOID, > TGSI_TYPE_UNSIGNED, > TGSI_TYPE_SIGNED, > TGSI_TYPE_FLOAT, > - TGSI_TYPE_DOUBLE > + TGSI_TYPE_DOUBLE, > + TGSI_TYPE_UNSIGNED64, > + TGSI_TYPE_SIGNED64, > }; > > static inline bool tgsi_type_is_64bit(enum tgsi_opcode_type type) > { > if (type == TGSI_TYPE_DOUBLE) > return true; > return false; > } > > enum tgsi_opcode_type > diff --git a/src/gallium/docs/source/tgsi.rst > b/src/gallium/docs/source/tgsi.rst > index 881aef6..000ea3a 100644 > --- a/src/gallium/docs/source/tgsi.rst > +++ b/src/gallium/docs/source/tgsi.rst > @@ -2075,20 +2075,260 @@ Perform a * b + c with no intermediate rounding step. > dst.zw = double(src0.y) > > .. opcode:: D2U - Double to Unsigned Int > > .. math:: > > dst.x = unsigned(src0.xy) > > dst.y = unsigned(src0.zw) > > +64-bit Integer ISA > +^^^^^^^^^^^^^^^^^^ > + > +The 64-bit integer opcodes reinterpret four-component vectors into > +two-component vectors with 64-bits in each component. > + > +.. opcode:: I64ABS - 64-bit Integer Absolute Value > + > + dst.xy = |src0.xy| > + dst.zw = |src0.zw| > + > +.. opcode:: I64NEG - 64-bit Integer Negate > + > + Two's complement. > + > +.. math:: > + > + dst.xy = -src.xy > + dst.zw = -src.zw > + > +.. opcode:: I64SSG - 64-bit Integer Set Sign > + > +.. math:: > + > + dst.xy = (src0.xy < 0) ? -1 : (src0.xy > 0) ? 1 : 0 > + dst.zw = (src0.zw < 0) ? -1 : (src0.zw > 0) ? 1 : 0 > + > +.. opcode:: U64ADD - 64-bit Integer Add > + > +.. math:: > + > + dst.xy = src0.xy + src1.xy > + dst.zw = src0.zw + src1.zw > + > +.. 
opcode:: U64MUL - 64-bit Integer Multiply > + > +.. math:: > + > + dst.xy = src0.xy * src1.xy > + dst.zw = src0.zw * src1.zw > + > +.. opcode:: U64SEQ - 64-bit Integer Set on Equal > + > +.. math:: > + > + dst.x = src0.xy == src1.xy ? \sim 0 : 0 > + dst.z = src0.zw == src1.zw ? \sim 0 : 0 > + > +.. opcode:: U64SNE - 64-bit Integer Set on Not Equal > + > +.. math:: > + > + dst.x = src0.xy != src1.xy ? \sim 0 : 0 > + dst.z = src0.zw != src1.zw ? \sim 0 : 0 > + > +.. opcode:: U64SLT - 64-bit Unsigned Integer Set on Less Than > + > +.. math:: > + > + dst.x = src0.xy < src1.xy ? \sim 0 : 0 > + dst.z = src0.zw < src1.zw ? \sim 0 : 0 > + > +.. opcode:: U64SGE - 64-bit Unsigned Integer Set on Greater Equal > + > +.. math:: > + > + dst.x = src0.xy >= src1.xy ? \sim 0 : 0 > + dst.z = src0.zw >= src1.zw ? \sim 0 : 0 > + > +.. opcode:: I64SLT - 64-bit Signed Integer Set on Less Than > + > +.. math:: > + > + dst.x = src0.xy < src1.xy ? \sim 0 : 0 > + dst.z = src0.zw < src1.zw ? \sim 0 : 0 > + > +.. opcode:: I64SGE - 64-bit Signed Integer Set on Greater Equal > + > +.. math:: > + > + dst.x = src0.xy >= src1.xy ? \sim 0 : 0 > + dst.z = src0.zw >= src1.zw ? \sim 0 : 0 > + > +.. opcode:: I64MIN - Minimum of 64-bit Signed Integers > + > +.. math:: > + > + dst.xy = min(src0.xy, src1.xy) > + dst.zw = min(src0.zw, src1.zw) > + > +.. opcode:: U64MIN - Minimum of 64-bit Unsigned Integers > + > +.. math:: > + > + dst.xy = min(src0.xy, src1.xy) > + dst.zw = min(src0.zw, src1.zw) > + > +.. opcode:: I64MAX - Maximum of 64-bit Signed Integers > + > +.. math:: > + > + dst.xy = max(src0.xy, src1.xy) > + dst.zw = max(src0.zw, src1.zw) > + > +.. opcode:: U64MAX - Maximum of 64-bit Unsigned Integers > + > +.. math:: > + > + dst.xy = max(src0.xy, src1.xy) > + dst.zw = max(src0.zw, src1.zw) > + > +.. opcode:: U64SHL - Shift Left 64-bit Unsigned Integer > + > + The shift count is masked with 0x1f before the shift is applied. Another 0x1f -> 0x3f :-).
Otherwise, this looks alright to me, though I'm still not sure that filling all the opcode gaps that way is a good idea. Reviewed-by: Roland Scheidegger <srol...@vmware.com> > + > +.. math:: > + > + dst.xy = src0.xy << (0x3f \& src1.x) > + dst.zw = src0.zw << (0x3f \& src1.y) > + > +.. opcode:: I64SHR - Arithmetic Shift Right (of 64-bit Signed Integer) > + > + The shift count is masked with 0x3f before the shift is applied. > + > +.. math:: > + > + dst.xy = src0.xy >> (0x3f \& src1.x) > + dst.zw = src0.zw >> (0x3f \& src1.y) > + > +.. opcode:: U64SHR - Logical Shift Right (of 64-bit Unsigned Integer) > + > + The shift count is masked with 0x3f before the shift is applied. > + > +.. math:: > + > + dst.xy = src0.xy >> (unsigned) (0x3f \& src1.x) > + dst.zw = src0.zw >> (unsigned) (0x3f \& src1.y) > + > +.. opcode:: I64DIV - 64-bit Signed Integer Division > + > +.. math:: > + > + dst.xy = src0.xy \ src1.xy > + dst.zw = src0.zw \ src1.zw > + > +.. opcode:: U64DIV - 64-bit Unsigned Integer Division > + > +.. math:: > + > + dst.xy = src0.xy \ src1.xy > + dst.zw = src0.zw \ src1.zw > + > +.. opcode:: U64MOD - 64-bit Unsigned Integer Remainder > + > +.. math:: > + > + dst.xy = src0.xy \bmod src1.xy > + dst.zw = src0.zw \bmod src1.zw > + > +.. opcode:: I64MOD - 64-bit Signed Integer Remainder > + > +.. math:: > + > + dst.xy = src0.xy \bmod src1.xy > + dst.zw = src0.zw \bmod src1.zw > + > +.. opcode:: F2U64 - Float to 64-bit Unsigned Int > + > +.. math:: > + > + dst.xy = (uint64_t) src0.x > + dst.zw = (uint64_t) src0.y > + > +.. opcode:: F2I64 - Float to 64-bit Int > + > +.. math:: > + > + dst.xy = (int64_t) src0.x > + dst.zw = (int64_t) src0.y > + > +.. opcode:: U2I64 - Unsigned Integer to 64-bit Integer > + > + This is a zero extension. > + > +.. math:: > + > + dst.xy = (uint64_t) src0.x > + dst.zw = (uint64_t) src0.y > + > +.. opcode:: I2I64 - Signed Integer to 64-bit Integer > + > + This is a sign extension. > + > +.. 
math:: > + > + dst.xy = (int64_t) src0.x > + dst.zw = (int64_t) src0.y > + > +.. opcode:: D2U64 - Double to 64-bit Unsigned Int > + > +.. math:: > + > + dst.xy = (uint64_t) src0.xy > + dst.zw = (uint64_t) src0.zw > + > +.. opcode:: D2I64 - Double to 64-bit Int > + > +.. math:: > + > + dst.xy = (int64_t) src0.xy > + dst.zw = (int64_t) src0.zw > + > +.. opcode:: U642F - 64-bit unsigned integer to float > + > +.. math:: > + > + dst.x = (float) src0.xy > + dst.y = (float) src0.zw > + > +.. opcode:: I642F - 64-bit Int to Float > + > +.. math:: > + > + dst.x = (float) src0.xy > + dst.y = (float) src0.zw > + > +.. opcode:: U642D - 64-bit unsigned integer to double > + > +.. math:: > + > + dst.xy = (double) src0.xy > + dst.zw = (double) src0.zw > + > +.. opcode:: I642D - 64-bit Int to double > + > +.. math:: > + > + dst.xy = (double) src0.xy > + dst.zw = (double) src0.zw > + > .. _samplingopcodes: > > Resource Sampling Opcodes > ^^^^^^^^^^^^^^^^^^^^^^^^^ > > Those opcodes follow very closely semantics of the respective Direct3D > instructions. If in doubt double check Direct3D documentation. > Note that the swizzle on SVIEW (src1) determines texel swizzling > after lookup. 
> > diff --git a/src/gallium/include/pipe/p_shader_tokens.h > b/src/gallium/include/pipe/p_shader_tokens.h > index 39ce9ea..a8d323a 100644 > --- a/src/gallium/include/pipe/p_shader_tokens.h > +++ b/src/gallium/include/pipe/p_shader_tokens.h > @@ -338,59 +338,61 @@ struct tgsi_property_data { > #define TGSI_OPCODE_MIN 12 > #define TGSI_OPCODE_MAX 13 > #define TGSI_OPCODE_SLT 14 > #define TGSI_OPCODE_SGE 15 > #define TGSI_OPCODE_MAD 16 > #define TGSI_OPCODE_SUB 17 > #define TGSI_OPCODE_LRP 18 > #define TGSI_OPCODE_FMA 19 > #define TGSI_OPCODE_SQRT 20 > #define TGSI_OPCODE_DP2A 21 > - /* gap */ > +#define TGSI_OPCODE_F2U64 22 > +#define TGSI_OPCODE_F2I64 23 > #define TGSI_OPCODE_FRC 24 > #define TGSI_OPCODE_CLAMP 25 > #define TGSI_OPCODE_FLR 26 > #define TGSI_OPCODE_ROUND 27 > #define TGSI_OPCODE_EX2 28 > #define TGSI_OPCODE_LG2 29 > #define TGSI_OPCODE_POW 30 > #define TGSI_OPCODE_XPD 31 > - /* gap */ > +#define TGSI_OPCODE_U2I64 32 > #define TGSI_OPCODE_ABS 33 > - /* gap */ > +#define TGSI_OPCODE_I2I64 34 > #define TGSI_OPCODE_DPH 35 > #define TGSI_OPCODE_COS 36 > #define TGSI_OPCODE_DDX 37 > #define TGSI_OPCODE_DDY 38 > #define TGSI_OPCODE_KILL 39 /* unconditional */ > #define TGSI_OPCODE_PK2H 40 > #define TGSI_OPCODE_PK2US 41 > #define TGSI_OPCODE_PK4B 42 > #define TGSI_OPCODE_PK4UB 43 > - /* gap */ > +#define TGSI_OPCODE_D2U64 44 > #define TGSI_OPCODE_SEQ 45 > - /* gap */ > +#define TGSI_OPCODE_D2I64 46 > #define TGSI_OPCODE_SGT 47 > #define TGSI_OPCODE_SIN 48 > #define TGSI_OPCODE_SLE 49 > #define TGSI_OPCODE_SNE 50 > - /* gap */ > +#define TGSI_OPCODE_U642D 51 > #define TGSI_OPCODE_TEX 52 > #define TGSI_OPCODE_TXD 53 > #define TGSI_OPCODE_TXP 54 > #define TGSI_OPCODE_UP2H 55 > #define TGSI_OPCODE_UP2US 56 > #define TGSI_OPCODE_UP4B 57 > #define TGSI_OPCODE_UP4UB 58 > - /* gap */ > +#define TGSI_OPCODE_U642F 59 > +#define TGSI_OPCODE_I642F 60 > #define TGSI_OPCODE_ARR 61 > - /* gap */ > +#define TGSI_OPCODE_I642D 62 > #define TGSI_OPCODE_CAL 63 > #define 
TGSI_OPCODE_RET 64 > #define TGSI_OPCODE_SSG 65 /* SGN */ > #define TGSI_OPCODE_CMP 66 > #define TGSI_OPCODE_SCS 67 > #define TGSI_OPCODE_TXB 68 > /* gap */ > #define TGSI_OPCODE_DIV 70 > #define TGSI_OPCODE_DP2 71 > #define TGSI_OPCODE_TXL 72 > @@ -561,21 +563,47 @@ struct tgsi_property_data { > #define TGSI_OPCODE_DTRUNC 218 /* nvc0 */ > #define TGSI_OPCODE_DCEIL 219 /* nvc0 */ > #define TGSI_OPCODE_DFLR 220 /* nvc0 */ > #define TGSI_OPCODE_DROUND 221 /* nvc0 */ > #define TGSI_OPCODE_DSSG 222 > > #define TGSI_OPCODE_VOTE_ANY 223 > #define TGSI_OPCODE_VOTE_ALL 224 > #define TGSI_OPCODE_VOTE_EQ 225 > > -#define TGSI_OPCODE_LAST 226 > +#define TGSI_OPCODE_U64SEQ 226 > +#define TGSI_OPCODE_U64SNE 227 > +#define TGSI_OPCODE_I64SLT 228 > +#define TGSI_OPCODE_U64SLT 229 > +#define TGSI_OPCODE_I64SGE 230 > +#define TGSI_OPCODE_U64SGE 231 > + > +#define TGSI_OPCODE_I64MIN 232 > +#define TGSI_OPCODE_U64MIN 233 > +#define TGSI_OPCODE_I64MAX 234 > +#define TGSI_OPCODE_U64MAX 235 > + > +#define TGSI_OPCODE_I64ABS 236 > +#define TGSI_OPCODE_I64SSG 237 > +#define TGSI_OPCODE_I64NEG 238 > + > +#define TGSI_OPCODE_U64ADD 239 > +#define TGSI_OPCODE_U64MUL 240 > +#define TGSI_OPCODE_U64SHL 241 > +#define TGSI_OPCODE_I64SHR 242 > +#define TGSI_OPCODE_U64SHR 243 > + > +#define TGSI_OPCODE_I64DIV 244 > +#define TGSI_OPCODE_U64DIV 245 > +#define TGSI_OPCODE_I64MOD 246 > +#define TGSI_OPCODE_U64MOD 247 > +#define TGSI_OPCODE_LAST 248 > > /** > * Opcode is the operation code to execute. A given operation defines the > * semantics how the source registers (if any) are interpreted and what is > * written to the destination registers (if any) as a result of execution. > * > * NumDstRegs and NumSrcRegs is the number of destination and source > registers, > * respectively. For a given operation code, those numbers are fixed and are > * present here only for convenience. 
> * > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev