On Mon, Jul 23, 2018 at 12:40 PM, Rhys Perry <pendingchao...@gmail.com> wrote: > Signed-off-by: Rhys Perry <pendingchao...@gmail.com> > --- > src/gallium/drivers/nouveau/codegen/nv50_ir.h | 26 > ++++++++++++++++++++++ > .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 18 +++++++++++++-- > .../drivers/nouveau/codegen/nv50_ir_print.cpp | 19 ++++++++++++++++ > .../drivers/nouveau/codegen/nv50_ir_target.cpp | 7 +++--- > .../nouveau/codegen/nv50_ir_target_gm107.cpp | 1 + > .../nouveau/codegen/nv50_ir_target_nv50.cpp | 1 + > .../nouveau/codegen/nv50_ir_target_nvc0.cpp | 15 +++++++++++++ > 7 files changed, 82 insertions(+), 5 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h > b/src/gallium/drivers/nouveau/codegen/nv50_ir.h > index 0b220cc48d..13822a08c3 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h > @@ -58,6 +58,9 @@ enum operation > OP_FMA, > OP_SAD, // abs(src0 - src1) + src2 > OP_SHLADD, > + // extended multiply-add (GM107+), does a lot of things. 
> + // see envytools for detailed documentation > + OP_XMAD, > OP_ABS, > OP_NEG, > OP_NOT, > @@ -256,6 +259,29 @@ enum operation > #define NV50_IR_SUBOP_MINMAX_MED 2 > #define NV50_IR_SUBOP_MINMAX_HIGH 3 > > +// xmad(src0, src1, 0) << 16 + src2 > +#define NV50_IR_SUBOP_XMAD_PSL (1 << 0) > +// (xmad(src0, src1, src2) & 0xffff) | (src1 << 16) > +#define NV50_IR_SUBOP_XMAD_MRG (1 << 1) > +// xmad(src0, src1, src2.lo) > +#define NV50_IR_SUBOP_XMAD_CLO (1 << 2) > +// xmad(src0, src1, src2.hi) > +#define NV50_IR_SUBOP_XMAD_CHI (2 << 2) > +// if both operands to the multiplication are non-zero, subtract 65536 for > each > +// negative operand > +#define NV50_IR_SUBOP_XMAD_CSFU (3 << 2) > +// xmad(src0, src1, src2) + src1 << 16 > +#define NV50_IR_SUBOP_XMAD_CBCC (4 << 2) > +#define NV50_IR_SUBOP_XMAD_CMODE_SHIFT 2 > +#define NV50_IR_SUBOP_XMAD_CMODE_MASK uint16_t(0x7 << > NV50_IR_SUBOP_XMAD_CMODE_SHIFT) > + > +// use the high 16 bits instead of the low 16 bits for the multiplication. > +// if the instruction's sType is signed, sign extend the operand from 16 bits > +// to 32 before multiplication. > +#define NV50_IR_SUBOP_XMAD_H1_SHIFT 5 > +#define NV50_IR_SUBOP_XMAD_H1(i) (1 << (NV50_IR_SUBOP_XMAD_H1_SHIFT + (i))) > +#define NV50_IR_SUBOP_XMAD_H1_MASK uint16_t(0x3 << > NV50_IR_SUBOP_XMAD_H1_SHIFT)
Is the uint16_t cast really needed? > + > enum DataType > { > TYPE_NONE, > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > index 16022e6f23..6deea7a360 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > @@ -191,9 +191,17 @@ void > LoadPropagation::checkSwapSrc01(Instruction *insn) > { > const Target *targ = prog->getTarget(); > - if (!targ->getOpInfo(insn).commutative) > - if (insn->op != OP_SET && insn->op != OP_SLCT && insn->op != OP_SUB) > + if (!targ->getOpInfo(insn).commutative) { > + if (insn->op != OP_SET && insn->op != OP_SLCT && > + insn->op != OP_SUB && insn->op != OP_XMAD) > return; > + // XMAD is only commutative if both the CBCC and MRG flags are not set. > + if (insn->op == OP_XMAD && > + (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK) == > NV50_IR_SUBOP_XMAD_CBCC) > + return; > + if (insn->op == OP_XMAD && (insn->subOp & NV50_IR_SUBOP_XMAD_MRG)) > + return; > + } > if (insn->src(1).getFile() != FILE_GPR) > return; > // This is the special OP_SET used for alphatesting, we can't reverse its > @@ -236,6 +244,12 @@ LoadPropagation::checkSwapSrc01(Instruction *insn) > if (insn->op == OP_SUB) { > insn->src(0).mod = insn->src(0).mod ^ Modifier(NV50_IR_MOD_NEG); > insn->src(1).mod = insn->src(1).mod ^ Modifier(NV50_IR_MOD_NEG); > + } else > + if (insn->op == OP_XMAD) { > + // swap h1 flags > + uint16_t h1 = (insn->subOp >> 1 & NV50_IR_SUBOP_XMAD_H1(0)) | > + (insn->subOp << 1 & NV50_IR_SUBOP_XMAD_H1(1)); > + insn->subOp = (insn->subOp & ~NV50_IR_SUBOP_XMAD_H1_MASK) | h1; > } > } > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > index ee3506fbae..7eab8b8d70 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > @@ -86,6 +86,7 @@ const char 
*operationStr[OP_LAST + 1] = > "fma", > "sad", > "shladd", > + "xmad", > "abs", > "neg", > "not", > @@ -240,6 +241,11 @@ static const char *barOpStr[] = > "sync", "arrive", "red and", "red or", "red popc" > }; > > +static const char *xmadOpCModeStr[] = > +{ > + "clo", "chi", "csfu", "cbcc" > +}; > + > static const char *DataTypeStr[] = > { > "-", > @@ -625,6 +631,19 @@ void Instruction::print() const > if (subOp < ARRAY_SIZE(barOpStr)) > PRINT("%s ", barOpStr[subOp]); > break; > + case OP_XMAD: { > + if (subOp & NV50_IR_SUBOP_XMAD_PSL) > + PRINT("psl "); > + if (subOp & NV50_IR_SUBOP_XMAD_MRG) > + PRINT("mrg "); > + unsigned cmode = (subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK); > + cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT; > + if (cmode && cmode <= ARRAY_SIZE(xmadOpCModeStr)) > + PRINT("%s ", xmadOpCModeStr[cmode - 1]); > + for (int i = 0; i < 2; i++) > + PRINT("h%d ", (subOp & NV50_IR_SUBOP_XMAD_H1(i)) ? 1 : 0); > + break; > + } > default: > if (subOp) > PRINT("(SUBOP:%u) ", subOp); > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp > index 298e7c6ef9..9193a01f18 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp > @@ -30,7 +30,8 @@ const uint8_t Target::operationSrcNr[] = > 0, 0, // NOP, PHI > 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT > 1, 1, 2, // MOV, LOAD, STORE > - 2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, > SHLADD > + 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD > + 3, 3, // SHLADD, XMAD > 1, 1, 1, // ABS, NEG, NOT > 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR > 2, 2, 1, // MAX, MIN, SAT > @@ -70,10 +71,10 @@ const OpClass Target::operationClass[] = > OPCLASS_MOVE, > OPCLASS_LOAD, > OPCLASS_STORE, > - // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD > + // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD, XMAD > OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, 
> OPCLASS_ARITH, OPCLASS_ARITH, > - OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, > + OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, > // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR > OPCLASS_CONVERT, OPCLASS_CONVERT, > OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp > index adbfcc3cfe..7293fb27dd 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp > @@ -60,6 +60,7 @@ TargetGM107::isOpSupported(operation op, DataType ty) const > case OP_SQRT: > case OP_DIV: > case OP_MOD: > + case OP_XMAD: > return false; > default: > break; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > index 1ad3467337..2981497340 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > @@ -443,6 +443,7 @@ TargetNV50::isOpSupported(operation op, DataType ty) const > case OP_EXIT: // want exit modifier instead (on NOP if required) > case OP_MEMBAR: > case OP_SHLADD: > + case OP_XMAD: > return false; > case OP_SAD: > return ty == TYPE_S32; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > index 7e059235f4..7e66d2950b 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > @@ -356,6 +356,18 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s, > if ((i->op == OP_SHL || i->op == OP_SHR) && typeSizeof(i->sType) == 8 && > sf == FILE_MEMORY_CONST) > return false; > + // constant buffer loads can't be used with cbcc xmads > + if (i->op == OP_XMAD && sf == 
FILE_MEMORY_CONST && > + (i->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK) == NV50_IR_SUBOP_XMAD_CBCC) > + return false; > + // constant buffer loads for the third operand can't be used with psl/mrg > xmads > + if (i->op == OP_XMAD && sf == FILE_MEMORY_CONST && s == 2 && > + (i->subOp & (NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_MRG))) > + return false; > + // for xmads, immediates can't have the h1 flag set > + if (i->op == OP_XMAD && sf == FILE_IMMEDIATE && s < 2 && > + i->subOp & NV50_IR_SUBOP_XMAD_H1(s)) > + return false; > > for (int k = 0; i->srcExists(k); ++k) { > if (i->src(k).getFile() == FILE_IMMEDIATE) { > @@ -448,6 +460,8 @@ TargetNVC0::isOpSupported(operation op, DataType ty) const > return false; > if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD) > return false; > + if (op == OP_XMAD) > + return false; > return true; > } > > @@ -467,6 +481,7 @@ TargetNVC0::isModSupported(const Instruction *insn, int > s, Modifier mod) const > case OP_XOR: > case OP_POPCNT: > case OP_BFIND: > + case OP_XMAD: > break; > case OP_SET: > if (insn->sType != TYPE_F32) > -- > 2.14.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev