Signed-off-by: Rhys Perry <pendingchao...@gmail.com> --- src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 3 ++- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 14 ++++++++++++ .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 12 +++++++++-- .../drivers/nouveau/codegen/nv50_ir_print.cpp | 20 +++++++++++++++++ .../drivers/nouveau/codegen/nv50_ir_target.cpp | 7 +++--- .../nouveau/codegen/nv50_ir_target_gm107.cpp | 1 + .../nouveau/codegen/nv50_ir_target_nv50.cpp | 5 +++-- .../nouveau/codegen/nv50_ir_target_nvc0.cpp | 25 ++++++++++++++++++++-- 8 files changed, 77 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index 49425b98b9..99bf8de370 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -53,7 +53,8 @@ Modifier Modifier::operator*(const Modifier m) const b &= ~NV50_IR_MOD_NEG; a = (this->bits ^ b) & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG); - c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT); + c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT | + NV50_IR_MOD_H1 | NV50_IR_MOD_SEXT); return Modifier(a | c); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index f4f3c70888..4deaf09989 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -58,6 +58,7 @@ enum operation OP_FMA, OP_SAD, // abs(src0 - src1) + src2 OP_SHLADD, + OP_XMAD, // extended multiply-add (GM107+), does a lot of things OP_ABS, OP_NEG, OP_NOT, @@ -251,6 +252,13 @@ enum operation #define NV50_IR_SUBOP_VOTE_ALL 0 #define NV50_IR_SUBOP_VOTE_ANY 1 #define NV50_IR_SUBOP_VOTE_UNI 2 +#define NV50_IR_SUBOP_XMAD_PSL (1 << 0) +#define NV50_IR_SUBOP_XMAD_MRG (1 << 1) +#define NV50_IR_SUBOP_XMAD_CLO (1 << 2) +#define NV50_IR_SUBOP_XMAD_CHI (2 << 2) +#define NV50_IR_SUBOP_XMAD_CSFU (3 << 2) +#define NV50_IR_SUBOP_XMAD_CBCC (4 << 2) +#define NV50_IR_SUBOP_XMAD_CMODE_MASK (0x7 << 2) #define NV50_IR_SUBOP_MINMAX_LOW 1 #define NV50_IR_SUBOP_MINMAX_MED 2 @@ -527,6 +535,9 @@ struct Storage #define NV50_IR_MOD_SAT (1 << 2) #define NV50_IR_MOD_NOT (1 << 3) #define NV50_IR_MOD_NEG_ABS (NV50_IR_MOD_NEG | NV50_IR_MOD_ABS) +// modifiers only for XMAD +#define NV50_IR_MOD_H1 (1 << 4) +#define NV50_IR_MOD_SEXT (1 << 5) #define NV50_IR_INTERP_MODE_MASK 0x3 #define NV50_IR_INTERP_LINEAR (0 << 0) @@ -556,11 +567,14 @@ public: inline Modifier operator&(const Modifier m) const { return bits & m.bits; } inline Modifier operator|(const Modifier m) const { return bits | m.bits; } inline Modifier operator^(const Modifier m) const { return bits ^ m.bits; } + inline Modifier operator~() const { return ~bits; } operation getOp() const; inline int neg() const { return (bits & NV50_IR_MOD_NEG) ? 1 : 0; } inline int abs() const { return (bits & NV50_IR_MOD_ABS) ? 1 : 0; } + inline int h1() const { return (bits & NV50_IR_MOD_H1) ? 1 : 0; } + inline int sext() const { return (bits & NV50_IR_MOD_SEXT) ? 1 : 0; } inline operator bool() const { return bits ? true : false; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 4d0589214d..a43b481a01 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -191,9 +191,16 @@ void LoadPropagation::checkSwapSrc01(Instruction *insn) { const Target *targ = prog->getTarget(); - if (!targ->getOpInfo(insn).commutative) - if (insn->op != OP_SET && insn->op != OP_SLCT && insn->op != OP_SUB) + if (!targ->getOpInfo(insn).commutative) { + if (insn->op != OP_SET && insn->op != OP_SLCT && + insn->op != OP_SUB && insn->op != OP_XMAD) return; + // XMAD is only commutative if both the CBCC and MRG flags are not set. + if (insn->op == OP_XMAD && (insn->subOp & 0x1c) == NV50_IR_SUBOP_XMAD_CBCC) + return; + if (insn->op == OP_XMAD && (insn->subOp & NV50_IR_SUBOP_XMAD_MRG)) + return; + } if (insn->src(1).getFile() != FILE_GPR) return; // This is the special OP_SET used for alphatesting, we can't reverse its @@ -488,6 +495,7 @@ Modifier::applyTo(ImmediateValue& imm) const imm.reg.data.s32 = -imm.reg.data.s32; if (bits & NV50_IR_MOD_NOT) imm.reg.data.s32 = ~imm.reg.data.s32; + // NOTE: applying the h1 and sext modifiers is confusing and not very useful break; case TYPE_F64: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index cbb21f5f72..c4906c31a8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -86,6 +86,7 @@ const char *operationStr[OP_LAST + 1] = "fma", "sad", "shladd", + "xmad", "abs", "neg", "not", @@ -239,6 +240,11 @@ static const char *barOpStr[] = { "sync", "arrive", "red and", "red or", "red popc" }; + +static const char *xmadOpCModeStr[] = +{ + "clo", "chi", "csfu", "cbcc" +}; static const char *DataTypeStr[] = { @@ -387,6 +393,10 @@ int Modifier::print(char *buf, size_t size) const SPACE_PRINT(pos > base && pos < size, "neg"); if (bits & NV50_IR_MOD_ABS) SPACE_PRINT(pos > base && pos < size, "abs"); + if (bits & NV50_IR_MOD_H1) + SPACE_PRINT(pos > base && pos < size, "h1"); + if (bits & NV50_IR_MOD_SEXT) + SPACE_PRINT(pos > base && pos < size, "sext"); return pos; } @@ -624,6 +634,16 @@ void Instruction::print() const if (subOp < ARRAY_SIZE(barOpStr)) PRINT("%s ", barOpStr[subOp]); break; + case OP_XMAD: { + if (subOp & NV50_IR_SUBOP_XMAD_PSL) + PRINT("psl "); + if (subOp & NV50_IR_SUBOP_XMAD_MRG) + PRINT("mrg "); + unsigned cmode = (subOp >> 2) & 0x7; + if (cmode && cmode <= ARRAY_SIZE(xmadOpCModeStr)) + PRINT("%s ", xmadOpCModeStr[cmode - 1]); + break; + } default: if (subOp) PRINT("(SUBOP:%u) ", subOp); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index 298e7c6ef9..9193a01f18 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -30,7 +30,8 @@ const uint8_t Target::operationSrcNr[] = 0, 0, // NOP, PHI 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT 1, 1, 2, // MOV, LOAD, STORE - 2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, SHLADD + 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD + 3, 3, // SHLADD, XMAD 1, 1, 1, // ABS, NEG, NOT 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR 2, 2, 1, // MAX, MIN, SAT @@ -70,10 +71,10 @@ const OpClass Target::operationClass[] = OPCLASS_MOVE, OPCLASS_LOAD, OPCLASS_STORE, - // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD + // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD, XMAD OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, - OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, + OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp index 04cbd402a1..24a1cbb8da 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp @@ -60,6 +60,7 @@ TargetGM107::isOpSupported(operation op, DataType ty) const case OP_SQRT: case OP_DIV: case OP_MOD: + case OP_XMAD: return false; default: break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp index 83b4102b0a..ef5f06442e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp @@ -115,12 +115,12 @@ void TargetNV50::initOpInfo() { // ADD, MUL, MAD, FMA, AND, OR, XOR, MAX, MIN, SET_AND, SET_OR, SET_XOR, // SET, SELP, SLCT - 0x0ce0ca00, 0x0000007e, 0x00000000, 0x00000000 + 0x19c0ca00, 0x000000fc, 0x00000000, 0x00000000 }; static const uint32_t shortForm[(OP_LAST + 31) / 32] = { // MOV, ADD, SUB, MUL, MAD, SAD, RCP, L/PINTERP, TEX, TXF - 0x00014e40, 0x00000080, 0x00001260, 0x00000000 + 0x00014e40, 0x00000100, 0x000024c0, 0x00000000 }; static const operation noDestList[] = { @@ -439,6 +439,7 @@ TargetNV50::isOpSupported(operation op, DataType ty) const case OP_EXIT: // want exit modifier instead (on NOP if required) case OP_MEMBAR: case OP_SHLADD: + case OP_XMAD: return false; case OP_SAD: return ty == TYPE_S32; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp index 954aec0a2f..66efa0135f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp @@ -195,13 +195,13 @@ void TargetNVC0::initOpInfo() { // ADD, MUL, MAD, FMA, AND, OR, XOR, MAX, MIN, SET_AND, SET_OR, SET_XOR, // SET, SELP, SLCT - 0x0ce0ca00, 0x0000007e, 0x00000000, 0x00000000 + 0x19c0ca00, 0x000000fc, 0x00000000, 0x00000000 }; static const uint32_t shortForm[(OP_LAST + 31) / 32] = { // ADD, MUL, MAD, FMA, AND, OR, XOR, MAX, MIN - 0x0ce0ca00, 0x00000000, 0x00000000, 0x00000000 + 0x19c0ca00, 0x00000000, 0x00000000, 0x00000000 }; static const operation noDest[] = @@ -354,6 +354,18 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s, if ((i->op == OP_SHL || i->op == OP_SHR) && typeSizeof(i->sType) == 8 && sf == FILE_MEMORY_CONST) return false; + // constant buffer loads can't be used with cbcc xmads + if (i->op == OP_XMAD && (i->subOp & 0x1c) == NV50_IR_SUBOP_XMAD_CBCC && + sf == FILE_MEMORY_CONST) + return false; + // constant buffer loads for the third operand can't be used with psl/mrg xmads + if (i->op == OP_XMAD && sf == FILE_MEMORY_CONST && s == 2 && + (i->subOp & (NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_MRG))) + return false; + // for xmads, immediates can't have the h1 flag set + if (i->op == OP_XMAD && sf == FILE_IMMEDIATE && + i->src(s).mod & Modifier(NV50_IR_MOD_H1)) + return false; for (int k = 0; i->srcExists(k); ++k) { if (i->src(k).getFile() == FILE_IMMEDIATE) { @@ -445,6 +457,8 @@ TargetNVC0::isOpSupported(operation op, DataType ty) const return false; if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD) return false; + if (op == OP_XMAD) + return false; return true; } @@ -485,6 +499,13 @@ TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const if (insn->src(s ? 0 : 2).mod.neg()) return false; break; + case OP_XMAD: + if (insn->getSrc(s)->inFile(FILE_IMMEDIATE) && + mod & Modifier(NV50_IR_MOD_H1)) + return false; + if (s < 2 && !(mod & ~Modifier(NV50_IR_MOD_H1 | NV50_IR_MOD_SEXT))) + return true; + break; default: return false; } -- 2.14.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev