okay, with that removed, patch is Reviewed-by: Karol Herbst <kher...@redhat.com>
On Fri, Aug 10, 2018 at 6:33 PM, Rhys Perry <pendingchao...@gmail.com> wrote: > I don't think so. > > On Wed, Aug 8, 2018 at 11:27 PM, Karol Herbst <kher...@redhat.com> wrote: >> On Mon, Jul 23, 2018 at 12:40 PM, Rhys Perry <pendingchao...@gmail.com> >> wrote: >>> Signed-off-by: Rhys Perry <pendingchao...@gmail.com> >>> --- >>> src/gallium/drivers/nouveau/codegen/nv50_ir.h | 26 >>> ++++++++++++++++++++++ >>> .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 18 +++++++++++++-- >>> .../drivers/nouveau/codegen/nv50_ir_print.cpp | 19 ++++++++++++++++ >>> .../drivers/nouveau/codegen/nv50_ir_target.cpp | 7 +++--- >>> .../nouveau/codegen/nv50_ir_target_gm107.cpp | 1 + >>> .../nouveau/codegen/nv50_ir_target_nv50.cpp | 1 + >>> .../nouveau/codegen/nv50_ir_target_nvc0.cpp | 15 +++++++++++++ >>> 7 files changed, 82 insertions(+), 5 deletions(-) >>> >>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir.h >>> index 0b220cc48d..13822a08c3 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h >>> @@ -58,6 +58,9 @@ enum operation >>> OP_FMA, >>> OP_SAD, // abs(src0 - src1) + src2 >>> OP_SHLADD, >>> + // extended multiply-add (GM107+), does a lot of things. >>> + // see envytools for detailed documentation >>> + OP_XMAD, >>> OP_ABS, >>> OP_NEG, >>> OP_NOT, >>> @@ -256,6 +259,29 @@ enum operation >>> #define NV50_IR_SUBOP_MINMAX_MED 2 >>> #define NV50_IR_SUBOP_MINMAX_HIGH 3 >>> >>> +// xmad(src0, src1, 0) << 16 + src2 >>> +#define NV50_IR_SUBOP_XMAD_PSL (1 << 0) >>> +// (xmad(src0, src1, src2) & 0xffff) | (src1 << 16) >>> +#define NV50_IR_SUBOP_XMAD_MRG (1 << 1) >>> +// xmad(src0, src1, src2.lo) >>> +#define NV50_IR_SUBOP_XMAD_CLO (1 << 2) >>> +// xmad(src0, src1, src2.hi) >>> +#define NV50_IR_SUBOP_XMAD_CHI (2 << 2) >>> +// if both operands to the multiplication are non-zero, subtract 65536 for >>> each >>> +// negative operand >>> +#define NV50_IR_SUBOP_XMAD_CSFU (3 << 2) >>> +// xmad(src0, src1, src2) + src1 << 16 >>> +#define NV50_IR_SUBOP_XMAD_CBCC (4 << 2) >>> +#define NV50_IR_SUBOP_XMAD_CMODE_SHIFT 2 >>> +#define NV50_IR_SUBOP_XMAD_CMODE_MASK uint16_t(0x7 << >>> NV50_IR_SUBOP_XMAD_CMODE_SHIFT) >>> + >>> +// use the high 16 bits instead of the low 16 bits for the multiplication. >>> +// if the instruction's sType is signed, sign extend the operand from 16 >>> bits >>> +// to 32 before multiplication. >>> +#define NV50_IR_SUBOP_XMAD_H1_SHIFT 5 >>> +#define NV50_IR_SUBOP_XMAD_H1(i) (1 << (NV50_IR_SUBOP_XMAD_H1_SHIFT + (i))) >>> +#define NV50_IR_SUBOP_XMAD_H1_MASK uint16_t(0x3 << >>> NV50_IR_SUBOP_XMAD_H1_SHIFT) >> >> is the uint16_t really needed? >> >>> + >>> enum DataType >>> { >>> TYPE_NONE, >>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >>> index 16022e6f23..6deea7a360 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >>> @@ -191,9 +191,17 @@ void >>> LoadPropagation::checkSwapSrc01(Instruction *insn) >>> { >>> const Target *targ = prog->getTarget(); >>> - if (!targ->getOpInfo(insn).commutative) >>> - if (insn->op != OP_SET && insn->op != OP_SLCT && insn->op != OP_SUB) >>> + if (!targ->getOpInfo(insn).commutative) { >>> + if (insn->op != OP_SET && insn->op != OP_SLCT && >>> + insn->op != OP_SUB && insn->op != OP_XMAD) >>> return; >>> + // XMAD is only commutative if both the CBCC and MRG flags are not >>> set. >>> + if (insn->op == OP_XMAD && >>> + (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK) == >>> NV50_IR_SUBOP_XMAD_CBCC) >>> + return; >>> + if (insn->op == OP_XMAD && (insn->subOp & NV50_IR_SUBOP_XMAD_MRG)) >>> + return; >>> + } >>> if (insn->src(1).getFile() != FILE_GPR) >>> return; >>> // This is the special OP_SET used for alphatesting, we can't reverse >>> its >>> @@ -236,6 +244,12 @@ LoadPropagation::checkSwapSrc01(Instruction *insn) >>> if (insn->op == OP_SUB) { >>> insn->src(0).mod = insn->src(0).mod ^ Modifier(NV50_IR_MOD_NEG); >>> insn->src(1).mod = insn->src(1).mod ^ Modifier(NV50_IR_MOD_NEG); >>> + } else >>> + if (insn->op == OP_XMAD) { >>> + // swap h1 flags >>> + uint16_t h1 = (insn->subOp >> 1 & NV50_IR_SUBOP_XMAD_H1(0)) | >>> + (insn->subOp << 1 & NV50_IR_SUBOP_XMAD_H1(1)); >>> + insn->subOp = (insn->subOp & ~NV50_IR_SUBOP_XMAD_H1_MASK) | h1; >>> } >>> } >>> >>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp >>> index ee3506fbae..7eab8b8d70 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp >>> @@ -86,6 +86,7 @@ const char *operationStr[OP_LAST + 1] = >>> "fma", >>> "sad", >>> "shladd", >>> + "xmad", >>> "abs", >>> "neg", >>> "not", >>> @@ -240,6 +241,11 @@ static const char *barOpStr[] = >>> "sync", "arrive", "red and", "red or", "red popc" >>> }; >>> >>> +static const char *xmadOpCModeStr[] = >>> +{ >>> + "clo", "chi", "csfu", "cbcc" >>> +}; >>> + >>> static const char *DataTypeStr[] = >>> { >>> "-", >>> @@ -625,6 +631,19 @@ void Instruction::print() const >>> if (subOp < ARRAY_SIZE(barOpStr)) >>> PRINT("%s ", barOpStr[subOp]); >>> break; >>> + case OP_XMAD: { >>> + if (subOp & NV50_IR_SUBOP_XMAD_PSL) >>> + PRINT("psl "); >>> + if (subOp & NV50_IR_SUBOP_XMAD_MRG) >>> + PRINT("mrg "); >>> + unsigned cmode = (subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK); >>> + cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT; >>> + if (cmode && cmode <= ARRAY_SIZE(xmadOpCModeStr)) >>> + PRINT("%s ", xmadOpCModeStr[cmode - 1]); >>> + for (int i = 0; i < 2; i++) >>> + PRINT("h%d ", (subOp & NV50_IR_SUBOP_XMAD_H1(i)) ? 1 : 0); >>> + break; >>> + } >>> default: >>> if (subOp) >>> PRINT("(SUBOP:%u) ", subOp); >>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp >>> index 298e7c6ef9..9193a01f18 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp >>> @@ -30,7 +30,8 @@ const uint8_t Target::operationSrcNr[] = >>> 0, 0, // NOP, PHI >>> 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT >>> 1, 1, 2, // MOV, LOAD, STORE >>> - 2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, >>> SHLADD >>> + 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD >>> + 3, 3, // SHLADD, XMAD >>> 1, 1, 1, // ABS, NEG, NOT >>> 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR >>> 2, 2, 1, // MAX, MIN, SAT >>> @@ -70,10 +71,10 @@ const OpClass Target::operationClass[] = >>> OPCLASS_MOVE, >>> OPCLASS_LOAD, >>> OPCLASS_STORE, >>> - // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD >>> + // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD, XMAD >>> OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, >>> OPCLASS_ARITH, OPCLASS_ARITH, >>> - OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, >>> + OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, >>> OPCLASS_ARITH, >>> // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR >>> OPCLASS_CONVERT, OPCLASS_CONVERT, >>> OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, >>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp >>> index adbfcc3cfe..7293fb27dd 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp >>> @@ -60,6 +60,7 @@ TargetGM107::isOpSupported(operation op, DataType ty) >>> const >>> case OP_SQRT: >>> case OP_DIV: >>> case OP_MOD: >>> + case OP_XMAD: >>> return false; >>> default: >>> break; >>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >>> index 1ad3467337..2981497340 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >>> @@ -443,6 +443,7 @@ TargetNV50::isOpSupported(operation op, DataType ty) >>> const >>> case OP_EXIT: // want exit modifier instead (on NOP if required) >>> case OP_MEMBAR: >>> case OP_SHLADD: >>> + case OP_XMAD: >>> return false; >>> case OP_SAD: >>> return ty == TYPE_S32; >>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >>> index 7e059235f4..7e66d2950b 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >>> @@ -356,6 +356,18 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s, >>> if ((i->op == OP_SHL || i->op == OP_SHR) && typeSizeof(i->sType) == 8 && >>> sf == FILE_MEMORY_CONST) >>> return false; >>> + // constant buffer loads can't be used with cbcc xmads >>> + if (i->op == OP_XMAD && sf == FILE_MEMORY_CONST && >>> + (i->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK) == >>> NV50_IR_SUBOP_XMAD_CBCC) >>> + return false; >>> + // constant buffer loads for the third operand can't be used with >>> psl/mrg xmads >>> + if (i->op == OP_XMAD && sf == FILE_MEMORY_CONST && s == 2 && >>> + (i->subOp & (NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_MRG))) >>> + return false; >>> + // for xmads, immediates can't have the h1 flag set >>> + if (i->op == OP_XMAD && sf == FILE_IMMEDIATE && s < 2 && >>> + i->subOp & NV50_IR_SUBOP_XMAD_H1(s)) >>> + return false; >>> >>> for (int k = 0; i->srcExists(k); ++k) { >>> if (i->src(k).getFile() == FILE_IMMEDIATE) { >>> @@ -448,6 +460,8 @@ TargetNVC0::isOpSupported(operation op, DataType ty) >>> const >>> return false; >>> if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD) >>> return false; >>> + if (op == OP_XMAD) >>> + return false; >>> return true; >>> } >>> >>> @@ -467,6 +481,7 @@ TargetNVC0::isModSupported(const Instruction *insn, int >>> s, Modifier mod) const >>> case OP_XOR: >>> case OP_POPCNT: >>> case OP_BFIND: >>> + case OP_XMAD: >>> break; >>> case OP_SET: >>> if (insn->sType != TYPE_F32) >>> -- >>> 2.14.4 >>> >>> _______________________________________________ >>> mesa-dev mailing list >>> mesa-dev@lists.freedesktop.org >>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev