Yeah, "immediate = false;" looks incorrect. Looking at nvdisasm and the placement of XMAD's various atoms, it seems XMAD can only take 16-bit unsigned immediates, so envydis and the patches should probably be updated accordingly.
As for how multiplication by immediates work with mul/mad -> XMAD conversion, the actual conversion should be done before LoadPropagation (so it doesn't have to worry about them and it's free of IMUL/IMAD's limitations) and the immediates should be propagated when possible later. After adjusting emitXMAD and TargetNVC0::insnCanLoad, I think the problem would be solved. On Thu, Aug 9, 2018 at 11:32 PM, Karol Herbst <kher...@redhat.com> wrote: > On Mon, Jul 23, 2018 at 12:40 PM, Rhys Perry <pendingchao...@gmail.com> wrote: >> Signed-off-by: Rhys Perry <pendingchao...@gmail.com> >> --- >> .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 65 >> ++++++++++++++++++++++ >> .../nouveau/codegen/nv50_ir_target_gm107.cpp | 6 +- >> .../nouveau/codegen/nv50_ir_target_nvc0.cpp | 1 + >> 3 files changed, 71 insertions(+), 1 deletion(-) >> >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >> index 1d31f181e4..c3d7be0f0e 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >> @@ -156,6 +156,7 @@ private: >> void emitIMUL(); >> void emitIMAD(); >> void emitISCADD(); >> + void emitXMAD(); >> void emitIMNMX(); >> void emitICMP(); >> void emitISET(); >> @@ -1892,6 +1893,67 @@ CodeEmitterGM107::emitISCADD() >> emitGPR (0x00, insn->def(0)); >> } >> >> +void >> +CodeEmitterGM107::emitXMAD() >> +{ >> + assert(insn->src(0).getFile() == FILE_GPR); >> + >> + bool constbuf = false; >> + bool psl_mrg = true; >> + bool immediate = false; >> + if (insn->src(2).getFile() == FILE_MEMORY_CONST) { >> + assert(insn->src(1).getFile() == FILE_GPR); >> + constbuf = true; >> + psl_mrg = false; >> + emitInsn(0x51000000); >> + emitGPR(0x27, insn->src(1)); >> + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); >> + } else if (insn->src(1).getFile() == FILE_MEMORY_CONST) { >> + assert(insn->src(2).getFile() == FILE_GPR); >> + constbuf 
= true; >> + emitInsn(0x4e000000); >> + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); >> + emitGPR(0x27, insn->src(2)); >> + } else if (insn->src(1).getFile() == FILE_IMMEDIATE) { >> + assert(insn->src(2).getFile() == FILE_GPR); >> + assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1))); >> + immediate = false; > > has to be immediate = true; > >> + emitInsn(0x36000000); >> + emitIMMD(0x14, 19, insn->src(1)); > > we can only do 16 bit sized immediates with XMAD I think. I think we > also have to adjust the target so that those don't get load > propagated? How does this works out for mul/mad -> XMAD conversions > anyway? We might want to recheck that we actually do the right thing > there actually (or maybe it doesn't come up, still, would be nice to > fix it inside the target in case it is actually buggy). > >> + emitGPR(0x27, insn->src(2)); >> + } else { >> + assert(insn->src(1).getFile() == FILE_GPR); >> + assert(insn->src(2).getFile() == FILE_GPR); >> + emitInsn(0x5b000000); >> + emitGPR(0x14, insn->src(1)); >> + emitGPR(0x27, insn->src(2)); >> + } >> + >> + if (psl_mrg) >> + emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3); >> + >> + unsigned cmode = (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK); >> + cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT; >> + emitField(0x32, constbuf ? 2 : 3, cmode); >> + >> + emitX(constbuf ? 0x36 : 0x26); >> + emitCC(0x2f); >> + >> + emitGPR(0x0, insn->def(0)); >> + emitGPR(0x8, insn->src(0)); >> + >> + // source flags >> + if (isSignedType(insn->sType)) { >> + uint16_t h1s = insn->subOp & NV50_IR_SUBOP_XMAD_H1_MASK; >> + emitField(0x30, 2, h1s >> NV50_IR_SUBOP_XMAD_H1_SHIFT); >> + } >> + emitField(0x35, 1, insn->subOp & NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0); >> + if (!immediate) { >> + bool h1 = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1); >> + emitField(constbuf ? 
0x34 : 0x23, 1, h1); >> + } >> +} >> + >> void >> CodeEmitterGM107::emitIMNMX() >> { >> @@ -3266,6 +3328,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i) >> case OP_SHLADD: >> emitISCADD(); >> break; >> + case OP_XMAD: >> + emitXMAD(); >> + break; >> case OP_MIN: >> case OP_MAX: >> if (isFloatType(insn->dType)) { >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp >> index 7293fb27dd..bb1c234c43 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp >> @@ -60,8 +60,11 @@ TargetGM107::isOpSupported(operation op, DataType ty) >> const >> case OP_SQRT: >> case OP_DIV: >> case OP_MOD: >> - case OP_XMAD: >> return false; >> + case OP_XMAD: >> + if (isFloatType(ty)) >> + return false; >> + break; >> default: >> break; >> } >> @@ -231,6 +234,7 @@ TargetGM107::getLatency(const Instruction *insn) const >> case OP_SUB: >> case OP_VOTE: >> case OP_XOR: >> + case OP_XMAD: >> if (insn->dType != TYPE_F64) >> return 6; >> break; >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >> index 7e66d2950b..5257f353e4 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >> @@ -161,6 +161,7 @@ static const struct opProperties _initPropsGM107[] = { >> { OP_SUSTP, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 }, >> { OP_SUREDB, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 }, >> { OP_SUREDP, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 }, >> + { OP_XMAD, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 }, >> }; >> >> void TargetNVC0::initProps(const struct opProperties *props, int size) >> -- >> 2.14.4 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev 
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev