On Fri, Aug 10, 2018 at 8:36 PM, Rhys Perry <pendingchao...@gmail.com> wrote: > Yeah, "immediate = false;" looks incorrect. > > Looking at nvdisasm and the placement of XMAD's various atoms, it seems it > can only have 16-bit unsigned immediates, so envydis and the patches > should probably be updated. > > As for how multiplication by immediates works with mul/mad -> XMAD > conversion, the actual conversion should be done before LoadPropagation > (so it doesn't have to worry about them and it's free of IMUL/IMAD's > limitations) and the immediates should be propagated when possible later. > > After adjusting emitXMAD and TargetNVC0::insnCanLoad, I think the problem > would be solved. >
My point was if LoadPropagation would actually load 17+ bit sized immediates into XMAD > On Thu, Aug 9, 2018 at 11:32 PM, Karol Herbst <kher...@redhat.com> wrote: >> On Mon, Jul 23, 2018 at 12:40 PM, Rhys Perry <pendingchao...@gmail.com> >> wrote: >>> Signed-off-by: Rhys Perry <pendingchao...@gmail.com> >>> --- >>> .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 65 >>> ++++++++++++++++++++++ >>> .../nouveau/codegen/nv50_ir_target_gm107.cpp | 6 +- >>> .../nouveau/codegen/nv50_ir_target_nvc0.cpp | 1 + >>> 3 files changed, 71 insertions(+), 1 deletion(-) >>> >>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >>> index 1d31f181e4..c3d7be0f0e 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >>> @@ -156,6 +156,7 @@ private: >>> void emitIMUL(); >>> void emitIMAD(); >>> void emitISCADD(); >>> + void emitXMAD(); >>> void emitIMNMX(); >>> void emitICMP(); >>> void emitISET(); >>> @@ -1892,6 +1893,67 @@ CodeEmitterGM107::emitISCADD() >>> emitGPR (0x00, insn->def(0)); >>> } >>> >>> +void >>> +CodeEmitterGM107::emitXMAD() >>> +{ >>> + assert(insn->src(0).getFile() == FILE_GPR); >>> + >>> + bool constbuf = false; >>> + bool psl_mrg = true; >>> + bool immediate = false; >>> + if (insn->src(2).getFile() == FILE_MEMORY_CONST) { >>> + assert(insn->src(1).getFile() == FILE_GPR); >>> + constbuf = true; >>> + psl_mrg = false; >>> + emitInsn(0x51000000); >>> + emitGPR(0x27, insn->src(1)); >>> + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); >>> + } else if (insn->src(1).getFile() == FILE_MEMORY_CONST) { >>> + assert(insn->src(2).getFile() == FILE_GPR); >>> + constbuf = true; >>> + emitInsn(0x4e000000); >>> + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); >>> + emitGPR(0x27, insn->src(2)); >>> + } else if (insn->src(1).getFile() == FILE_IMMEDIATE) { >>> + assert(insn->src(2).getFile() == FILE_GPR); 
>>> + assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1))); >>> + immediate = false; >> >> has to be immediate = true; >> >>> + emitInsn(0x36000000); >>> + emitIMMD(0x14, 19, insn->src(1)); >> >> we can only do 16-bit immediates with XMAD I think. I think we >> also have to adjust the target so that those don't get load >> propagated? How does this work out for mul/mad -> XMAD conversions >> anyway? We might want to recheck that we actually do the right thing >> there (or maybe it doesn't come up; still, it would be nice to >> fix it inside the target in case it is actually buggy). >> >>> + emitGPR(0x27, insn->src(2)); >>> + } else { >>> + assert(insn->src(1).getFile() == FILE_GPR); >>> + assert(insn->src(2).getFile() == FILE_GPR); >>> + emitInsn(0x5b000000); >>> + emitGPR(0x14, insn->src(1)); >>> + emitGPR(0x27, insn->src(2)); >>> + } >>> + >>> + if (psl_mrg) >>> + emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3); >>> + >>> + unsigned cmode = (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK); >>> + cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT; >>> + emitField(0x32, constbuf ? 2 : 3, cmode); >>> + >>> + emitX(constbuf ? 0x36 : 0x26); >>> + emitCC(0x2f); >>> + >>> + emitGPR(0x0, insn->def(0)); >>> + emitGPR(0x8, insn->src(0)); >>> + >>> + // source flags >>> + if (isSignedType(insn->sType)) { >>> + uint16_t h1s = insn->subOp & NV50_IR_SUBOP_XMAD_H1_MASK; >>> + emitField(0x30, 2, h1s >> NV50_IR_SUBOP_XMAD_H1_SHIFT); >>> + } >>> + emitField(0x35, 1, insn->subOp & NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0); >>> + if (!immediate) { >>> + bool h1 = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1); >>> + emitField(constbuf ? 
0x34 : 0x23, 1, h1); >>> + } >>> +} >>> + >>> void >>> CodeEmitterGM107::emitIMNMX() >>> { >>> @@ -3266,6 +3328,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i) >>> case OP_SHLADD: >>> emitISCADD(); >>> break; >>> + case OP_XMAD: >>> + emitXMAD(); >>> + break; >>> case OP_MIN: >>> case OP_MAX: >>> if (isFloatType(insn->dType)) { >>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp >>> index 7293fb27dd..bb1c234c43 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp >>> @@ -60,8 +60,11 @@ TargetGM107::isOpSupported(operation op, DataType ty) >>> const >>> case OP_SQRT: >>> case OP_DIV: >>> case OP_MOD: >>> - case OP_XMAD: >>> return false; >>> + case OP_XMAD: >>> + if (isFloatType(ty)) >>> + return false; >>> + break; >>> default: >>> break; >>> } >>> @@ -231,6 +234,7 @@ TargetGM107::getLatency(const Instruction *insn) const >>> case OP_SUB: >>> case OP_VOTE: >>> case OP_XOR: >>> + case OP_XMAD: >>> if (insn->dType != TYPE_F64) >>> return 6; >>> break; >>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >>> index 7e66d2950b..5257f353e4 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >>> @@ -161,6 +161,7 @@ static const struct opProperties _initPropsGM107[] = { >>> { OP_SUSTP, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 }, >>> { OP_SUREDB, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 }, >>> { OP_SUREDP, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 }, >>> + { OP_XMAD, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 }, >>> }; >>> >>> void TargetNVC0::initProps(const struct opProperties *props, int size) >>> -- >>> 2.14.4 >>> >>> _______________________________________________ >>> mesa-dev mailing list >>> mesa-dev@lists.freedesktop.org >>> 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev