This patch is: Reviewed-by: Karol Herbst <kher...@redhat.com>
forgot to add that On Mon, Jul 23, 2018 at 11:40 AM, Rhys Perry <pendingchao...@gmail.com> wrote: > This hits the shader-db numbers a good bit, though a few xmads is way > faster than an imul or imad and the cost is mitigated by the next commit, > which optimizes many multiplications by immediates into shorter and less > register heavy instructions than the xmads. > > total instructions in shared programs : 5787704 -> 5839715 (0.90%) > total gprs used in shared programs : 669878 -> 670553 (0.10%) > total shared used in shared programs : 548832 -> 548832 (0.00%) > total local used in shared programs : 21068 -> 21164 (0.46%) > > local shared gpr inst bytes > helped 0 0 39 0 0 > hurt 1 0 365 3076 3076 > > Signed-off-by: Rhys Perry <pendingchao...@gmail.com> > --- > .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 56 > ++++++++++++++++++++++ > .../nouveau/codegen/nv50_ir_target_gm107.cpp | 1 - > 2 files changed, 56 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > index 6deea7a360..a6ddb284b8 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > @@ -2292,13 +2292,18 @@ AlgebraicOpt::visit(BasicBlock *bb) > // > ============================================================================= > > // ADD(SHL(a, b), c) -> SHLADD(a, b, c) > +// MUL(a, b) -> a few XMADs > +// MAD/FMA(a, b, c) -> a few XMADs > class LateAlgebraicOpt : public Pass > { > private: > virtual bool visit(Instruction *); > > void handleADD(Instruction *); > + void handleMULMAD(Instruction *); > bool tryADDToSHLADD(Instruction *); > + > + BuildUtil bld; > }; > > void > @@ -2359,6 +2364,52 @@ LateAlgebraicOpt::tryADDToSHLADD(Instruction *add) > return true; > } > > +// MUL(a, b) -> a few XMADs > +// MAD/FMA(a, b, c) -> a few XMADs > +void > +LateAlgebraicOpt::handleMULMAD(Instruction *i) > +{ > + // TODO: 
handle NV50_IR_SUBOP_MUL_HIGH > + if (!prog->getTarget()->isOpSupported(OP_XMAD, TYPE_U32)) > + return; > + if (isFloatType(i->dType) || typeSizeof(i->dType) != 4) > + return; > + if (i->subOp || i->usesFlags() || i->flagsDef >= 0) > + return; > + > + assert(!i->src(0).mod); > + assert(!i->src(1).mod); > + assert(i->op == OP_MUL ? 1 : !i->src(2).mod); > + > + bld.setPosition(i, false); > + > + Value *a = i->getSrc(0); > + Value *b = i->getSrc(1); > + Value *c = i->op == OP_MUL ? bld.mkImm(0) : i->getSrc(2); > + > + Value *tmp0 = bld.getSSA(); > + Value *tmp1 = bld.getSSA(); > + > + Instruction *insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp0, b, a, c); > + insn->setPredicate(i->cc, i->getPredicate()); > + > + insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp1, b, a, bld.mkImm(0)); > + insn->setPredicate(i->cc, i->getPredicate()); > + insn->subOp = NV50_IR_SUBOP_XMAD_MRG | NV50_IR_SUBOP_XMAD_H1(1); > + > + Value *pred = i->getPredicate(); > + i->setPredicate(i->cc, NULL); > + > + i->op = OP_XMAD; > + i->setSrc(0, b); > + i->setSrc(1, tmp1); > + i->setSrc(2, tmp0); > + i->subOp = NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_CBCC; > + i->subOp |= NV50_IR_SUBOP_XMAD_H1(0) | NV50_IR_SUBOP_XMAD_H1(1); > + > + i->setPredicate(i->cc, pred); > +} > + > bool > LateAlgebraicOpt::visit(Instruction *i) > { > @@ -2366,6 +2417,11 @@ LateAlgebraicOpt::visit(Instruction *i) > case OP_ADD: > handleADD(i); > break; > + case OP_MUL: > + case OP_MAD: > + case OP_FMA: > + handleMULMAD(i); > + break; > default: > break; > } > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp > index bb1c234c43..edb823afb4 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp > @@ -166,7 +166,6 @@ TargetGM107::isBarrierRequired(const Instruction *insn) > const > } > break; > case OPCLASS_ARITH: > - // TODO: IMUL/IMAD require barriers too, use of 
XMAD instead! > if ((insn->op == OP_MUL || insn->op == OP_MAD) && > !isFloatType(insn->dType)) > return true; > -- > 2.14.4 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev