Reviewed-by: Ilia Mirkin <imir...@alum.mit.edu>
On Mon, Jan 28, 2019 at 5:47 PM Karol Herbst <kher...@redhat.com> wrote: > > gives me a performance boost of 0.2% in pixmark_piano on my gk106, gm204 and > gp107. > > reduces the amount of generated convert instructions by roughly 30% in > shader-db. > > v2: only for 32 bit operations > move some common code out of the switch > handle OP_SAT with modifiers > v3: only for registers and const memory > rework if clauses > merge isCvt into this patch > v4: merge isCvt into its use > > Signed-off-by: Karol Herbst <kher...@redhat.com> > --- > .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 64 +++++++++++++++++++ > .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 + > 2 files changed, 65 insertions(+) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > index 295497be2f9..1d122d1ebdc 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > @@ -719,6 +719,67 @@ NVC0LegalizePostRA::propagateJoin(BasicBlock *bb) > bb->remove(bb->getEntry()); > } > > +// replaces instructions which would end up as f2f or i2i with faster > +// alternatives: > +// - abs(a) -> add(0, abs a) > +// - fneg(a) -> fadd(neg 0, neg a) > +// - ineg(a) -> iadd(0, neg a) > +// - fneg(abs a) -> fadd(neg 0, neg abs a) > +// - ineg(abs a) -> iadd(0, neg abs a) > +// - sat(a) -> sat add(0, a) > +void > +NVC0LegalizePostRA::replaceCvt(Instruction *cvt) > +{ > + if (!isFloatType(cvt->sType) && typeSizeof(cvt->sType) != 4) > + return; > + if (cvt->sType != cvt->dType) > + return; > + // we could make it work, but in this case we have optimizations disabled > + // and we don't really care either way. 
> + if (cvt->src(0).getFile() != FILE_GPR && > + cvt->src(0).getFile() != FILE_MEMORY_CONST) > + return; > + > + Modifier mod0, mod1; > + > + switch (cvt->op) { > + case OP_ABS: > + if (cvt->src(0).mod) > + return; > + if (!isFloatType(cvt->sType)) > + return; > + mod0 = 0; > + mod1 = NV50_IR_MOD_ABS; > + break; > + case OP_NEG: > + if (!isFloatType(cvt->sType) && cvt->src(0).mod) > + return; > + if (isFloatType(cvt->sType) && > + (cvt->src(0).mod && cvt->src(0).mod != Modifier(NV50_IR_MOD_ABS))) > + return; > + > + mod0 = isFloatType(cvt->sType) ? NV50_IR_MOD_NEG : 0; > + mod1 = cvt->src(0).mod == Modifier(NV50_IR_MOD_ABS) ? > + NV50_IR_MOD_NEG_ABS : NV50_IR_MOD_NEG; > + break; > + case OP_SAT: > + if (!isFloatType(cvt->sType) && cvt->src(0).mod.abs()) > + return; > + mod0 = 0; > + mod1 = cvt->src(0).mod; > + cvt->saturate = true; > + break; > + default: > + return; > + } > + > + cvt->op = OP_ADD; > + cvt->moveSources(0, 1); > + cvt->setSrc(0, rZero); > + cvt->src(0).mod = mod0; > + cvt->src(1).mod = mod1; > +} > + > bool > NVC0LegalizePostRA::visit(BasicBlock *bb) > { > @@ -758,6 +819,9 @@ NVC0LegalizePostRA::visit(BasicBlock *bb) > next = hi; > } > > + if (i->op == OP_SAT || i->op == OP_NEG || i->op == OP_ABS) > + replaceCvt(i); > + > if (i->op != OP_MOV && i->op != OP_PFETCH) > replaceZero(i); > } > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > index e0f50ab0904..4679c56471b 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > @@ -81,6 +81,7 @@ private: > virtual bool visit(Function *); > virtual bool visit(BasicBlock *); > > + void replaceCvt(Instruction *); > void replaceZero(Instruction *); > bool tryReplaceContWithBra(BasicBlock *); > void propagateJoin(BasicBlock *); > -- > 2.20.1 > _______________________________________________ mesa-dev mailing list 
mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev