On Mon, Jan 28, 2019 at 4:21 PM Karol Herbst <kher...@redhat.com> wrote: > > gives me an performance boost of 0.2% in pixmark_piano on my gk106, gm204 and > gp107. > > reduces the amount of generated convert instructions by roughly 30% in > shader-db. > > v2: only for 32 bit operations > move some common code out of the switch > handle OP_SAT with modifiers > v3: only for registers and const memory > rework if clauses > merge isCvt into this patch > > Signed-off-by: Karol Herbst <kher...@redhat.com> > --- > src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + > .../drivers/nouveau/codegen/nv50_ir_inlines.h | 17 +++++ > .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 64 +++++++++++++++++++ > .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 + > 4 files changed, 83 insertions(+) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h > b/src/gallium/drivers/nouveau/codegen/nv50_ir.h > index 8085bb2f542..b00a005bdef 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h > @@ -867,6 +867,7 @@ public: > inline bool isPseudo() const { return op < OP_MOV; } > bool isDead() const; > bool isNop() const; > + inline bool isCvt() const; > bool isCommutationLegal(const Instruction *) const; // must be adjacent ! > bool isActionEqual(const Instruction *) const; > bool isResultEqual(const Instruction *) const; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h > b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h > index 4cb53ab42ed..06882058dc9 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h > @@ -419,4 +419,21 @@ LValue *Function::getLValue(int id) > return reinterpret_cast<LValue *>(allLValues.get(id)); > } > > +bool Instruction::isCvt() const > +{ > + switch (op) { > + case OP_ABS: > + case OP_CEIL: > + case OP_FLOOR: > + case OP_NEG: > + case OP_TRUNC: > + case OP_SAT: > + return true; > + case OP_CVT: > + return def(0).getFile() != FILE_PREDICATE && src(0).getFile() != > FILE_PREDICATE;
Maybe better to whitelist the things you like? E.g. FILE_FLAGS is probably also not appropriate for this? And I know this is a left-over from your earlier change to print this out more generically, however I'd just fold this into lowering_nvc0 somewhere. (Is it needed at all? The current logic should reject anythign it doesn't like anyways...) > + default: > + return false; > + } > +} > + > #endif // __NV50_IR_INLINES_H__ > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > index 295497be2f9..28f0aae6432 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > @@ -719,6 +719,67 @@ NVC0LegalizePostRA::propagateJoin(BasicBlock *bb) > bb->remove(bb->getEntry()); > } > > +// replaces instructions which would end up as f2f or i2i with faster > +// alternatives: > +// - abs(a) -> add(0, abs a) > +// - fneg(a) -> fadd(neg 0, neg a) > +// - ineg(a) -> iadd(0, neg a) > +// - fneg(abs a) -> fadd(neg 0, neg abs a) > +// - ineg(abs a) -> iadd(0, neg abs a) > +// - sat(a) -> sat add(0, a) > +void > +NVC0LegalizePostRA::replaceCvt(Instruction *cvt) > +{ > + if (!isFloatType(cvt->sType) && typeSizeof(cvt->sType) != 4) > + return; > + if (cvt->sType != cvt->dType) > + return; > + // we could make it work, but in this case we have optimizations disabled > + // and we don't really care either way. > + if (cvt->src(0).getFile() != FILE_GPR && > + cvt->src(0).getFile() != FILE_MEMORY_CONST) > + return; > + > + Modifier mod0, mod1; > + > + switch (cvt->op) { > + case OP_ABS: > + if (cvt->src(0).mod) > + return; > + if (!isFloatType(cvt->sType)) > + return; > + mod0 = 0; > + mod1 = NV50_IR_MOD_ABS; > + break; > + case OP_NEG: > + if (!isFloatType(cvt->sType) && cvt->src(0).mod) > + return; > + if (isFloatType(cvt->sType) && > + (cvt->src(0).mod && cvt->src(0).mod != Modifier(NV50_IR_MOD_ABS))) > + return; > + > + mod0 = isFloatType(cvt->sType) ? NV50_IR_MOD_NEG : 0; > + mod1 = cvt->src(0).mod == Modifier(NV50_IR_MOD_ABS) > + ? NV50_IR_MOD_NEG_ABS : NV50_IR_MOD_NEG; I'd much prefer the ? on the previous line. > + break; > + case OP_SAT: > + if (!isFloatType(cvt->sType) && cvt->src(0).mod.abs()) > + return; > + mod0 = 0; > + mod1 = cvt->src(0).mod; > + cvt->saturate = true; > + break; > + default: > + return; > + } > + > + cvt->op = OP_ADD; > + cvt->moveSources(0, 1); > + cvt->setSrc(0, rZero); > + cvt->src(0).mod = mod0; > + cvt->src(1).mod = mod1; > +} > + > bool > NVC0LegalizePostRA::visit(BasicBlock *bb) > { > @@ -758,6 +819,9 @@ NVC0LegalizePostRA::visit(BasicBlock *bb) > next = hi; > } > > + if (i->isCvt()) > + replaceCvt(i); > + > if (i->op != OP_MOV && i->op != OP_PFETCH) > replaceZero(i); > } > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > index e0f50ab0904..4679c56471b 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > @@ -81,6 +81,7 @@ private: > virtual bool visit(Function *); > virtual bool visit(BasicBlock *); > > + void replaceCvt(Instruction *); > void replaceZero(Instruction *); > bool tryReplaceContWithBra(BasicBlock *); > void propagateJoin(BasicBlock *); > -- > 2.20.1 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev