changes for GpuTest /test=pixmark_piano /benchmark /no_scorebox /msaa=0 /benchmark_duration_ms=60000 /width=1024 /height=640:
score: 1026 -> 1044 changes for shader-db: total instructions in shared programs : 2818606 -> 2811662 (-0.25%) total gprs used in shared programs : 379273 -> 379273 (0.00%) total local used in shared programs : 9505 -> 9505 (0.00%) total bytes used in shared programs : 25837192 -> 25773432 (-0.25%) local gpr inst bytes helped 0 0 3084 3084 hurt 0 0 0 0 v2: removed TODO reordered to show changes without RA modification removed stale debugging print() call Signed-off-by: Karol Herbst <karolher...@gmail.com> --- .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 64 +++++++++++++++++++--- 1 file changed, 57 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 1f47ba2..bcbc0c0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -2954,19 +2954,18 @@ FlatteningPass::tryPredicateConditional(BasicBlock *bb) // Fold Immediate into MAD; must be done after register allocation due to // constraint SDST == SSRC2 -// TODO: -// Does NVC0+ have other situations where this pass makes sense? 
class PostRaConstantFolding : public Pass { private: virtual bool visit(Instruction *); - void handleMAD(Instruction *); + void handleMADforNV50(Instruction *); + void handleMADforNVC0(Instruction *); }; // Fold Immediate into MAD; must be done after register allocation due to // constraint SDST == SSRC2 void -PostRaConstantFolding::handleMAD(Instruction *i) +PostRaConstantFolding::handleMADforNV50(Instruction *i) { if (i->def(0).getFile() != FILE_GPR || i->src(0).getFile() != FILE_GPR || @@ -3019,12 +3018,64 @@ PostRaConstantFolding::handleMAD(Instruction *i) } } +void +PostRaConstantFolding::handleMADforNVC0(Instruction *i) +{ + if (i->def(0).getFile() != FILE_GPR || + i->src(0).getFile() != FILE_GPR || + i->src(1).getFile() != FILE_GPR || + i->src(2).getFile() != FILE_GPR || + i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id) + return; + + int chipset = prog->getTarget()->getChipset(); + if (i->getPredicate()) { + // prior gk110 we can't do that if we have a predicate + if (chipset < NVISA_GK20A_CHIPSET) + return; + // and gk110 can't handle a cc + if (chipset < NVISA_GM107_CHIPSET && i->cc) + return; + } + + // TODO: gm107 can also do this for S32 + if (i->dType != TYPE_F32) + return; + + if ((i->src(2).mod | Modifier(NV50_IR_MOD_NEG)) != Modifier(NV50_IR_MOD_NEG)) + return; + + ImmediateValue val; + int s; + + if (i->src(0).getImmediate(val)) + s = 1; + else if (i->src(1).getImmediate(val)) + s = 0; + else + return; + + if ((i->src(s).mod | Modifier(NV50_IR_MOD_NEG)) != Modifier(NV50_IR_MOD_NEG)) + return; + + if (s == 1) + i->swapSources(0, 1); + + Instruction *imm = i->getSrc(1)->getInsn(); + i->setSrc(1, imm->getSrc(0)); + if (imm->isDead(true)) + delete_Instruction(prog, imm); +} + bool PostRaConstantFolding::visit(Instruction *i) { switch (i->op) { case OP_MAD: - handleMAD(i); + if (prog->getTarget()->getChipset() < 0xc0) + handleMADforNV50(i); + else + handleMADforNVC0(i); break; default: break; @@ -3447,8 +3498,7 @@ bool 
Program::optimizePostRA(int level) { RUN_PASS(2, FlatteningPass, run); - if (getTarget()->getChipset() < 0xc0) - RUN_PASS(2, PostRaConstantFolding, run); + RUN_PASS(2, PostRaConstantFolding, run); return true; } -- 2.10.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev