This approach is generally right, but it is implemented in the wrong
place. lowerPOW runs pre-SSA. This kind of optimization should instead
be done at SSA time as part of ConstantFolding, and the generic
fallback for POW should be implemented as part of the "legalize" step.
That will potentially miss some CSE opportunities when the same number
is raised to different powers, but I think that's acceptable.

On Thu, Jun 22, 2017 at 3:28 PM, Karol Herbst <karolher...@gmail.com> wrote:
> If the exponent is a small integer immediate value, we can lower POW to
> MULs instead to save a few instructions. Also MUL instructions execute
> faster than what we lower POW in the default case to.
>
> score change for GpuTest /test=pixmark_piano /benchmark /no_scorebox
> /msaa=0 /benchmark_duration_ms=60000 /width=1024 /height=640:
> 1045 -> 1060
>
> changes in shader-db:
> total instructions in shared programs : 4350261 -> 4349451 (-0.02%)
> total gprs used in shared programs    : 525853 -> 525861 (0.00%)
> total local used in shared programs   : 30081 -> 30081 (0.00%)
> total bytes used in shared programs   : 39865176 -> 39857712 (-0.02%)
>
>                 local        gpr       inst      bytes
>     helped           0           4         313         313
>       hurt           0          12           1           1
>
> Signed-off-by: Karol Herbst <karolher...@gmail.com>
> ---
>  .../drivers/nouveau/codegen/nv50_ir_build_util.cpp | 49 ++++++++++++++++++++++
>  1 file changed, 49 insertions(+)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
> index 5756e1b4d4..b31dcec1ab 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
> @@ -640,6 +640,55 @@ bool
>  BuildUtil::lowerPOW(Instruction *i)
>  {
>     LValue *val = getScratch();
> +   ImmediateValue imm;
> +
> +   if (i->src(1).getImmediate(imm)) {
> +      Value *src = i->getSrc(0);
> +      if (imm.isInteger(0)) {
> +         i->op = OP_MOV;
> +         i->setSrc(0, loadImm(NULL, 1));
> +         i->setSrc(1, NULL);
> +         return true;
> +      } else
> +      if (imm.isInteger(1)) {
> +         i->op = OP_MOV;
> +         i->setSrc(1, NULL);
> +         return true;
> +      } else
> +      if (imm.isInteger(2)) {
> +         i->op = OP_MUL;
> +         i->setSrc(1, src);
> +         return true;
> +      } else
> +      if (imm.isInteger(3)) {
> +         mkOp2(OP_MUL, i->dType, val, src, src);
> +         i->op = OP_MUL;
> +         i->setSrc(1, val);
> +         return true;
> +      } else
> +      if (imm.isInteger(4)) {
> +         mkOp2(OP_MUL, i->dType, val, src, src);
> +         i->op = OP_MUL;
> +         i->setSrc(0, val);
> +         i->setSrc(1, val);
> +         return true;
> +      } else
> +      if (imm.isInteger(5)) {
> +         mkOp2(OP_MUL, i->dType, val, src, src);
> +         mkOp2(OP_MUL, i->dType, val, val, val);
> +         i->op = OP_MUL;
> +         i->setSrc(1, val);
> +         return true;
> +      } else
> +      if (imm.isInteger(8)) {
> +         mkOp2(OP_MUL, i->dType, val, src, src);
> +         mkOp2(OP_MUL, i->dType, val, val, val);
> +         i->op = OP_MUL;
> +         i->setSrc(0, val);
> +         i->setSrc(1, val);
> +         return true;
> +      }
> +   }
>
>     mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0));
>     mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1;
> --
> 2.13.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to