On 2/27/19 4:45 AM, Iago Toral Quiroga wrote:
> Now that we propagate constants to the first source of 2src instructions we
> see more opportunities for constant folding in the backend.
All the benefit of the series is from more constant folding? Once upon a time, I had a patch that added another call to nir_opt_constant_folding after we call nir_opt_algebraic_late. My recollection is that it hurt vec4 shaders, but it helped scalar shaders quite a bit. How does doing that affect these results? Hrm... I can collect that data. > Shader-db results on KBL: > > total instructions in shared programs: 14965607 -> 14855983 (-0.73%) > instructions in affected programs: 3988102 -> 3878478 (-2.75%) > helped: 14292 > HURT: 59 > > total cycles in shared programs: 344324295 -> 340656008 (-1.07%) > cycles in affected programs: 247527740 -> 243859453 (-1.48%) > helped: 14056 > HURT: 3314 > > total loops in shared programs: 4283 -> 4283 (0.00%) > loops in affected programs: 0 -> 0 > helped: 0 > HURT: 0 > > total spills in shared programs: 27812 -> 24350 (-12.45%) > spills in affected programs: 24921 -> 21459 (-13.89%) > helped: 345 > HURT: 19 > > total fills in shared programs: 24173 -> 22032 (-8.86%) > fills in affected programs: 21124 -> 18983 (-10.14%) > helped: 355 > HURT: 25 > > LOST: 0 > GAINED: 5 > --- > src/intel/compiler/brw_fs.cpp | 203 ++++++++++++++++++++++++++++++++-- > 1 file changed, 195 insertions(+), 8 deletions(-) > > diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp > index 2358acbeb59..b2b60237c82 100644 > --- a/src/intel/compiler/brw_fs.cpp > +++ b/src/intel/compiler/brw_fs.cpp > @@ -2583,9 +2583,55 @@ fs_visitor::opt_algebraic() > break; > > case BRW_OPCODE_MUL: > - if (inst->src[1].file != IMM) > + if (inst->src[0].file != IMM && inst->src[1].file != IMM) > continue; > > + /* Constant folding */ > + if (inst->src[0].file == IMM && inst->src[1].file == IMM) { > + assert(inst->src[0].type == inst->src[1].type); > + bool local_progress = true; > + switch (inst->src[0].type) { > + case BRW_REGISTER_TYPE_HF: { > + float v1 = _mesa_half_to_float(inst->src[0].ud & 0xffffu); > + float v2 = _mesa_half_to_float(inst->src[1].ud 
& 0xffffu); > + inst->src[0] = brw_imm_w(_mesa_float_to_half(v1 * v2)); > + break; > + } > + case BRW_REGISTER_TYPE_W: { > + int16_t v1 = inst->src[0].ud & 0xffffu; > + int16_t v2 = inst->src[1].ud & 0xffffu; > + inst->src[0] = brw_imm_w(v1 * v2); > + break; > + } > + case BRW_REGISTER_TYPE_UW: { > + uint16_t v1 = inst->src[0].ud & 0xffffu; > + uint16_t v2 = inst->src[1].ud & 0xffffu; > + inst->src[0] = brw_imm_uw(v1 * v2); > + break; > + } > + case BRW_REGISTER_TYPE_F: > + inst->src[0].f *= inst->src[1].f; > + break; > + case BRW_REGISTER_TYPE_D: > + inst->src[0].d *= inst->src[1].d; > + break; > + case BRW_REGISTER_TYPE_UD: > + inst->src[0].ud *= inst->src[1].ud; > + break; > + default: > + local_progress = false; > + break; > + }; > + > + if (local_progress) { > + inst->opcode = BRW_OPCODE_MOV; > + inst->src[1] = reg_undef; > + progress = true; > + break; > + } > + } > + > + > /* a * 1.0 = a */ > if (inst->src[1].is_one()) { > inst->opcode = BRW_OPCODE_MOV; > @@ -2594,6 +2640,14 @@ fs_visitor::opt_algebraic() > break; > } > > + if (inst->src[0].is_one()) { > + inst->opcode = BRW_OPCODE_MOV; > + inst->src[0] = inst->src[1]; > + inst->src[1] = reg_undef; > + progress = true; > + break; > + } > + > /* a * -1.0 = -a */ > if (inst->src[1].is_negative_one()) { > inst->opcode = BRW_OPCODE_MOV; > @@ -2603,27 +2657,160 @@ fs_visitor::opt_algebraic() > break; > } > > - if (inst->src[0].file == IMM) { > - assert(inst->src[0].type == BRW_REGISTER_TYPE_F); > + if (inst->src[0].is_negative_one()) { > + inst->opcode = BRW_OPCODE_MOV; > + inst->src[0] = inst->src[1]; > + inst->src[0].negate = !inst->src[1].negate; > + inst->src[1] = reg_undef; > + progress = true; > + break; > + } > + > + /* a * 0 = 0 (this is not exact for floating point) */ > + if (inst->src[1].is_zero() && > + brw_reg_type_is_integer(inst->src[1].type)) { > + inst->opcode = BRW_OPCODE_MOV; > + inst->src[0] = inst->src[1]; > + inst->src[1] = reg_undef; > + progress = true; > + break; > + } > + > + if 
(inst->src[0].is_zero() && > + brw_reg_type_is_integer(inst->src[0].type)) { > inst->opcode = BRW_OPCODE_MOV; > - inst->src[0].f *= inst->src[1].f; > inst->src[1] = reg_undef; > progress = true; > break; > } > break; > case BRW_OPCODE_ADD: > - if (inst->src[1].file != IMM) > + if (inst->src[0].file != IMM && inst->src[1].file != IMM) > continue; > > - if (inst->src[0].file == IMM) { > - assert(inst->src[0].type == BRW_REGISTER_TYPE_F); > + /* Constant folding */ > + if (inst->src[0].file == IMM && inst->src[1].file == IMM) { > + assert(inst->src[0].type == inst->src[1].type); > + bool local_progress = true; > + switch (inst->src[0].type) { > + case BRW_REGISTER_TYPE_HF: { > + float v1 = _mesa_half_to_float(inst->src[0].ud & 0xffffu); > + float v2 = _mesa_half_to_float(inst->src[1].ud & 0xffffu); > + inst->src[0] = brw_imm_w(_mesa_float_to_half(v1 + v2)); > + break; > + } > + case BRW_REGISTER_TYPE_W: { > + int16_t v1 = inst->src[0].ud & 0xffffu; > + int16_t v2 = inst->src[1].ud & 0xffffu; > + inst->src[0] = brw_imm_w(v1 + v2); > + break; > + } > + case BRW_REGISTER_TYPE_UW: { > + uint16_t v1 = inst->src[0].ud & 0xffffu; > + uint16_t v2 = inst->src[1].ud & 0xffffu; > + inst->src[0] = brw_imm_uw(v1 + v2); > + break; > + } > + case BRW_REGISTER_TYPE_F: > + inst->src[0].f += inst->src[1].f; > + break; > + case BRW_REGISTER_TYPE_D: > + inst->src[0].d += inst->src[1].d; > + break; > + case BRW_REGISTER_TYPE_UD: > + inst->src[0].ud += inst->src[1].ud; > + break; > + default: > + local_progress = false; > + break; > + }; > + > + if (local_progress) { > + inst->opcode = BRW_OPCODE_MOV; > + inst->src[1] = reg_undef; > + progress = true; > + break; > + } > + } > + > + /* a + 0 = a (this is not exact for floating point) */ > + if (inst->src[1].is_zero() && > + brw_reg_type_is_integer(inst->src[1].type)) { > inst->opcode = BRW_OPCODE_MOV; > - inst->src[0].f += inst->src[1].f; > inst->src[1] = reg_undef; > progress = true; > break; > } > + > + if (inst->src[0].is_zero() && > + 
brw_reg_type_is_integer(inst->src[0].type)) { > + inst->opcode = BRW_OPCODE_MOV; > + inst->src[0] = inst->src[1]; > + inst->src[1] = reg_undef; > + progress = true; > + break; > + } > + break; > + case BRW_OPCODE_SHL: > + if (inst->src[0].file == IMM && inst->src[1].file == IMM) { > + bool local_progress = true; > + switch (inst->src[0].type) { > + case BRW_REGISTER_TYPE_D: > + case BRW_REGISTER_TYPE_UD: > + inst->src[0].ud <<= inst->src[1].ud; > + break; > + case BRW_REGISTER_TYPE_W: > + case BRW_REGISTER_TYPE_UW: { > + uint16_t v1 = inst->src[0].ud & 0xffffu; > + uint16_t v2 = inst->src[1].ud & 0xffffu; > + inst->src[0] = retype(brw_imm_uw(v1 << v2), > inst->src[0].type); > + break; > + } > + default: > + local_progress = false; > + break; > + } > + if (local_progress) { > + inst->opcode = BRW_OPCODE_MOV; > + inst->src[1] = reg_undef; > + progress = true; > + break; > + } > + } > + break; > + case BRW_OPCODE_SHR: > + if (inst->src[0].file == IMM && inst->src[1].file == IMM) { > + bool local_progress = true; > + switch (inst->src[0].type) { > + case BRW_REGISTER_TYPE_D: > + inst->src[0].d >>= inst->src[1].ud; > + break; > + case BRW_REGISTER_TYPE_UD: > + inst->src[0].ud >>= inst->src[1].ud; > + break; > + case BRW_REGISTER_TYPE_W: { > + int16_t v1 = inst->src[0].ud & 0xffffu; > + uint16_t v2 = inst->src[1].ud & 0xffffu; > + inst->src[0] = brw_imm_w(v1 >> v2); > + break; > + } > + case BRW_REGISTER_TYPE_UW: { > + uint16_t v1 = inst->src[0].ud & 0xffffu; > + uint16_t v2 = inst->src[1].ud & 0xffffu; > + inst->src[0] = brw_imm_uw(v1 >> v2); > + break; > + } > + default: > + local_progress = false; > + break; > + } > + if (local_progress) { > + inst->opcode = BRW_OPCODE_MOV; > + inst->src[1] = reg_undef; > + progress = true; > + break; > + } > + } > break; > case BRW_OPCODE_OR: > if (inst->src[0].equals(inst->src[1]) || _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev