sample_d is slower than the lowered version on gen7. For gen7, this improves Xonotic benchmark with Ultimate effects by as much as 25%:
before the change: 40.06 fps after the change: 51.10 fps after the change with INTEL_DEBUG=no16: 44.46 fps As sample_d is not allowed in SIMD16 mode, I firstly thought the difference was from SIMD8 versus SIMD16. If that was the case, we would want to apply brw_lower_texture_gradients() only on fragment shaders in SIMD16 mode. But, as the numbers show, there is still 10% improvement when SIMD16 is forced off after the change. Thus textureGrad() is lowered unconditionally for now. Due to this and that I haven't tried it on Haswell, this is still RFC. No piglit regressions. Signed-off-by: Chia-I Wu <olva...@gmail.com> --- .../dri/i965/brw_lower_texture_gradients.cpp | 54 ++++++++++++++-------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp index 1589a20..f3fcb56 100644 --- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp +++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp @@ -34,8 +34,8 @@ using namespace ir_builder; class lower_texture_grad_visitor : public ir_hierarchical_visitor { public: - lower_texture_grad_visitor(bool has_sample_d_c) - : has_sample_d_c(has_sample_d_c) + lower_texture_grad_visitor(bool has_sample_d, bool has_sample_d_c) + : has_sample_d(has_sample_d), has_sample_d_c(has_sample_d_c) { progress = false; } @@ -44,6 +44,7 @@ public: bool progress; + bool has_sample_d; bool has_sample_d_c; private: @@ -90,22 +91,33 @@ txs_type(const glsl_type *type) ir_visitor_status lower_texture_grad_visitor::visit_leave(ir_texture *ir) { - /* Only lower textureGrad with shadow samplers */ - if (ir->op != ir_txd || !ir->shadow_comparitor) + if (ir->op != ir_txd) return visit_continue; - /* Lower textureGrad() with samplerCubeShadow even if we have the sample_d_c - * message. GLSL provides gradients for the 'r' coordinate. Unfortunately: - * - * From the Ivybridge PRM, Volume 4, Part 1, sample_d message description: - * "The r coordinate contains the faceid, and the r gradients are ignored - * by hardware." - * - * We likely need to do a similar treatment for samplerCube and - * samplerCubeArray, but we have insufficient testing for that at the moment. - */ - bool need_lowering = !has_sample_d_c || - ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE; + bool need_lowering = false; + + if (ir->shadow_comparitor) { + /* Lower textureGrad() with samplerCubeShadow even if we have the + * sample_d_c message. GLSL provides gradients for the 'r' coordinate. + * Unfortunately: + * + * From the Ivybridge PRM, Volume 4, Part 1, sample_d message + * description: "The r coordinate contains the faceid, and the r + * gradients are ignored by hardware." + */ + if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) + need_lowering = true; + else if (!has_sample_d_c) + need_lowering = true; + } + else { + /* We likely need to do a similar treatment for samplerCube and + * samplerCubeArray, but we have insufficient testing for that at the + * moment. + */ + if (!has_sample_d) + need_lowering = true; + } if (!need_lowering) return visit_continue; @@ -154,7 +166,9 @@ lower_texture_grad_visitor::visit_leave(ir_texture *ir) expr(ir_unop_sqrt, dot(dPdy, dPdy))); } - /* lambda_base = log2(rho). We're ignoring GL state biases for now. */ + /* lambda_base = log2(rho). It will be biased and clamped by values + * defined in SAMPLER_STATE to get the final lambda. + */ ir->op = ir_txl; ir->lod_info.lod = expr(ir_unop_log2, rho); @@ -168,8 +182,12 @@ bool brw_lower_texture_gradients(struct brw_context *brw, struct exec_list *instructions) { + /* sample_d is slower than the lowered version on gen7, and is not allowed + * in SIMD16 mode. Treating it as unsupported improves the performance. + */ + bool has_sample_d = brw->gen != 7; bool has_sample_d_c = brw->gen >= 8 || brw->is_haswell; - lower_texture_grad_visitor v(has_sample_d_c); + lower_texture_grad_visitor v(has_sample_d, has_sample_d_c); visit_list_elements(&v, instructions); -- 1.8.3.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev