On 09/05/2013 03:35 AM, Chia-I Wu wrote: > sample_d is slower than the lowered version on gen7. For gen7, this improves > Xonotic benchmark with Ultimate effects by as much as 25%: > > before the change: 40.06 fps > after the change: 51.10 fps > after the change with INTEL_DEBUG=no16: 44.46 fps > > As sample_d is not allowed in SIMD16 mode, I firstly thought the difference > was from SIMD8 versus SIMD16. If that was the case, we would want to apply > brw_lower_texture_gradients() only on fragment shaders in SIMD16 mode. > > But, as the numbers show, there is still 10% improvement when SIMD16 is forced > off after the change. Thus textureGrad() is lowered unconditionally for now. > Due to this and that I haven't tried it on Haswell, this is still RFC.
A lot of this code depends on the texture targets being used. What texture targets is Xonotic using with textureGrad? > No piglit regressions. > > Signed-off-by: Chia-I Wu <olva...@gmail.com> > --- > .../dri/i965/brw_lower_texture_gradients.cpp | 54 > ++++++++++++++-------- > 1 file changed, 36 insertions(+), 18 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp > b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp > index 1589a20..f3fcb56 100644 > --- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp > +++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp > @@ -34,8 +34,8 @@ using namespace ir_builder; > > class lower_texture_grad_visitor : public ir_hierarchical_visitor { > public: > - lower_texture_grad_visitor(bool has_sample_d_c) > - : has_sample_d_c(has_sample_d_c) > + lower_texture_grad_visitor(bool has_sample_d, bool has_sample_d_c) > + : has_sample_d(has_sample_d), has_sample_d_c(has_sample_d_c) > { > progress = false; > } > @@ -44,6 +44,7 @@ public: > > > bool progress; > + bool has_sample_d; > bool has_sample_d_c; > > private: > @@ -90,22 +91,33 @@ txs_type(const glsl_type *type) > ir_visitor_status > lower_texture_grad_visitor::visit_leave(ir_texture *ir) > { > - /* Only lower textureGrad with shadow samplers */ > - if (ir->op != ir_txd || !ir->shadow_comparitor) > + if (ir->op != ir_txd) > return visit_continue; > > - /* Lower textureGrad() with samplerCubeShadow even if we have the > sample_d_c > - * message. GLSL provides gradients for the 'r' coordinate. > Unfortunately: > - * > - * From the Ivybridge PRM, Volume 4, Part 1, sample_d message description: > - * "The r coordinate contains the faceid, and the r gradients are ignored > - * by hardware." > - * > - * We likely need to do a similar treatment for samplerCube and > - * samplerCubeArray, but we have insufficient testing for that at the > moment. > - */ > - bool need_lowering = !has_sample_d_c || > - ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE; > + bool need_lowering = false; > + > + if (ir->shadow_comparitor) { > + /* Lower textureGrad() with samplerCubeShadow even if we have the > + * sample_d_c message. GLSL provides gradients for the 'r' coordinate. > + * Unfortunately: > + * > + * From the Ivybridge PRM, Volume 4, Part 1, sample_d message > + * description: "The r coordinate contains the faceid, and the r > + * gradients are ignored by hardware." > + */ > + if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) > + need_lowering = true; > + else if (!has_sample_d_c) > + need_lowering = true; This should look like the old code: need_lowering = !has_sample_d_c || ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE; > + } > + else { > + /* We likely need to do a similar treatment for samplerCube and > + * samplerCubeArray, but we have insufficient testing for that at the > + * moment. > + */ > + if (!has_sample_d) > + need_lowering = true; need_lowering = !has_sample_d; > + } > > if (!need_lowering) > return visit_continue; > @@ -154,7 +166,9 @@ lower_texture_grad_visitor::visit_leave(ir_texture *ir) > expr(ir_unop_sqrt, dot(dPdy, dPdy))); > } > > - /* lambda_base = log2(rho). We're ignoring GL state biases for now. */ > + /* lambda_base = log2(rho). It will be biased and clamped by values > + * defined in SAMPLER_STATE to get the final lambda. > + */ > ir->op = ir_txl; > ir->lod_info.lod = expr(ir_unop_log2, rho); > > @@ -168,8 +182,12 @@ bool > brw_lower_texture_gradients(struct brw_context *brw, > struct exec_list *instructions) > { > + /* sample_d is slower than the lowered version on gen7, and is not allowed > + * in SIMD16 mode. Treating it as unsupported improves the performance. > + */ > + bool has_sample_d = brw->gen != 7; Based on the data in one of the other messages, it sounds like this selector needs to be slightly more complex. > bool has_sample_d_c = brw->gen >= 8 || brw->is_haswell; > - lower_texture_grad_visitor v(has_sample_d_c); > + lower_texture_grad_visitor v(has_sample_d, has_sample_d_c); > > visit_list_elements(&v, instructions); _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev