On Wed, Feb 17, 2016 at 3:05 PM, Kenneth Graunke <kenn...@whitecape.org> wrote: > (This is commit 4a1c8a3037cd29938b2a6e2c680c341e9903cfbe for vec4 mode.) > > Using the push model for inputs is much more efficient than pulling > inputs - the hardware can simply copy a large chunk into URB registers > at thread creation time, rather than having the thread send messages to > request data from the L3 cache. Unfortunately, it's possible to have > more TES inputs than fit in registers, so we have to fall back to the > pull model in some cases. > > However, it turns out that most tessellation evaluation shaders are > fairly simple, and don't use many inputs. An arbitrary cut-off of > 24 vec4 slots (12 registers) should suffice. (I chose this instead of > the 32 vec4 slots used in the scalar backend to avoid regressing a few > Piglit tests due to the vec4 register allocator being too stupid to > figure out what to do. We probably ought to fix that, but it's a > separate issue.) > > Improves performance in GPUTest's tessmark_x64 microbenchmark by > 41.5394% +/- 0.288519% (n = 115) at 1024x768 on my Clevo W740SU > (with Iris Pro 5200). > > Improves performance in Synmark's Gl40TerrainFlyTess microbenchmark by > 38.3576% +/- 0.759748% (n = 42). 
> > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 4 +- > src/mesa/drivers/dri/i965/brw_vec4_tes.cpp | 86 > +++++++++++++++++++----------- > 2 files changed, 56 insertions(+), 34 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > index 74ec4f0..9b721e5 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > @@ -685,9 +685,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr > *instr) > case nir_intrinsic_load_instance_id: > case nir_intrinsic_load_base_instance: > case nir_intrinsic_load_draw_id: > - case nir_intrinsic_load_invocation_id: > - case nir_intrinsic_load_tess_level_inner: > - case nir_intrinsic_load_tess_level_outer: { > + case nir_intrinsic_load_invocation_id: { > gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); > src_reg val = src_reg(nir_system_values[sv]); > assert(val.file != BAD_FILE); > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp > index ce5fefc..90cbd2b8 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp > @@ -28,6 +28,7 @@ > */ > > #include "brw_vec4_tes.h" > +#include "brw_cfg.h" > > namespace brw { > > @@ -53,39 +54,10 @@ vec4_tes_visitor::make_reg_for_system_value(int location, > const glsl_type *type) > void > vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr > *instr) > { > - const struct brw_tes_prog_data *tes_prog_data = > - (const struct brw_tes_prog_data *) prog_data; > - > switch (instr->intrinsic) { > - case nir_intrinsic_load_tess_level_outer: { > - dst_reg dst(this, glsl_type::vec4_type); > - nir_system_values[SYSTEM_VALUE_TESS_LEVEL_OUTER] = dst; > - > - dst_reg temp(this, glsl_type::vec4_type); > - vec4_instruction *read = > - emit(VEC4_OPCODE_URB_READ, temp, input_read_header); > - 
read->offset = 1; > - read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; > - emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX))); > + case nir_intrinsic_load_tess_level_outer: > + case nir_intrinsic_load_tess_level_inner: > break; > - } > - case nir_intrinsic_load_tess_level_inner: { > - dst_reg dst(this, glsl_type::vec2_type); > - nir_system_values[SYSTEM_VALUE_TESS_LEVEL_INNER] = dst; > - > - /* Set up the message header to reference the proper parts of the URB > */ > - dst_reg temp(this, glsl_type::vec4_type); > - vec4_instruction *read = > - emit(VEC4_OPCODE_URB_READ, temp, input_read_header); > - read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; > - if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) { > - emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX))); > - } else { > - read->offset = 1; > - emit(MOV(dst, src_reg(temp))); > - } > - break; > - } > default: > vec4_visitor::nir_setup_system_value_intrinsic(instr); > } > @@ -105,6 +77,27 @@ vec4_tes_visitor::setup_payload() > > reg = setup_uniforms(reg); > > + foreach_block_and_inst(block, vec4_instruction, inst, cfg) { > + for (int i = 0; i < 3; i++) { > + if (inst->src[i].file != ATTR) > + continue; > + > + struct brw_reg grf = > + brw_vec4_grf(reg + inst->src[i].nr / 2, 4 * (inst->src[i].nr % > 2)); > + grf = stride(grf, 0, 4, 1); > + grf.swizzle = inst->src[i].swizzle; > + grf.type = inst->src[i].type; > + if (inst->src[i].abs) > + grf = brw_abs(grf); > + if (inst->src[i].negate) > + grf = negate(grf);
Just assign the source modifiers directly instead: grf.abs = inst->src[i].abs; grf.negate = inst->src[i].negate; Otherwise, Reviewed-by: Matt Turner <matts...@gmail.com> _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev