On Wed, Jan 4, 2017 at 3:07 AM, Kenneth Graunke <kenn...@whitecape.org> wrote:
> Treating everything as scalar arrays allows us to drop a bunch of > special case input/output munging all throughout the backend. > Instead, we just need to remap the TessLevel components to the > appropriate patch URB header locations in remap_patch_urb_offsets(). > > We also switch to treating the TES input versions of these as ordinary > shader inputs rather than system values, as remap_patch_urb_offsets() > just makes everything work out without special handling. > > This regresses one Piglit test: > arb_tessellation_shader-large-uniforms/GL_TESS_CONTROL_ > SHADER-array-at-limit > > The compiler starts promoting the constant arrays assigned to gl_TessLevel* > to uniform arrays. Since the shader also has a uniform array that uses > the maximum number of uniform components, this puts it over the uniform > component limit enforced by the linker. This is arguably a bug in the > constant array promotion code (it should avoid pushing us over limits), > but is unlikely to penalize any real application. 
> > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/mesa/drivers/dri/i965/brw_context.c | 2 +- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 164 > +-------------------- > src/mesa/drivers/dri/i965/brw_nir.c | 74 +++++++++- > src/mesa/drivers/dri/i965/brw_nir.h | 3 +- > .../drivers/dri/i965/brw_nir_tcs_workarounds.c | 8 +- > src/mesa/drivers/dri/i965/brw_shader.cpp | 61 +------- > src/mesa/drivers/dri/i965/brw_shader.h | 5 - > src/mesa/drivers/dri/i965/brw_tcs.c | 5 +- > src/mesa/drivers/dri/i965/brw_tes.c | 17 +-- > src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 117 +-------------- > 10 files changed, 92 insertions(+), 364 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_context.c > b/src/mesa/drivers/dri/i965/brw_context.c > index 45490a0f5cf..22f872fe782 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.c > +++ b/src/mesa/drivers/dri/i965/brw_context.c > @@ -672,7 +672,7 @@ brw_initialize_context_constants(struct brw_context > *brw) > if (brw->gen >= 5 || brw->is_g4x) > ctx->Const.MaxClipPlanes = 8; > > - ctx->Const.LowerTessLevel = true; > + ctx->Const.GLSLTessLevelsAsInputs = true; > ctx->Const.LowerTCSPatchVerticesIn = brw->gen >= 8; > ctx->Const.LowerTESPatchVerticesIn = true; > ctx->Const.PrimitiveRestartForPatches = true; > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index 2ed843bd03d..8f745dff440 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -2520,78 +2520,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const > fs_builder &bld, > bld.MOV(patch_handle, > retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)); > > - if (imm_offset == 0) { > - /* This is a read of gl_TessLevelInner[], which lives in the > - * Patch URB header. The layout depends on the domain. 
> - */ > - dst.type = BRW_REGISTER_TYPE_F; > - switch (tcs_key->tes_primitive_mode) { > - case GL_QUADS: { > - /* DWords 3-2 (reversed) */ > - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 4); > - > - inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, > patch_handle); > - inst->offset = 0; > - inst->mlen = 1; > - inst->size_written = 4 * REG_SIZE; > - > - /* dst.xy = tmp.wz */ > - bld.MOV(dst, offset(tmp, bld, 3)); > - bld.MOV(offset(dst, bld, 1), offset(tmp, bld, 2)); > - break; > - } > - case GL_TRIANGLES: > - /* DWord 4; hardcode offset = 1 and size_written = > REG_SIZE */ > - inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, > patch_handle); > - inst->offset = 1; > - inst->mlen = 1; > - inst->size_written = REG_SIZE; > - break; > - case GL_ISOLINES: > - /* All channels are undefined. */ > - break; > - default: > - unreachable("Bogus tessellation domain"); > - } > - } else if (imm_offset == 1) { > - /* This is a read of gl_TessLevelOuter[], which lives in the > - * Patch URB header. The layout depends on the domain. 
> - */ > - dst.type = BRW_REGISTER_TYPE_F; > - > - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 4); > - inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, > patch_handle); > - inst->offset = 1; > - inst->mlen = 1; > - inst->size_written = 4 * REG_SIZE; > - > - /* Reswizzle: WZYX */ > - fs_reg srcs[4] = { > - offset(tmp, bld, 3), > - offset(tmp, bld, 2), > - offset(tmp, bld, 1), > - offset(tmp, bld, 0), > - }; > - > - unsigned num_components; > - switch (tcs_key->tes_primitive_mode) { > - case GL_QUADS: > - num_components = 4; > - break; > - case GL_TRIANGLES: > - num_components = 3; > - break; > - case GL_ISOLINES: > - /* Isolines are not reversed; swizzle .zw -> .xy */ > - srcs[0] = offset(tmp, bld, 2); > - srcs[1] = offset(tmp, bld, 3); > - num_components = 2; > - break; > - default: > - unreachable("Bogus tessellation domain"); > - } > - bld.LOAD_PAYLOAD(dst, srcs, num_components, 0); > - } else { > + { > if (first_component != 0) { > unsigned read_components = > instr->num_components + first_component; > @@ -2656,55 +2585,6 @@ fs_visitor::nir_emit_tcs_intrinsic(const > fs_builder &bld, > > if (indirect_offset.file != BAD_FILE) { > srcs[header_regs++] = indirect_offset; > - } else if (!is_passthrough_shader) { > - if (imm_offset == 0) { > - value.type = BRW_REGISTER_TYPE_F; > - > - mask &= (1 << > tesslevel_inner_components(tcs_key->tes_primitive_mode)) > - 1; > - > - /* This is a write to gl_TessLevelInner[], which lives in the > - * Patch URB header. The layout depends on the domain. > - */ > - switch (tcs_key->tes_primitive_mode) { > - case GL_QUADS: > - /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed). > - * We use an XXYX swizzle to reverse put .xy in the .wz > - * channels, and use a .zw writemask. > - */ > - mask = writemask_for_backwards_vector(mask); > - swiz = BRW_SWIZZLE4(0, 0, 1, 0); > - break; > - case GL_TRIANGLES: > - /* gl_TessLevelInner[].x lives at DWord 4, so we set the > - * writemask to X and bump the URB offset by 1. 
> - */ > - imm_offset = 1; > - break; > - case GL_ISOLINES: > - /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */ > - return; > - default: > - unreachable("Bogus tessellation domain"); > - } > - } else if (imm_offset == 1) { > - /* This is a write to gl_TessLevelOuter[] which lives in the > - * Patch URB Header at DWords 4-7. However, it's reversed, so > - * instead of .xyzw we have .wzyx. > - */ > - value.type = BRW_REGISTER_TYPE_F; > - > - mask &= (1 << > tesslevel_outer_components(tcs_key->tes_primitive_mode)) > - 1; > - > - if (tcs_key->tes_primitive_mode == GL_ISOLINES) { > - /* Isolines .xy should be stored in .zw, in order. */ > - swiz = BRW_SWIZZLE4(0, 0, 0, 1); > - mask <<= 2; > - } else { > - /* Other domains are reversed; store .wzyx instead of > .xyzw */ > - swiz = BRW_SWIZZLE_WZYX; > - mask = writemask_for_backwards_vector(mask); > - } > - } > } > > if (mask == 0) > @@ -2851,48 +2731,6 @@ fs_visitor::nir_emit_tes_intrinsic(const > fs_builder &bld, > } > break; > > - case nir_intrinsic_load_tess_level_outer: > - /* When the TES reads gl_TessLevelOuter, we ensure that the patch > header > - * appears as a push-model input. So, we can simply use the ATTR > file > - * rather than issuing URB read messages. The data is stored in the > - * high DWords in reverse order - DWord 7 contains .x, DWord 6 > contains > - * .y, and so on. 
> - */ > - switch (tes_prog_data->domain) { > - case BRW_TESS_DOMAIN_QUAD: > - for (unsigned i = 0; i < 4; i++) > - bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - > i)); > - break; > - case BRW_TESS_DOMAIN_TRI: > - for (unsigned i = 0; i < 3; i++) > - bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - > i)); > - break; > - case BRW_TESS_DOMAIN_ISOLINE: > - for (unsigned i = 0; i < 2; i++) > - bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 6 + > i)); > - break; > - } > - break; > - > - case nir_intrinsic_load_tess_level_inner: > - /* When the TES reads gl_TessLevelInner, we ensure that the patch > header > - * appears as a push-model input. So, we can simply use the ATTR > file > - * rather than issuing URB read messages. > - */ > - switch (tes_prog_data->domain) { > - case BRW_TESS_DOMAIN_QUAD: > - bld.MOV(dest, component(fs_reg(ATTR, 0), 3)); > - bld.MOV(offset(dest, bld, 1), component(fs_reg(ATTR, 0), 2)); > - break; > - case BRW_TESS_DOMAIN_TRI: > - bld.MOV(dest, component(fs_reg(ATTR, 0), 4)); > - break; > - case BRW_TESS_DOMAIN_ISOLINE: > - /* ignore - value is undefined */ > - break; > - } > - break; > - > case nir_intrinsic_load_input: > case nir_intrinsic_load_per_vertex_input: { > fs_reg indirect_offset = get_indirect_offset(instr); > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c > b/src/mesa/drivers/dri/i965/brw_nir.c > index 6f37e97a86f..46eeb1723b4 100644 > --- a/src/mesa/drivers/dri/i965/brw_nir.c > +++ b/src/mesa/drivers/dri/i965/brw_nir.c > @@ -141,9 +141,68 @@ remap_inputs_with_vue_map(nir_block *block, const > struct brw_vue_map *vue_map) > } > > static bool > +remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr, > + GLenum primitive_mode) > +{ > + const int location = nir_intrinsic_base(intr); > + const unsigned component = nir_intrinsic_component(intr); > + bool out_of_bounds; > + > + if (location == VARYING_SLOT_TESS_LEVEL_INNER) { > + switch (primitive_mode) { > + case GL_QUADS: > + /* 
gl_TessLevelInner[0..1] lives at DWords 3-2 (reversed). */ > + nir_intrinsic_set_base(intr, 0); > + nir_intrinsic_set_component(intr, 3 - component); > + out_of_bounds = false; > What if component > 1? I guess that's not really a problem but it is out-of-bounds... > + break; > + case GL_TRIANGLES: > + /* gl_TessLevelInner[0] lives at DWord 4. */ > + nir_intrinsic_set_base(intr, 1); > + out_of_bounds = component > 0; > + break; > + case GL_ISOLINES: > + out_of_bounds = true; > + break; > + default: > + unreachable("Bogus tessellation domain"); > + } > + } else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) { > + if (primitive_mode == GL_ISOLINES) { > + /* gl_TessLevelOuter[0..1] lives at DWords 6-7 (in order). */ > + nir_intrinsic_set_base(intr, 1); > + nir_intrinsic_set_component(intr, 2 + > nir_intrinsic_component(intr)); > + out_of_bounds = component > 1; > + } else { > + /* Triangles use DWords 7-5 (reversed); Quads use 7-4 (reversed) > */ > + nir_intrinsic_set_base(intr, 1); > + nir_intrinsic_set_component(intr, 3 - > nir_intrinsic_component(intr)); > + out_of_bounds = component == 3 && primitive_mode == GL_TRIANGLES; > + } > + } else { > + return false; > + } > + > + if (out_of_bounds) { > + if (nir_intrinsic_infos[intr->intrinsic].has_dest) { > + b->cursor = nir_before_instr(&intr->instr); > + nir_ssa_def *undef = nir_ssa_undef(b, 1, 32); > + nir_ssa_def_rewrite_uses(&intr->dest.ssa, > nir_src_for_ssa(undef)); > + } > + nir_instr_remove(&intr->instr); > + } > + > + return true; > +} > + > +static bool > remap_patch_urb_offsets(nir_block *block, nir_builder *b, > - const struct brw_vue_map *vue_map) > + const struct brw_vue_map *vue_map, > + GLenum tes_primitive_mode) > { > + const bool is_passthrough_tcs = b->shader->info->name && > + strcmp(b->shader->info->name, "passthrough") == 0; > This is gross... Also... Why? What's so special about the passthrough that it doesn't need tess level remaps? 
I have a feeling there's some more general thing we could be doing here. > + > nir_foreach_instr_safe(instr, block) { > if (instr->type != nir_instr_type_intrinsic) > continue; > @@ -154,6 +213,11 @@ remap_patch_urb_offsets(nir_block *block, > nir_builder *b, > > if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) || > (stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) { > + > + if (!is_passthrough_tcs && > + remap_tess_levels(b, intrin, tes_primitive_mode)) > + continue; > Let's make sure I've got this right... We map everything from the varying identifiers to VUE slots. For the case of tesslevel, they will be assigned VUE slots 0 and 1 so the code below will never cause any other output to alias. Then we rework stuff in remap_tess_levels so that they map to the right location in the 8-dword chunk at the beginning of the VUE. This function (remap_patch_urb_offsets) could really use a comment at the top saying what it's doing. > + > int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]]; > assert(vue_slot != -1); > intrin->const_index[0] = vue_slot; > @@ -273,7 +337,8 @@ brw_nir_lower_tes_inputs(nir_shader *nir, const > struct brw_vue_map *vue_map) > nir_builder b; > nir_builder_init(&b, function->impl); > nir_foreach_block(block, function->impl) { > - remap_patch_urb_offsets(block, &b, vue_map); > + remap_patch_urb_offsets(block, &b, vue_map, > + nir->info->tes.primitive_mode); > } > } > } > @@ -341,7 +406,8 @@ brw_nir_lower_vue_outputs(nir_shader *nir, > } > > void > -brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map > *vue_map) > +brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map > *vue_map, > + GLenum tes_primitive_mode) > { > nir_foreach_variable(var, &nir->outputs) { > var->data.driver_location = var->data.location; > @@ -359,7 +425,7 @@ brw_nir_lower_tcs_outputs(nir_shader *nir, const > struct brw_vue_map *vue_map) > nir_builder b; > nir_builder_init(&b, function->impl); > nir_foreach_block(block, 
function->impl) { > - remap_patch_urb_offsets(block, &b, vue_map); > + remap_patch_urb_offsets(block, &b, vue_map, > tes_primitive_mode); > } > } > } > diff --git a/src/mesa/drivers/dri/i965/brw_nir.h > b/src/mesa/drivers/dri/i965/brw_nir.h > index 8cfb6c1be68..c6ef437d4f9 100644 > --- a/src/mesa/drivers/dri/i965/brw_nir.h > +++ b/src/mesa/drivers/dri/i965/brw_nir.h > @@ -109,7 +109,8 @@ void brw_nir_lower_fs_inputs(nir_shader *nir, struct > brw_vue_map *vue_map, > const struct gen_device_info *devinfo, > const struct brw_wm_prog_key *key); > void brw_nir_lower_vue_outputs(nir_shader *nir, bool is_scalar); > -void brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map > *vue); > +void brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map > *vue, > + GLenum tes_primitive_mode); > void brw_nir_lower_fs_outputs(nir_shader *nir); > void brw_nir_lower_cs_shared(nir_shader *nir); > > diff --git a/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c > b/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c > index caf5c393a5e..e130c8c9c1a 100644 > --- a/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c > +++ b/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c > @@ -73,7 +73,7 @@ > */ > > static inline nir_ssa_def * > -load_output(nir_builder *b, int num_components, int offset) > +load_output(nir_builder *b, int num_components, int offset, int component) > { > nir_intrinsic_instr *load = > nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_output); > @@ -81,6 +81,7 @@ load_output(nir_builder *b, int num_components, int > offset) > load->num_components = num_components; > load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); > nir_intrinsic_set_base(load, offset); > + nir_intrinsic_set_component(load, component); > > nir_builder_instr_insert(b, &load->instr); > > @@ -92,8 +93,8 @@ emit_quads_workaround(nir_builder *b, nir_block *block) > { > b->cursor = nir_after_block_before_jump(block); > > - nir_ssa_def *inner = load_output(b, 2, 0); > - 
nir_ssa_def *outer = load_output(b, 4, 1); > + nir_ssa_def *inner = load_output(b, 2, 0, 2); > + nir_ssa_def *outer = load_output(b, 4, 1, 0); > > nir_ssa_def *any_greater_than_1 = > nir_ior(b, nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), outer)), > @@ -113,6 +114,7 @@ emit_quads_workaround(nir_builder *b, nir_block > *block) > nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output); > store->num_components = 2; > nir_intrinsic_set_write_mask(store, WRITEMASK_XY); > + nir_intrinsic_set_component(store, 2); > store->src[0] = nir_src_for_ssa(inner); > store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); > nir_builder_instr_insert(b, &store->instr); > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > b/src/mesa/drivers/dri/i965/brw_shader.cpp > index dfc7407ea5b..7dbe3a502ec 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > @@ -648,53 +648,6 @@ get_atomic_counter_op(nir_intrinsic_op op) > } > } > > -unsigned > -tesslevel_outer_components(GLenum tes_primitive_mode) > -{ > - switch (tes_primitive_mode) { > - case GL_QUADS: > - return 4; > - case GL_TRIANGLES: > - return 3; > - case GL_ISOLINES: > - return 2; > - default: > - unreachable("Bogus tessellation domain"); > - } > - return 0; > -} > - > -unsigned > -tesslevel_inner_components(GLenum tes_primitive_mode) > -{ > - switch (tes_primitive_mode) { > - case GL_QUADS: > - return 2; > - case GL_TRIANGLES: > - return 1; > - case GL_ISOLINES: > - return 0; > - default: > - unreachable("Bogus tessellation domain"); > - } > - return 0; > -} > - > -/** > - * Given a normal .xyzw writemask, convert it to a writemask for a vector > - * that's stored backwards, i.e. .wzyx. 
> - */ > -unsigned > -writemask_for_backwards_vector(unsigned mask) > -{ > - unsigned new_mask = 0; > - > - for (int i = 0; i < 4; i++) > - new_mask |= ((mask >> i) & 1) << (3 - i); > - > - return new_mask; > -} > - > backend_shader::backend_shader(const struct brw_compiler *compiler, > void *log_data, > void *mem_ctx, > @@ -712,8 +665,6 @@ backend_shader::backend_shader(const struct > brw_compiler *compiler, > debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_ > stage(stage); > stage_name = _mesa_shader_stage_to_string(stage); > stage_abbrev = _mesa_shader_stage_to_abbrev(stage); > - is_passthrough_shader = > - nir->info->name && strcmp(nir->info->name, "passthrough") == 0; > } > > bool > @@ -1399,17 +1350,7 @@ brw_compile_tes(const struct brw_compiler *compiler, > > /* URB entry sizes are stored as a multiple of 64 bytes. */ > prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; > - > - bool need_patch_header = nir->info->system_values_read & > - (BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_OUTER) | > - BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_INNER)); > - > - /* The TES will pull most inputs using URB read messages. > - * > - * However, we push the patch header for TessLevel factors when > required, > - * as it's a tiny amount of extra data. > - */ > - prog_data->base.urb_read_length = need_patch_header ? 
1 : 0; > + prog_data->base.urb_read_length = 0; > > if (unlikely(INTEL_DEBUG & DEBUG_TES)) { > fprintf(stderr, "TES Input "); > diff --git a/src/mesa/drivers/dri/i965/brw_shader.h > b/src/mesa/drivers/dri/i965/brw_shader.h > index 6b5ee3719a9..13f271db8c1 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.h > +++ b/src/mesa/drivers/dri/i965/brw_shader.h > @@ -217,7 +217,6 @@ public: > bool debug_enabled; > const char *stage_name; > const char *stage_abbrev; > - bool is_passthrough_shader; > > brw::simple_allocator alloc; > > @@ -301,10 +300,6 @@ bool brw_cs_precompile(struct gl_context *ctx, > GLboolean brw_link_shader(struct gl_context *ctx, struct > gl_shader_program *prog); > struct gl_linked_shader *brw_new_shader(gl_shader_stage stage); > > -unsigned tesslevel_outer_components(GLenum tes_primitive_mode); > -unsigned tesslevel_inner_components(GLenum tes_primitive_mode); > -unsigned writemask_for_backwards_vector(unsigned mask); > - > unsigned get_atomic_counter_op(nir_intrinsic_op op); > > #ifdef __cplusplus > diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c > b/src/mesa/drivers/dri/i965/brw_tcs.c > index f890ccf0296..567ae792dcd 100644 > --- a/src/mesa/drivers/dri/i965/brw_tcs.c > +++ b/src/mesa/drivers/dri/i965/brw_tcs.c > @@ -51,7 +51,8 @@ create_passthrough_tcs(void *mem_ctx, const struct > brw_compiler *compiler, > nir_ssa_def *invoc_id = > nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0); > > - nir->info->inputs_read = key->outputs_written; > + nir->info->inputs_read = key->outputs_written & > + ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); > nir->info->outputs_written = key->outputs_written; > nir->info->tcs.vertices_out = key->input_vertices; > nir->info->name = ralloc_strdup(nir, "passthrough"); > @@ -81,7 +82,7 @@ create_passthrough_tcs(void *mem_ctx, const struct > brw_compiler *compiler, > } > > /* Copy inputs to outputs. 
*/ > - uint64_t varyings = key->outputs_written; > + uint64_t varyings = nir->info->inputs_read; > > while (varyings != 0) { > const int varying = ffsll(varyings) - 1; > diff --git a/src/mesa/drivers/dri/i965/brw_tes.c > b/src/mesa/drivers/dri/i965/brw_tes.c > index 20313660734..56d75c28447 100644 > --- a/src/mesa/drivers/dri/i965/brw_tes.c > +++ b/src/mesa/drivers/dri/i965/brw_tes.c > @@ -239,16 +239,12 @@ brw_tes_populate_key(struct brw_context *brw, > */ > if (tcp) { > struct gl_program *tcp_prog = &tcp->program; > - per_vertex_slots |= tcp_prog->info.outputs_written; > + per_vertex_slots |= tcp_prog->info.outputs_written & > + ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); > per_patch_slots |= tcp_prog->info.patch_outputs_written; > } > > - /* Ignore gl_TessLevelInner/Outer - we treat them as system values, > - * not inputs, and they're always present in the URB entry regardless > - * of whether or not we read them. > - */ > - key->inputs_read = per_vertex_slots & > - ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); > + key->inputs_read = per_vertex_slots; > key->patch_inputs_read = per_patch_slots; > > /* _NEW_TEXTURE */ > @@ -305,14 +301,11 @@ brw_tes_precompile(struct gl_context *ctx, > if (shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]) { > struct gl_program *tcp = > shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program; > - key.inputs_read |= tcp->nir->info->outputs_written; > + key.inputs_read |= tcp->nir->info->outputs_written & > + ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); > key.patch_inputs_read |= tcp->nir->info->patch_outputs_written; > } > > - /* Ignore gl_TessLevelInner/Outer - they're system values. 
*/ > - key.inputs_read &= ~(VARYING_BIT_TESS_LEVEL_INNER | > - VARYING_BIT_TESS_LEVEL_OUTER); > - > brw_setup_tex_for_precompile(brw, &key.tex, prog); > > success = brw_codegen_tes_prog(brw, shader_prog, btep, &key); > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp > index bfa22458f0e..9ef3dc04665 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp > @@ -319,62 +319,8 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr > *instr) > dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); > dst.writemask = brw_writemask_for_size(instr->num_components); > > - if (imm_offset == 0 && indirect_offset.file == BAD_FILE) { > - dst.type = BRW_REGISTER_TYPE_F; > - > - /* This is a read of gl_TessLevelInner[], which lives in the > - * Patch URB header. The layout depends on the domain. > - */ > - switch (key->tes_primitive_mode) { > - case GL_QUADS: { > - /* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */ > - dst_reg tmp(this, glsl_type::vec4_type); > - emit_output_urb_read(tmp, 0, 0, src_reg()); > - emit(MOV(writemask(dst, WRITEMASK_XY), > - swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX))); > - break; > - } > - case GL_TRIANGLES: > - /* DWord 4; use offset 1 but normal swizzle/writemask. */ > - emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, 0, > - src_reg()); > - break; > - case GL_ISOLINES: > - /* All channels are undefined. */ > - return; > - default: > - unreachable("Bogus tessellation domain"); > - } > - } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) { > - dst.type = BRW_REGISTER_TYPE_F; > - unsigned swiz = BRW_SWIZZLE_WZYX; > - > - /* This is a read of gl_TessLevelOuter[], which lives in the > - * high 4 DWords of the Patch URB header, in reverse order. 
> - */ > - switch (key->tes_primitive_mode) { > - case GL_QUADS: > - dst.writemask = WRITEMASK_XYZW; > - break; > - case GL_TRIANGLES: > - dst.writemask = WRITEMASK_XYZ; > - break; > - case GL_ISOLINES: > - /* Isolines are not reversed; swizzle .zw -> .xy */ > - swiz = BRW_SWIZZLE_ZWZW; > - dst.writemask = WRITEMASK_XY; > - return; > - default: > - unreachable("Bogus tessellation domain"); > - } > - > - dst_reg tmp(this, glsl_type::vec4_type); > - emit_output_urb_read(tmp, 1, 0, src_reg()); > - emit(MOV(dst, swizzle(src_reg(tmp), swiz))); > - } else { > - emit_output_urb_read(dst, imm_offset, > nir_intrinsic_component(instr), > - indirect_offset); > - } > + emit_output_urb_read(dst, imm_offset, nir_intrinsic_component(instr) > , > + indirect_offset); > break; > } > case nir_intrinsic_store_output: > @@ -386,62 +332,6 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr > *instr) > src_reg indirect_offset = get_indirect_offset(instr); > unsigned imm_offset = instr->const_index[0]; > > - /* The passthrough shader writes the whole patch header as two > vec4s; > - * skip all the gl_TessLevelInner/Outer swizzling. > - */ > - if (indirect_offset.file == BAD_FILE && !is_passthrough_shader) { > - if (imm_offset == 0) { > - value.type = BRW_REGISTER_TYPE_F; > - > - mask &= > - (1 << tesslevel_inner_components(key->tes_primitive_mode)) > - 1; > - > - /* This is a write to gl_TessLevelInner[], which lives in the > - * Patch URB header. The layout depends on the domain. > - */ > - switch (key->tes_primitive_mode) { > - case GL_QUADS: > - /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed). > - * We use an XXYX swizzle to reverse put .xy in the .wz > - * channels, and use a .zw writemask. > - */ > - swiz = BRW_SWIZZLE4(0, 0, 1, 0); > - mask = writemask_for_backwards_vector(mask); > - break; > - case GL_TRIANGLES: > - /* gl_TessLevelInner[].x lives at DWord 4, so we set the > - * writemask to X and bump the URB offset by 1. 
> - */ > - imm_offset = 1; > - break; > - case GL_ISOLINES: > - /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */ > - return; > - default: > - unreachable("Bogus tessellation domain"); > - } > - } else if (imm_offset == 1) { > - value.type = BRW_REGISTER_TYPE_F; > - > - mask &= > - (1 << tesslevel_outer_components(key->tes_primitive_mode)) > - 1; > - > - /* This is a write to gl_TessLevelOuter[] which lives in the > - * Patch URB Header at DWords 4-7. However, it's reversed, so > - * instead of .xyzw we have .wzyx. > - */ > - if (key->tes_primitive_mode == GL_ISOLINES) { > - /* Isolines .xy should be stored in .zw, in order. */ > - swiz = BRW_SWIZZLE4(0, 0, 0, 1); > - mask <<= 2; > - } else { > - /* Other domains are reversed; store .wzyx instead of > .xyzw. */ > - swiz = BRW_SWIZZLE_WZYX; > - mask = writemask_for_backwards_vector(mask); > - } > - } > - } > - > unsigned first_component = nir_intrinsic_component(instr); > if (first_component) { > if (nir_src_bit_size(instr->src[0]) == 64) > @@ -522,7 +412,8 @@ brw_compile_tcs(const struct brw_compiler *compiler, > > nir = brw_nir_apply_sampler_key(nir, compiler, &key->tex, is_scalar); > brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map); > - brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map); > + brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map, > + key->tes_primitive_mode); > if (key->quads_workaround) > brw_nir_apply_tcs_quads_workaround(nir); > > -- > 2.11.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev