On Wed, Aug 17, 2016 at 10:15 AM, Kenneth Graunke <kenn...@whitecape.org> wrote: > Fixes several GL44-CTS.tessellation_shader (and GL45 and ES31) subcases: > - vertex_spacing > - tessellation_shader_point_mode.points_verification > - tessellation_shader_quads_tessellation.inner_tessellation_level_rounding > > Cc: mesa-sta...@lists.freedesktop.org > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/mesa/drivers/dri/i965/Makefile.sources | 1 + > src/mesa/drivers/dri/i965/brw_compiler.h | 2 + > src/mesa/drivers/dri/i965/brw_nir.h | 2 + > .../drivers/dri/i965/brw_nir_tcs_workarounds.c | 152 > +++++++++++++++++++++ > src/mesa/drivers/dri/i965/brw_tcs.c | 18 ++- > src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 3 + > 6 files changed, 175 insertions(+), 3 deletions(-) > create mode 100644 src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c > > diff --git a/src/mesa/drivers/dri/i965/Makefile.sources > b/src/mesa/drivers/dri/i965/Makefile.sources > index df6b5dd..2492a31 100644 > --- a/src/mesa/drivers/dri/i965/Makefile.sources > +++ b/src/mesa/drivers/dri/i965/Makefile.sources > @@ -48,6 +48,7 @@ i965_compiler_FILES = \ > brw_nir_attribute_workarounds.c \ > brw_nir_intrinsics.c \ > brw_nir_opt_peephole_ffma.c \ > + brw_nir_tcs_workarounds.c \ > brw_packed_float.c \ > brw_predicated_break.cpp \ > brw_reg.h \ > diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h > b/src/mesa/drivers/dri/i965/brw_compiler.h > index 10e9f47..7d15c28 100644 > --- a/src/mesa/drivers/dri/i965/brw_compiler.h > +++ b/src/mesa/drivers/dri/i965/brw_compiler.h > @@ -220,6 +220,8 @@ struct brw_tcs_prog_key > /** A bitfield of per-vertex outputs written. */ > uint64_t outputs_written; > > + bool quads_workaround; > + > struct brw_sampler_prog_key_data tex; > }; > > diff --git a/src/mesa/drivers/dri/i965/brw_nir.h > b/src/mesa/drivers/dri/i965/brw_nir.h > index 74c354f..6185310 100644 > --- a/src/mesa/drivers/dri/i965/brw_nir.h > +++ b/src/mesa/drivers/dri/i965/brw_nir.h > @@ -117,6 +117,8 @@ bool brw_nir_apply_attribute_workarounds(nir_shader *nir, > > bool brw_nir_apply_trig_workarounds(nir_shader *nir); > > +void brw_nir_apply_tcs_quads_workaround(nir_shader *nir); > + > nir_shader *brw_nir_apply_sampler_key(nir_shader *nir, > const struct brw_device_info *devinfo, > const struct brw_sampler_prog_key_data > *key, > diff --git a/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c > b/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c > new file mode 100644 > index 0000000..0626981 > --- /dev/null > +++ b/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c > @@ -0,0 +1,152 @@ > +/* > + * Copyright © 2016 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#include "compiler/nir/nir_builder.h" > +#include "brw_nir.h" > + > +/** > + * Implements the WaPreventHSTessLevelsInterference workaround (for Gen7-8). > + * > + * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the > + * definition of the patch header layouts): > + * > + * "HW Bug: The Tessellation stage will incorrectly add domain points > + * along patch edges under the following conditions, which may result > + * in conformance failures and/or cracking artifacts: > + * > + * * QUAD domain > + * * INTEGER partitioning > + * * All three TessFactors in a given U or V direction (e.g., V > + * direction: UEQ0, InsideV, UEQ1) are all exactly 1.0 > + * * All three TessFactors in the other direction are > 1.0 and all > + * round up to the same integer value (e.g, U direction: > + * VEQ0 = 3.1, InsideU = 3.7, VEQ1 = 3.4) > + * > + * The suggested workaround (to be implemented as part of the postamble > + * to the HS shader in the HS kernel) is: > + * > + * if ( > + * (TF[UEQ0] > 1.0) || > + * (TF[VEQ0] > 1.0) || > + * (TF[UEQ1] > 1.0) || > + * (TF[VEQ1] > 1.0) || > + * (TF[INSIDE_U] > 1.0) || > + * (TF[INSIDE_V] > 1.0) ) > + * { > + * TF[INSIDE_U] = (TF[INSIDE_U] == 1.0) ? 2.0 : TF[INSIDE_U]; > + * TF[INSIDE_V] = (TF[INSIDE_V] == 1.0) ? 2.0 : TF[INSIDE_V]; > + * }" > + * > + * There's a subtlety here. Intel internal HSD-ES bug 1208668495 notes > + * that the above workaround fails to fix certain GL/ES CTS tests which > + * have inside tessellation factors of -1.0. This can be explained by > + * a quote from the ARB_tessellation_shader specification: > + * > + * "If "equal_spacing" is used, the floating-point tessellation level is > + * first clamped to the range [1,<max>], where <max> is implementation- > + * dependent maximum tessellation level (MAX_TESS_GEN_LEVEL)." > + * > + * In other words, the actual inner tessellation factor used is > + * clamp(TF[INSIDE_*], 1.0, 64.0). So we want to compare the clamped > + * value against 1.0. To accomplish this, we change the comparison from > + * (TF[INSIDE_*] == 1.0) to (TF[INSIDE_*] <= 1.0). > + */ > + > +static inline nir_ssa_def * > +load_output(nir_builder *b, int num_components, int offset) > +{ > + nir_intrinsic_instr *load = > + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_output); > + nir_ssa_dest_init(&load->instr, &load->dest, num_components, 32, NULL); > + load->num_components = num_components; > + load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); > + nir_intrinsic_set_base(load, offset); > + > + nir_builder_instr_insert(b, &load->instr); > + > + return &load->dest.ssa; > +} > + > +static inline void > +store_output(nir_builder *b, nir_ssa_def *value, int offset, unsigned comps) > +{ > + nir_intrinsic_instr *store = > + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output); > + store->num_components = comps; > + nir_intrinsic_set_write_mask(store, (1u << comps) - 1); > + store->src[0] = nir_src_for_ssa(value); > + store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); > + nir_builder_instr_insert(b, &store->instr); > +} > + > +static void > +emit_quads_workaround(nir_builder *b, nir_block *block) > +{ > + /* We're going to insert a new if-statement in a predecessor of the end > + * block. This would normally create a new block (after the if) which > + * would then become the predecessor of the end block, causing our set > + * walking to get screwed up. To avoid this, just emit a constant at > + * the end of our current block, and insert the if before that. > + */
Eww... tbh, it would probably be better if you just copied the set and iterated over that rather than relying on internal details of the control-flow modification code. > + b->cursor = nir_after_block_before_jump(block); > + b->cursor = nir_before_instr(nir_imm_int(b, 0)->parent_instr); > + > + nir_ssa_def *inner = load_output(b, 2, 0); > + nir_ssa_def *outer = load_output(b, 4, 1); > + > + nir_ssa_def *any_greater_than_1 = > + nir_ior(b, nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), outer)), > + nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), inner))); > + > + nir_if *if_stmt = nir_if_create(b->shader); > + if_stmt->condition = nir_src_for_ssa(any_greater_than_1); > + nir_builder_cf_insert(b, &if_stmt->cf_node); > + > + /* Fill out the new then-block */ > + b->cursor = nir_after_cf_list(&if_stmt->then_list); > + > + store_output(b, nir_bcsel(b, nir_fge(b, nir_imm_float(b, 1.0f), inner), > + nir_imm_float(b, 2.0f), inner), 0, 2); > +} > + > +void > +brw_nir_apply_tcs_quads_workaround(nir_shader *nir) > +{ > + assert(nir->stage == MESA_SHADER_TESS_CTRL); > + > + nir_foreach_function(func, nir) { Presumably, you want to insert the workaround whenever the shader exits... in that case, it's better to use nir_shader_get_entrypoint(). > + if (!func->impl) > + continue; > + > + nir_builder b; > + nir_builder_init(&b, func->impl); > + > + struct set_entry *entry; > + set_foreach(func->impl->end_block->predecessors, entry) { > + nir_block *pred = (nir_block *) entry->key; > + emit_quads_workaround(&b, pred); > + } > + > + nir_metadata_preserve(func->impl, 0); > + } > +} > diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c > b/src/mesa/drivers/dri/i965/brw_tcs.c > index da94bf2..2a4c775 100644 > --- a/src/mesa/drivers/dri/i965/brw_tcs.c > +++ b/src/mesa/drivers/dri/i965/brw_tcs.c > @@ -153,6 +153,8 @@ brw_tcs_debug_recompile(struct brw_context *brw, > key->patch_outputs_written); > found |= key_debug(brw, "TES primitive mode", old_key->tes_primitive_mode, > key->tes_primitive_mode); > + found |= key_debug(brw, "quads and equal_spacing workaround", > + old_key->quads_workaround, key->quads_workaround); > found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex); > > if (!found) { > @@ -346,6 +348,9 @@ brw_upload_tcs_prog(struct brw_context *brw, > * based on the domain the DS is expecting to tessellate. > */ > key.tes_primitive_mode = tep->program.PrimitiveMode; > + key.quads_workaround = brw->gen < 9 && > + tep->program.PrimitiveMode == GL_QUADS && > + tep->program.Spacing == GL_EQUAL; > > if (tcp) { > key.program_string_id = tcp->id; > @@ -382,6 +387,8 @@ brw_tcs_precompile(struct gl_context *ctx, > > struct gl_tess_ctrl_program *tcp = (struct gl_tess_ctrl_program *)prog; > struct brw_tess_ctrl_program *btcp = brw_tess_ctrl_program(tcp); > + const struct gl_linked_shader *tes = > + shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; > > memset(&key, 0, sizeof(key)); > > @@ -394,9 +401,14 @@ brw_tcs_precompile(struct gl_context *ctx, > _LinkedShaders[MESA_SHADER_TESS_CTRL]->info.TessCtrl.VerticesOut; > } > > - key.tes_primitive_mode = > shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL] > - ? > shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]->info.TessEval.PrimitiveMode > - : GL_TRIANGLES; > + if (tes) { > + key.tes_primitive_mode = tes->info.TessEval.PrimitiveMode; > + key.quads_workaround = brw->gen < 9 && > + tes->info.TessEval.PrimitiveMode == GL_QUADS && > + tes->info.TessEval.Spacing == GL_EQUAL; > + } else { > + key.tes_primitive_mode = GL_TRIANGLES; > + } > > key.outputs_written = prog->OutputsWritten; > key.patch_outputs_written = prog->PatchOutputsWritten; > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp > index 30c81c5..9944803 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp > @@ -471,6 +471,9 @@ brw_compile_tcs(const struct brw_compiler *compiler, > nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); > brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map); > brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map); > + if (key->quads_workaround) > + brw_nir_apply_tcs_quads_workaround(nir); > + > nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar); > > if (is_scalar) > -- > 2.9.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev