On 2015-12-22 02:20:57, Kenneth Graunke wrote: > GL_ARB_separate_shader_objects allows the application to mix-and-match > TCS and TES programs separately. This means that the interface between > the two stages isn't known until the final SSO pipeline is in place. > > This isn't a great match for our hardware: the TCS and TES have to agree > on the Patch URB entry layout. Since we store data as per-patch slots > followed by per-vertex slots, changing the number of per-patch slots can > significantly alter the layout. This can easily happen with SSO. > > To handle this, we store the [Patch]OutputsWritten and [Patch]InputsRead > bitfields in the TCS/TES program keys, introducing program recompiles. > brw_upload_programs() decides the layout for both TCS and TES, and > passes it to brw_upload_tcs/tes(), which store it in the key. > > When creating the NIR for a shader specialization, we override > nir->info.inputs_read (and friends) to the program key's values. > Since everything uses those, no further compiler changes are needed. > This also replaces the hack in brw_create_nir(). > > To avoid recompiles, brw_precompile_tes() looks to see if there's a > TCS in the linked shader. If so, it accounts for the TCS outputs, > just as brw_upload_programs() would. This eliminates all recompiles > in the non-SSO case. In the SSO case, there should only be recompiles > when using a TCS and TES that have different input/output interfaces. > > Fixes Piglit's mix-and-match-tcs-tes test. > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/mesa/drivers/dri/i965/brw_compiler.h | 9 +++++++++ > src/mesa/drivers/dri/i965/brw_nir.c | 11 ---------- > src/mesa/drivers/dri/i965/brw_program.h | 6 ++++-- > src/mesa/drivers/dri/i965/brw_shader.cpp | 2 ++ > src/mesa/drivers/dri/i965/brw_state_upload.c | 18 +++++++++++++++-- > src/mesa/drivers/dri/i965/brw_tcs.c | 13 +++++++++++- > src/mesa/drivers/dri/i965/brw_tes.c | 30 > +++++++++++++++++++++++++++- > src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 2 ++ > 8 files changed, 74 insertions(+), 17 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h > b/src/mesa/drivers/dri/i965/brw_compiler.h > index 0ffaac7..e66deb1 100644 > --- a/src/mesa/drivers/dri/i965/brw_compiler.h > +++ b/src/mesa/drivers/dri/i965/brw_compiler.h > @@ -200,6 +200,9 @@ struct brw_tcs_prog_key > > unsigned input_vertices; > > + /** A bitfield of per-patch outputs written. */ > + uint32_t patch_outputs_written; > + > /** A bitfield of per-vertex outputs written. */ > uint64_t outputs_written; > > @@ -211,6 +214,12 @@ struct brw_tes_prog_key > { > unsigned program_string_id; > > + /** A bitfield of per-patch inputs read. */ > + uint32_t patch_inputs_read; > + > + /** A bitfield of per-vertex inputs read. */ > + uint64_t inputs_read; > + > struct brw_sampler_prog_key_data tex; > }; > > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c > b/src/mesa/drivers/dri/i965/brw_nir.c > index 8b06dbe..eebd2a3 100644 > --- a/src/mesa/drivers/dri/i965/brw_nir.c > +++ b/src/mesa/drivers/dri/i965/brw_nir.c > @@ -618,17 +618,6 @@ brw_create_nir(struct brw_context *brw, > /* First, lower the GLSL IR or Mesa IR to NIR */ > if (shader_prog) { > nir = glsl_to_nir(shader_prog, stage, options); > - > - if (nir->stage == MESA_SHADER_TESS_EVAL && > - shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]) { > - const struct gl_program *tcs = > - shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program; > - /* Work around the TCS having bonus outputs used as shared memory > - * segments, which makes OutputsWritten not match InputsRead > - */ > - nir->info.inputs_read = tcs->OutputsWritten; > - nir->info.patch_inputs_read = tcs->PatchOutputsWritten; > - } > } else { > nir = prog_to_nir(prog, options); > OPT_V(nir_convert_to_ssa); /* turn registers into SSA */ > diff --git a/src/mesa/drivers/dri/i965/brw_program.h > b/src/mesa/drivers/dri/i965/brw_program.h > index 3d9e1b9..059ccf8 100644 > --- a/src/mesa/drivers/dri/i965/brw_program.h > +++ b/src/mesa/drivers/dri/i965/brw_program.h > @@ -56,8 +56,10 @@ void > brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog, > struct gl_shader *shader, struct gl_program *prog); > > -void brw_upload_tcs_prog(struct brw_context *brw); > -void brw_upload_tes_prog(struct brw_context *brw); > +void brw_upload_tcs_prog(struct brw_context *brw, > + uint64_t per_vertex_slots, uint32_t > per_patch_slots); > +void brw_upload_tes_prog(struct brw_context *brw, > + uint64_t per_vertex_slots, uint32_t > per_patch_slots); > > #ifdef __cplusplus > } /* extern "C" */ > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > b/src/mesa/drivers/dri/i965/brw_shader.cpp > index 57f9eb2..5140cfb 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > @@ -1329,6 +1329,8 @@ brw_compile_tes(const struct brw_compiler *compiler, > > nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); > nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); > + nir->info.inputs_read = key->inputs_read; > + nir->info.patch_inputs_read = key->patch_inputs_read; > nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar); > nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar); > > diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c > b/src/mesa/drivers/dri/i965/brw_state_upload.c > index eb0357b..9ad4ae8 100644 > --- a/src/mesa/drivers/dri/i965/brw_state_upload.c > +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c > @@ -680,8 +680,22 @@ brw_upload_programs(struct brw_context *brw, > if (pipeline == BRW_RENDER_PIPELINE) { > brw_upload_vs_prog(brw); > if (brw->tess_eval_program) { > - brw_upload_tcs_prog(brw); > - brw_upload_tes_prog(brw); > + uint64_t per_vertex_slots = brw->tess_eval_program->Base.InputsRead; > + uint32_t per_patch_slots = > + brw->tess_eval_program->Base.PatchInputsRead; > + > + /* The TCS may have additional outputs which aren't read by the > + * TES (possibly for cross-thread communication). These need to > + * be stored in the Patch URB Entry as well. > + */ > + if (brw->tess_ctrl_program) { > + per_vertex_slots |= brw->tess_ctrl_program->Base.OutputsWritten; > + per_patch_slots |= > + brw->tess_ctrl_program->Base.PatchOutputsWritten; > + } > + > + brw_upload_tcs_prog(brw, per_vertex_slots, per_patch_slots); > + brw_upload_tes_prog(brw, per_vertex_slots, per_patch_slots);
How about a new brw_upload_tess_progs function, and put this hunk in there? -Jordan > } else { > brw->tcs.prog_data = NULL; > brw->tcs.base.prog_data = NULL; > diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c > b/src/mesa/drivers/dri/i965/brw_tcs.c > index ecb6fd0..2c925e7 100644 > --- a/src/mesa/drivers/dri/i965/brw_tcs.c > +++ b/src/mesa/drivers/dri/i965/brw_tcs.c > @@ -67,6 +67,10 @@ brw_tcs_debug_recompile(struct brw_context *brw, > > found |= key_debug(brw, "input vertices", old_key->input_vertices, > key->input_vertices); > + found |= key_debug(brw, "outputs written", old_key->outputs_written, > + key->outputs_written); > + found |= key_debug(brw, "patch outputs written", > old_key->patch_outputs_written, > + key->patch_outputs_written); > found |= key_debug(brw, "TES primitive mode", old_key->tes_primitive_mode, > key->tes_primitive_mode); > found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex); > @@ -224,7 +228,9 @@ brw_codegen_tcs_prog(struct brw_context *brw, > > > void > -brw_upload_tcs_prog(struct brw_context *brw) > +brw_upload_tcs_prog(struct brw_context *brw, > + uint64_t per_vertex_slots, > + uint32_t per_patch_slots) > { > struct gl_context *ctx = &brw->ctx; > struct gl_shader_program **current = ctx->_Shader->CurrentProgram; > @@ -248,6 +254,8 @@ brw_upload_tcs_prog(struct brw_context *brw) > memset(&key, 0, sizeof(key)); > > key.input_vertices = ctx->TessCtrlProgram.patch_vertices; > + key.outputs_written = per_vertex_slots; > + key.patch_outputs_written = per_patch_slots; > > /* We need to specialize our code generation for tessellation levels > * based on the domain the DS is expecting to tessellate. > @@ -301,6 +309,9 @@ brw_tcs_precompile(struct gl_context *ctx, > > key.tes_primitive_mode = GL_TRIANGLES; > > + key.outputs_written = prog->OutputsWritten; > + key.patch_outputs_written = prog->PatchOutputsWritten; > + > success = brw_codegen_tcs_prog(brw, shader_prog, btcp, &key); > > brw->tcs.base.prog_offset = old_prog_offset; > diff --git a/src/mesa/drivers/dri/i965/brw_tes.c > b/src/mesa/drivers/dri/i965/brw_tes.c > index 844c5b2..27dc7e5 100644 > --- a/src/mesa/drivers/dri/i965/brw_tes.c > +++ b/src/mesa/drivers/dri/i965/brw_tes.c > @@ -66,6 +66,10 @@ brw_tes_debug_recompile(struct brw_context *brw, > } > > found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex); > + found |= key_debug(brw, "inputs read", old_key->inputs_read, > + key->inputs_read); > + found |= key_debug(brw, "patch inputs read", old_key->patch_inputs_read, > + key->patch_inputs_read); > > if (!found) { > perf_debug(" Something else\n"); > @@ -226,7 +230,9 @@ brw_codegen_tes_prog(struct brw_context *brw, > > > void > -brw_upload_tes_prog(struct brw_context *brw) > +brw_upload_tes_prog(struct brw_context *brw, > + uint64_t per_vertex_slots, > + uint32_t per_patch_slots) > { > struct gl_context *ctx = &brw->ctx; > struct gl_shader_program **current = ctx->_Shader->CurrentProgram; > @@ -247,6 +253,14 @@ brw_upload_tes_prog(struct brw_context *brw) > > key.program_string_id = tep->id; > > + /* Ignore gl_TessLevelInner/Outer - we treat them as system values, > + * not inputs, and they're always present in the URB entry regardless > + * of whether or not we read them. > + */ > + key.inputs_read = per_vertex_slots & > + ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); > + key.patch_inputs_read = per_patch_slots; > + > /* _NEW_TEXTURE */ > brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count, > &key.tex); > @@ -280,6 +294,20 @@ brw_tes_precompile(struct gl_context *ctx, > memset(&key, 0, sizeof(key)); > > key.program_string_id = btep->id; > + key.inputs_read = prog->InputsRead; > + key.patch_inputs_read = prog->PatchInputsRead; > + > + if (shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]) { > + struct gl_program *tcp = > + shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program; > + key.inputs_read |= tcp->OutputsWritten; > + key.patch_inputs_read |= tcp->PatchOutputsWritten; > + } > + > + /* Ignore gl_TessLevelInner/Outer - they're system values. */ > + key.inputs_read &= ~(VARYING_BIT_TESS_LEVEL_INNER | > + VARYING_BIT_TESS_LEVEL_OUTER); > + > brw_setup_tex_for_precompile(brw, &key.tex, prog); > > success = brw_codegen_tes_prog(brw, shader_prog, btep, &key); > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp > index fba55b5..507db749 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp > @@ -476,6 +476,8 @@ brw_compile_tcs(const struct brw_compiler *compiler, > > nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); > nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); > + nir->info.outputs_written = key->outputs_written; > + nir->info.patch_outputs_written = key->patch_outputs_written; > nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar); > nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar); > > -- > 2.6.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev