Whew... I probably would have split this one into 5 or so. Then krh would have grumbled at me. ;)
On 2015-12-11 13:24:00, Kenneth Graunke wrote: > The TES is essentially a post-tessellator VS, which has access to the > entire TCS output patch, and a special gl_TessCoord input. Otherwise, > they're very straightforward. > > This patch implements SIMD8 tessellation evaluation shaders for Gen8+. > The tessellator can generate a lot of geometry, so operating in SIMD8 > mode (8 vertices per thread) is more efficient than SIMD4x2 mode (only > 2 vertices per thread). I have another patch which implements SIMD4x2 > mode for older hardware (or via an environment variable override). > > We currently handle all inputs via the pull model. > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/mesa/drivers/dri/i965/Makefile.sources | 1 + > src/mesa/drivers/dri/i965/brw_compiler.h | 24 +++ > src/mesa/drivers/dri/i965/brw_context.h | 6 + > src/mesa/drivers/dri/i965/brw_fs.cpp | 48 +++++ > src/mesa/drivers/dri/i965/brw_fs.h | 10 +- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 121 +++++++++++ > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 12 +- > src/mesa/drivers/dri/i965/brw_link.cpp | 4 + > src/mesa/drivers/dri/i965/brw_program.h | 2 + > src/mesa/drivers/dri/i965/brw_shader.cpp | 94 +++++++++ > src/mesa/drivers/dri/i965/brw_shader.h | 3 + > src/mesa/drivers/dri/i965/brw_state_upload.c | 3 + > src/mesa/drivers/dri/i965/brw_tes.c | 300 > +++++++++++++++++++++++++++ > 13 files changed, 625 insertions(+), 3 deletions(-) > create mode 100644 src/mesa/drivers/dri/i965/brw_tes.c > > diff --git a/src/mesa/drivers/dri/i965/Makefile.sources > b/src/mesa/drivers/dri/i965/Makefile.sources > index d147a73..7354aaf 100644 > --- a/src/mesa/drivers/dri/i965/Makefile.sources > +++ b/src/mesa/drivers/dri/i965/Makefile.sources > @@ -151,6 +151,7 @@ i965_FILES = \ > brw_state_upload.c \ > brw_structs.h \ > brw_tcs_surface_state.c \ > + brw_tes.c \ > brw_tes_surface_state.c \ > brw_tex.c \ > brw_tex_layout.c \ > diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h > 
b/src/mesa/drivers/dri/i965/brw_compiler.h > index c9e0317..64d831d 100644 > --- a/src/mesa/drivers/dri/i965/brw_compiler.h > +++ b/src/mesa/drivers/dri/i965/brw_compiler.h > @@ -191,6 +191,14 @@ struct brw_vs_prog_key { > struct brw_sampler_prog_key_data tex; > }; > > +/** The program key for Tessellation Evaluation Shaders. */ > +struct brw_tes_prog_key > +{ > + unsigned program_string_id; > + > + struct brw_sampler_prog_key_data tex; > +}; > + > /** The program key for Geometry Shaders. */ > struct brw_gs_prog_key > { > @@ -669,6 +677,22 @@ brw_compile_vs(const struct brw_compiler *compiler, void > *log_data, > char **error_str); > > /** > + * Compile a tessellation evaluation shader. > + * > + * Returns the final assembly and the program's size. > + */ > +const unsigned * > +brw_compile_tes(const struct brw_compiler *compiler, void *log_data, > + void *mem_ctx, > + const struct brw_tes_prog_key *key, > + struct brw_tes_prog_data *prog_data, > + const struct nir_shader *shader, > + struct gl_shader_program *shader_prog, > + int shader_time_index, > + unsigned *final_assembly_size, > + char **error_str); > + > +/** > * Compile a vertex shader. > * > * Returns the final assembly and the program's size. 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index 69bc04c..5e840d1 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -1704,6 +1704,12 @@ brw_vertex_program_const(const struct > gl_vertex_program *p) > return (const struct brw_vertex_program *) p; > } > > +static inline struct brw_tess_eval_program * > +brw_tess_eval_program(struct gl_tess_eval_program *p) > +{ > + return (struct brw_tess_eval_program *) p; > +} > + > static inline struct brw_geometry_program * > brw_geometry_program(struct gl_geometry_program *p) > { > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index c833ef0..de584e4 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -1686,6 +1686,21 @@ fs_visitor::assign_vs_urb_setup() > } > > void > +fs_visitor::assign_tes_urb_setup() > +{ > + assert(stage == MESA_SHADER_TESS_EVAL); > + > + brw_vue_prog_data *vue_prog_data = (brw_vue_prog_data *) prog_data; > + > + first_non_payload_grf += 8 * vue_prog_data->urb_read_length; > + > + /* Rewrite all ATTR file references to HW_REGs. */ > + foreach_block_and_inst(block, fs_inst, inst, cfg) { > + convert_attr_sources_to_hw_regs(inst); > + } > +} > + > +void > fs_visitor::assign_gs_urb_setup() > { > assert(stage == MESA_SHADER_GEOMETRY); > @@ -5232,6 +5247,39 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes) > } > > bool > +fs_visitor::run_tes() > +{ > + assert(stage == MESA_SHADER_TESS_EVAL); > + > + payload.num_regs = 5; How about a comment like setup_vs_payload has? Does TessLevel not being accessed affect this? (I'm a little confused about what happens when TessLevel is or isn't accessed.) 
> + > + if (shader_time_index >= 0) > + emit_shader_time_begin(); > + > + emit_nir_code(); > + > + if (failed) > + return false; > + > + emit_urb_writes(); > + > + if (shader_time_index >= 0) > + emit_shader_time_end(); > + > + calculate_cfg(); > + > + optimize(); > + > + assign_curb_setup(); > + assign_tes_urb_setup(); > + > + fixup_3src_null_dest(); > + allocate_registers(); > + > + return !failed; > +} > + > +bool > fs_visitor::run_gs() > { > assert(stage == MESA_SHADER_GEOMETRY); > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > b/src/mesa/drivers/dri/i965/brw_fs.h > index f2e3841..372f760 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > @@ -81,7 +81,8 @@ public: > struct gl_program *prog, > const nir_shader *shader, > unsigned dispatch_width, > - int shader_time_index); > + int shader_time_index, > + const struct brw_vue_map *input_vue_map = NULL); > fs_visitor(const struct brw_compiler *compiler, void *log_data, > void *mem_ctx, > struct brw_gs_compile *gs_compile, > @@ -109,6 +110,7 @@ public: > > bool run_fs(bool do_rep_send); > bool run_vs(gl_clip_plane *clip_planes); > + bool run_tes(); > bool run_gs(); > bool run_cs(); > void optimize(); > @@ -124,6 +126,7 @@ public: > void assign_urb_setup(); > void convert_attr_sources_to_hw_regs(fs_inst *inst); > void assign_vs_urb_setup(); > + void assign_tes_urb_setup(); > void assign_gs_urb_setup(); > bool assign_regs(bool allow_spilling); > void assign_regs_trivial(); > @@ -249,6 +252,8 @@ public: > nir_intrinsic_instr *instr); > void nir_emit_intrinsic(const brw::fs_builder &bld, > nir_intrinsic_instr *instr); > + void nir_emit_tes_intrinsic(const brw::fs_builder &bld, > + nir_intrinsic_instr *instr); > void nir_emit_ssbo_atomic(const brw::fs_builder &bld, > int op, nir_intrinsic_instr *instr); > void nir_emit_shared_atomic(const brw::fs_builder &bld, > @@ -260,6 +265,7 @@ public: > fs_reg get_nir_src(nir_src src); > fs_reg get_nir_dest(nir_dest dest); > fs_reg 
get_nir_image_deref(const nir_deref_var *deref); > + fs_reg get_indirect_offset(nir_intrinsic_instr *instr); > void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst, > unsigned wr_mask); > > @@ -313,6 +319,8 @@ public: > struct brw_stage_prog_data *prog_data; > struct gl_program *prog; > > + const struct brw_vue_map *input_vue_map; > + > int *param_size; > > int *virtual_grf_start; > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index db38f61..fe87561 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -123,6 +123,7 @@ fs_visitor::nir_setup_outputs() > > switch (stage) { > case MESA_SHADER_VERTEX: > + case MESA_SHADER_TESS_EVAL: > case MESA_SHADER_GEOMETRY: { > unsigned location = var->data.location; > nir_setup_single_output_varying(&reg, var->type, &location); > @@ -443,6 +444,9 @@ fs_visitor::nir_emit_instr(nir_instr *instr) > case MESA_SHADER_VERTEX: > nir_emit_vs_intrinsic(abld, nir_instr_as_intrinsic(instr)); > break; > + case MESA_SHADER_TESS_EVAL: > + nir_emit_tes_intrinsic(abld, nir_instr_as_intrinsic(instr)); > + break; > case MESA_SHADER_GEOMETRY: > nir_emit_gs_intrinsic(abld, nir_instr_as_intrinsic(instr)); > break; > @@ -1715,6 +1719,24 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst, > } > } > > +fs_reg > +fs_visitor::get_indirect_offset(nir_intrinsic_instr *instr) > +{ > + nir_src *offset_src = nir_get_io_offset_src(instr); > + nir_const_value *const_value = nir_src_as_const_value(*offset_src); > + > + if (const_value) { > + /* The only constant offset we should find is 0. brw_nir.c's > + * add_const_offset_to_base() will fold other constant offsets > + * into instr->const_index[0]. 
> + */ > + assert(const_value->u[0] == 0); > + return fs_reg(); > + } > + > + return get_nir_src(*offset_src); > +} > + > void > fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld, > nir_intrinsic_instr *instr) > @@ -1747,6 +1769,105 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder > &bld, > } > > void > +fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld, > + nir_intrinsic_instr *instr) > +{ > + assert(stage == MESA_SHADER_TESS_EVAL); > + struct brw_tes_prog_data *tes_prog_data = (struct brw_tes_prog_data *) > prog_data; > + > + fs_reg dest; > + if (nir_intrinsic_infos[instr->intrinsic].has_dest) > + dest = get_nir_dest(instr->dest); > + > + switch (instr->intrinsic) { > + case nir_intrinsic_load_primitive_id: > + bld.MOV(dest, fs_reg(brw_vec1_grf(0, 1))); > + break; > + case nir_intrinsic_load_tess_coord: > + /* gl_TessCoord is part of the payload in g1-3 */ > + for (unsigned i = 0; i < 3; i++) { > + bld.MOV(offset(dest, bld, i), fs_reg(brw_vec8_grf(1 + i, 0))); > + } > + break; > + > + case nir_intrinsic_load_tess_level_outer: > + /* When the TES reads gl_TessLevelOuter, we ensure that the patch > header > + * appears as a push-model input. So, we can simply use the ATTR file > + * rather than issuing URB read messages. Again, the data is stored > + * in the high DWords in reverse order. I'm not sure what the 'Again' part is referencing. 
> + */ > + switch (tes_prog_data->domain) { > + case BRW_TESS_DOMAIN_QUAD: > + for (unsigned i = 0; i < 4; i++) > + bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - i)); > + break; > + case BRW_TESS_DOMAIN_TRI: > + for (unsigned i = 0; i < 3; i++) > + bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - i)); > + break; > + case BRW_TESS_DOMAIN_ISOLINE: > + for (unsigned i = 0; i < 2; i++) > + bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - i)); > + break; > + } > + break; > + > + case nir_intrinsic_load_tess_level_inner: > + /* When the TES reads gl_TessLevelInner, we ensure that the patch > header > + * appears as a push-model input. So, we can simply use the ATTR file > + * rather than issuing URB read messages. > + */ > + switch (tes_prog_data->domain) { > + case BRW_TESS_DOMAIN_QUAD: > + bld.MOV(dest, component(fs_reg(ATTR, 0), 3)); > + bld.MOV(offset(dest, bld, 1), component(fs_reg(ATTR, 0), 2)); > + break; > + case BRW_TESS_DOMAIN_TRI: > + bld.MOV(dest, component(fs_reg(ATTR, 0), 4)); > + break; > + case BRW_TESS_DOMAIN_ISOLINE: > + /* ignore - value is undefined */ > + break; > + } > + break; > + > + case nir_intrinsic_load_input: > + case nir_intrinsic_load_per_vertex_input: { > + fs_reg indirect_offset = get_indirect_offset(instr); > + unsigned imm_offset = instr->const_index[0]; > + > + fs_inst *inst; > + if (indirect_offset.file == BAD_FILE) { > + /* Replicate the patch handle to all enabled channels */ > + fs_reg patch_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); > + bld.MOV(patch_handle, retype(brw_vec1_grf(0, 0), > BRW_REGISTER_TYPE_UD)); > + > + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dest, patch_handle); > + inst->mlen = 1; > + } else { > + /* Indirect indexing - use per-slot offsets as well. 
*/ > + const fs_reg srcs[] = { > + retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD), > + indirect_offset > + }; > + fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); > + bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); > + > + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dest, > payload); > + inst->mlen = 2; > + } > + inst->offset = imm_offset; > + inst->base_mrf = -1; > + inst->regs_written = instr->num_components; > + break; > + } > + default: > + nir_emit_intrinsic(bld, instr); > + break; > + } > +} > + > +void > fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld, > nir_intrinsic_instr *instr) > { > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > index 0582e78..b6405cd 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > @@ -700,7 +700,10 @@ fs_visitor::emit_urb_writes(const fs_reg > &gs_vertex_count) > fs_reg sources[8]; > fs_reg urb_handle; > > - urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); > + if (stage == MESA_SHADER_TESS_EVAL) > + urb_handle = fs_reg(retype(brw_vec8_grf(4, 0), BRW_REGISTER_TYPE_UD)); > + else > + urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); > > /* If we don't have any valid slots to write, just do a minimal urb write > * send to terminate the shader. 
This includes 1 slot of undefined data, > @@ -934,9 +937,11 @@ fs_visitor::fs_visitor(const struct brw_compiler > *compiler, void *log_data, > struct gl_program *prog, > const nir_shader *shader, > unsigned dispatch_width, > - int shader_time_index) > + int shader_time_index, > + const struct brw_vue_map *input_vue_map) > : backend_shader(compiler, log_data, mem_ctx, shader, prog_data), > key(key), gs_compile(NULL), prog_data(prog_data), prog(prog), > + input_vue_map(input_vue_map), > dispatch_width(dispatch_width), > shader_time_index(shader_time_index), > bld(fs_builder(this, dispatch_width).at_end()) > @@ -972,6 +977,9 @@ fs_visitor::init() > case MESA_SHADER_VERTEX: > key_tex = &((const brw_vs_prog_key *) key)->tex; > break; > + case MESA_SHADER_TESS_EVAL: > + key_tex = &((const brw_tes_prog_key *) key)->tex; > + break; > case MESA_SHADER_GEOMETRY: > key_tex = &((const brw_gs_prog_key *) key)->tex; > break; > diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp > b/src/mesa/drivers/dri/i965/brw_link.cpp > index 31d29ec..f5a7d20 100644 > --- a/src/mesa/drivers/dri/i965/brw_link.cpp > +++ b/src/mesa/drivers/dri/i965/brw_link.cpp > @@ -42,6 +42,7 @@ brw_shader_precompile(struct gl_context *ctx, > struct gl_shader_program *sh_prog) > { > struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX]; > + struct gl_shader *tes = sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; > struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; > struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; > struct gl_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE]; > @@ -52,6 +53,9 @@ brw_shader_precompile(struct gl_context *ctx, > if (gs && !brw_gs_precompile(ctx, sh_prog, gs->Program)) > return false; > > + if (tes && !brw_tes_precompile(ctx, sh_prog, tes->Program)) > + return false; > + > if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program)) > return false; > > diff --git a/src/mesa/drivers/dri/i965/brw_program.h > 
b/src/mesa/drivers/dri/i965/brw_program.h > index 339b8e1..1cdab97 100644 > --- a/src/mesa/drivers/dri/i965/brw_program.h > +++ b/src/mesa/drivers/dri/i965/brw_program.h > @@ -56,6 +56,8 @@ void > brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog, > struct gl_shader *shader, struct gl_program *prog); > > +void brw_upload_tes_prog(struct brw_context *brw); > + > #ifdef __cplusplus > } /* extern "C" */ > #endif > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > b/src/mesa/drivers/dri/i965/brw_shader.cpp > index 7a6751b..d954568 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > @@ -24,6 +24,7 @@ > #include "brw_context.h" > #include "brw_cfg.h" > #include "brw_eu.h" > +#include "brw_fs.h" > #include "brw_nir.h" > #include "glsl/glsl_parser_extras.h" > #include "main/shaderobj.h" > @@ -84,6 +85,7 @@ brw_compiler_create(void *mem_ctx, const struct > brw_device_info *devinfo) > > compiler->scalar_stage[MESA_SHADER_VERTEX] = > devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS); > + compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = true; > compiler->scalar_stage[MESA_SHADER_GEOMETRY] = > devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false); > compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true; > @@ -135,6 +137,8 @@ brw_compiler_create(void *mem_ctx, const struct > brw_device_info *devinfo) > compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true; > } > > + > compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = > false; > + > if (compiler->scalar_stage[MESA_SHADER_GEOMETRY]) > > compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = > false; > > @@ -1289,3 +1293,93 @@ gl_clip_plane *brw_select_clip_planes(struct > gl_context *ctx) > } > } > > +extern "C" const unsigned * > +brw_compile_tes(const struct brw_compiler *compiler, > + void *log_data, > + void *mem_ctx, > + const struct brw_tes_prog_key *key, > + struct 
brw_tes_prog_data *prog_data, > + const nir_shader *src_shader, > + struct gl_shader_program *shader_prog, > + int shader_time_index, > + unsigned *final_assembly_size, > + char **error_str) There was some discussion of making a new brw_compiler.c to cover the brw_compiler.h internal 'API'. Of course, all the brw_compile_* functions require C++, so maybe it would need to be brw_compiler.cpp. Thoughts? ... Reviewed-by: Jordan Justen <jordan.l.jus...@intel.com> > +{ > + const struct brw_device_info *devinfo = compiler->devinfo; > + struct gl_shader *shader = > + shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; > + const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL]; > + > + nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); > + nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); > + nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar); > + > + brw_compute_vue_map(devinfo, &prog_data->base.vue_map, > + nir->info.outputs_written, > + nir->info.separate_shader); > + > + unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4; > + > + assert(output_size_bytes >= 1); > + if (output_size_bytes > GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES) { > + if (error_str) > + *error_str = ralloc_strdup(mem_ctx, "DS outputs exceed maximum > size"); > + return NULL; > + } > + > + /* URB entry sizes are stored as a multiple of 64 bytes. */ > + prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; > + > + struct brw_vue_map input_vue_map; > + brw_compute_tess_vue_map(&input_vue_map, > + nir->info.inputs_read & > ~VARYING_BIT_PRIMITIVE_ID, > + nir->info.patch_inputs_read); > + > + bool need_patch_header = nir->info.system_values_read & > + (BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_OUTER) | > + BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_INNER)); > + > + /* The TES will pull most inputs using URB read messages. 
> + * > + * However, we push the patch header for TessLevel factors when required, > + * as it's a tiny amount of extra data. > + */ > + prog_data->base.urb_read_length = need_patch_header ? 1 : 0; > + > + if (unlikely(INTEL_DEBUG & DEBUG_TES)) { > + fprintf(stderr, "TES Input "); > + brw_print_vue_map(stderr, &input_vue_map); > + fprintf(stderr, "TES Output "); > + brw_print_vue_map(stderr, &prog_data->base.vue_map); > + } > + > + if (is_scalar) { > + fs_visitor v(compiler, log_data, mem_ctx, (void *) key, > + &prog_data->base.base, shader->Program, nir, 8, > + shader_time_index, &input_vue_map); > + if (!v.run_tes()) { > + if (error_str) > + *error_str = ralloc_strdup(mem_ctx, v.fail_msg); > + return NULL; > + } > + > + prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; > + > + fs_generator g(compiler, log_data, mem_ctx, (void *) key, > + &prog_data->base.base, v.promoted_constants, false, > + "TES"); > + if (unlikely(INTEL_DEBUG & DEBUG_TES)) { > + g.enable_debug(ralloc_asprintf(mem_ctx, > + "%s tessellation evaluation shader > %s", > + nir->info.label ? 
nir->info.label > + : "unnamed", > + nir->info.name)); > + } > + > + g.generate_code(v.cfg, 8); > + > + return g.get_assembly(final_assembly_size); > + } else { > + unreachable("XXX: vec4 tessellation evalation shaders not merged > yet."); > + } > +} > diff --git a/src/mesa/drivers/dri/i965/brw_shader.h > b/src/mesa/drivers/dri/i965/brw_shader.h > index 8c5778f..2e73f12 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.h > +++ b/src/mesa/drivers/dri/i965/brw_shader.h > @@ -273,6 +273,9 @@ brw_assign_common_binding_table_offsets(gl_shader_stage > stage, > bool brw_vs_precompile(struct gl_context *ctx, > struct gl_shader_program *shader_prog, > struct gl_program *prog); > +bool brw_tes_precompile(struct gl_context *ctx, > + struct gl_shader_program *shader_prog, > + struct gl_program *prog); > bool brw_gs_precompile(struct gl_context *ctx, > struct gl_shader_program *shader_prog, > struct gl_program *prog); > diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c > b/src/mesa/drivers/dri/i965/brw_state_upload.c > index cf3cf97..c657b25 100644 > --- a/src/mesa/drivers/dri/i965/brw_state_upload.c > +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c > @@ -678,6 +678,7 @@ brw_upload_programs(struct brw_context *brw, > { > if (pipeline == BRW_RENDER_PIPELINE) { > brw_upload_vs_prog(brw); > + brw_upload_tes_prog(brw); > > if (brw->gen < 6) > brw_upload_ff_gs_prog(brw); > @@ -691,6 +692,8 @@ brw_upload_programs(struct brw_context *brw, > bool old_separate = brw->vue_map_geom_out.separate; > if (brw->geometry_program) > brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map; > + else if (brw->tess_eval_program) > + brw->vue_map_geom_out = brw->tes.prog_data->base.vue_map; > else > brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map; > > diff --git a/src/mesa/drivers/dri/i965/brw_tes.c > b/src/mesa/drivers/dri/i965/brw_tes.c > new file mode 100644 > index 0000000..daa8f86 > --- /dev/null > +++ b/src/mesa/drivers/dri/i965/brw_tes.c > @@ -0,0 +1,300 @@ > +/* > + * 
Copyright © 2013 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > + * DEALINGS IN THE SOFTWARE. > + */ > + > +/** > + * \file brw_tes.c > + * > + * Tessellation evaluation shader state upload code. 
> + */ > + > +#include "brw_context.h" > +#include "brw_nir.h" > +#include "brw_program.h" > +#include "brw_shader.h" > +#include "brw_state.h" > +#include "program/prog_parameter.h" > + > +static void > +brw_tes_debug_recompile(struct brw_context *brw, > + struct gl_shader_program *shader_prog, > + const struct brw_tes_prog_key *key) > +{ > + struct brw_cache_item *c = NULL; > + const struct brw_tes_prog_key *old_key = NULL; > + bool found = false; > + > + perf_debug("Recompiling tessellation evaluation shader for program %d\n", > + shader_prog->Name); > + > + for (unsigned int i = 0; i < brw->cache.size; i++) { > + for (c = brw->cache.items[i]; c; c = c->next) { > + if (c->cache_id == BRW_CACHE_TES_PROG) { > + old_key = c->key; > + > + if (old_key->program_string_id == key->program_string_id) > + break; > + } > + } > + if (c) > + break; > + } > + > + if (!c) { > + perf_debug(" Didn't find previous compile in the shader cache for " > + "debug\n"); > + return; > + } > + > + found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex); > + > + if (!found) { > + perf_debug(" Something else\n"); > + } > +} > + > +static bool > +brw_codegen_tes_prog(struct brw_context *brw, > + struct gl_shader_program *shader_prog, > + struct brw_tess_eval_program *tep, > + struct brw_tes_prog_key *key) > +{ > + const struct brw_compiler *compiler = brw->intelScreen->compiler; > + const struct brw_device_info *devinfo = brw->intelScreen->devinfo; > + struct brw_stage_state *stage_state = &brw->tes.base; > + nir_shader *nir = tep->program.Base.nir; > + struct brw_tes_prog_data prog_data; > + bool start_busy = false; > + double start_time = 0; > + > + memset(&prog_data, 0, sizeof(prog_data)); > + > + brw_assign_common_binding_table_offsets(MESA_SHADER_TESS_EVAL, devinfo, > + shader_prog, &tep->program.Base, > + &prog_data.base.base, 0); > + > + switch (tep->program.Spacing) { > + case GL_EQUAL: > + prog_data.partitioning = BRW_TESS_PARTITIONING_INTEGER; > + break; > + case 
GL_FRACTIONAL_ODD: > + prog_data.partitioning = BRW_TESS_PARTITIONING_ODD_FRACTIONAL; > + break; > + case GL_FRACTIONAL_EVEN: > + prog_data.partitioning = BRW_TESS_PARTITIONING_EVEN_FRACTIONAL; > + break; > + default: > + unreachable("invalid domain shader spacing"); > + } > + > + switch (tep->program.PrimitiveMode) { > + case GL_QUADS: > + prog_data.domain = BRW_TESS_DOMAIN_QUAD; > + break; > + case GL_TRIANGLES: > + prog_data.domain = BRW_TESS_DOMAIN_TRI; > + break; > + case GL_ISOLINES: > + prog_data.domain = BRW_TESS_DOMAIN_ISOLINE; > + break; > + default: > + unreachable("invalid domain shader primitive mode"); > + } > + > + if (tep->program.PointMode) { > + prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_POINT; > + } else if (tep->program.PrimitiveMode == GL_ISOLINES) { > + prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_LINE; > + } else { > + /* Hardware winding order is backwards from OpenGL */ > + switch (tep->program.VertexOrder) { > + case GL_CCW: > + prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW; > + break; > + case GL_CW: > + prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW; > + break; > + default: > + unreachable("invalid domain shader vertex order"); > + } > + } > + > + /* Allocate the references to the uniforms that will end up in the > + * prog_data associated with the compiled program, and which will be freed > + * by the state cache. > + * > + * Note: param_count needs to be num_uniform_components * 4, since we add > + * padding around uniform values below vec4 size, so the worst case is > that > + * every uniform is a float which gets padded to the size of a vec4. 
> + */ > + struct gl_shader *tes = > shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; > + int param_count = nir->num_uniforms; > + if (!compiler->scalar_stage[MESA_SHADER_TESS_EVAL]) > + param_count *= 4; > + > + prog_data.base.base.param = > + rzalloc_array(NULL, const gl_constant_value *, param_count); > + prog_data.base.base.pull_param = > + rzalloc_array(NULL, const gl_constant_value *, param_count); > + prog_data.base.base.image_param = > + rzalloc_array(NULL, struct brw_image_param, tes->NumImages); > + prog_data.base.base.nr_params = param_count; > + prog_data.base.base.nr_image_params = tes->NumImages; > + > + brw_nir_setup_glsl_uniforms(nir, shader_prog, &tep->program.Base, > + &prog_data.base.base, > + > compiler->scalar_stage[MESA_SHADER_TESS_EVAL]); > + > + if (unlikely(INTEL_DEBUG & DEBUG_TES)) > + brw_dump_ir("tessellation evaluation", shader_prog, tes, NULL); > + > + int st_index = -1; > + if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME)) > + st_index = brw_get_shader_time_index(brw, shader_prog, NULL, ST_TES); > + > + if (unlikely(brw->perf_debug)) { > + start_busy = brw->batch.last_bo && > drm_intel_bo_busy(brw->batch.last_bo); > + start_time = get_time(); > + } > + > + void *mem_ctx = ralloc_context(NULL); > + unsigned program_size; > + char *error_str; > + const unsigned *program = > + brw_compile_tes(compiler, brw, mem_ctx, key, &prog_data, nir, > + shader_prog, st_index, &program_size, &error_str); > + if (program == NULL) { > + if (shader_prog) { > + shader_prog->LinkStatus = false; > + ralloc_strcat(&shader_prog->InfoLog, error_str); > + } > + > + _mesa_problem(NULL, "Failed to compile tessellation evaluation shader: > " > + "%s\n", error_str); > + > + ralloc_free(mem_ctx); > + return false; > + } > + > + if (unlikely(brw->perf_debug)) { > + struct brw_shader *btes = (struct brw_shader *) tes; > + if (btes->compiled_once) { > + brw_tes_debug_recompile(brw, shader_prog, key); > + } > + if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) 
{ > + perf_debug("TES compile took %.03f ms and stalled the GPU\n", > + (get_time() - start_time) * 1000); > + } > + btes->compiled_once = true; > + } > + > + /* Scratch space is used for register spilling */ > + if (prog_data.base.base.total_scratch) { > + brw_get_scratch_bo(brw, &stage_state->scratch_bo, > + prog_data.base.base.total_scratch * > + brw->max_ds_threads); > + } > + > + brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG, > + key, sizeof(*key), > + program, program_size, > + &prog_data, sizeof(prog_data), > + &stage_state->prog_offset, &brw->tes.prog_data); > + ralloc_free(mem_ctx); > + > + return true; > +} > + > + > +void > +brw_upload_tes_prog(struct brw_context *brw) > +{ > + struct gl_context *ctx = &brw->ctx; > + struct gl_shader_program **current = ctx->_Shader->CurrentProgram; > + struct brw_stage_state *stage_state = &brw->tes.base; > + struct brw_tes_prog_key key; > + /* BRW_NEW_TESS_EVAL_PROGRAM */ > + struct brw_tess_eval_program *tep = > + (struct brw_tess_eval_program *) brw->tess_eval_program; > + > + if (!brw_state_dirty(brw, > + _NEW_TEXTURE, > + BRW_NEW_TESS_EVAL_PROGRAM)) > + return; > + > + if (tep == NULL) { > + /* Other state atoms had better not try to access prog_data, since > + * there's no TES program. 
> + */ > + brw->tes.prog_data = NULL; > + brw->tes.base.prog_data = NULL; > + return; > + } > + > + struct gl_program *prog = &tep->program.Base; > + > + memset(&key, 0, sizeof(key)); > + > + key.program_string_id = tep->id; > + > + /* _NEW_TEXTURE */ > + brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count, > + &key.tex); > + > + if (!brw_search_cache(&brw->cache, BRW_CACHE_TES_PROG, > + &key, sizeof(key), > + &stage_state->prog_offset, &brw->tes.prog_data)) { > + bool success = brw_codegen_tes_prog(brw, > current[MESA_SHADER_TESS_EVAL], > + tep, &key); > + assert(success); > + (void)success; > + } > + brw->tes.base.prog_data = &brw->tes.prog_data->base.base; > +} > + > + > +bool > +brw_tes_precompile(struct gl_context *ctx, > + struct gl_shader_program *shader_prog, > + struct gl_program *prog) > +{ > + struct brw_context *brw = brw_context(ctx); > + struct brw_tes_prog_key key; > + uint32_t old_prog_offset = brw->tes.base.prog_offset; > + struct brw_tes_prog_data *old_prog_data = brw->tes.prog_data; > + bool success; > + > + struct gl_tess_eval_program *tep = (struct gl_tess_eval_program *)prog; > + struct brw_tess_eval_program *btep = brw_tess_eval_program(tep); > + > + memset(&key, 0, sizeof(key)); > + > + key.program_string_id = btep->id; > + brw_setup_tex_for_precompile(brw, &key.tex, prog); > + > + success = brw_codegen_tes_prog(brw, shader_prog, btep, &key); > + > + brw->tes.base.prog_offset = old_prog_offset; > + brw->tes.prog_data = old_prog_data; > + > + return success; > +} > -- > 2.6.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev