Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com> --- src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_cs.cpp | 216 +++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_state_upload.c | 3 + 3 files changed, 220 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index fb24f0e..170c0c6 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -148,6 +148,7 @@ struct brw_vs_prog_key;
 struct brw_vue_prog_key;
 struct brw_wm_prog_key;
 struct brw_wm_prog_data;
+struct brw_cs_prog_key;
 struct brw_cs_prog_data;
 
 enum brw_pipeline {
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp
index 8021147..5be740c 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -22,8 +22,15 @@
  */
 
 
+#include "util/ralloc.h"
 #include "brw_context.h"
 #include "brw_cs.h"
+#include "brw_fs.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+#include "intel_mipmap_tree.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
 
 extern "C"
 bool
@@ -46,3 +53,252 @@ brw_cs_prog_data_compare(const void *in_a, const void *in_b)
 
    return true;
 }
+
+
+/**
+ * Compile the compute program into native code.
+ *
+ * A SIMD8 compile always runs.  A SIMD16 compile is additionally tried on
+ * gen >= 5 unless DEBUG_NO16 is set or the SIMD8 pass flagged SIMD16 as
+ * unsupported.  Code is generated from the SIMD16 CFG when one exists and
+ * from the SIMD8 CFG otherwise; prog_data->simd_size records the choice.
+ *
+ * \p prog may be NULL; it is checked before every dereference here.
+ *
+ * \return NULL if the SIMD8 compile failed, otherwise the result of
+ *         fs_generator::get_assembly(final_assembly_size).
+ */
+static const unsigned *
+brw_cs_emit(struct brw_context *brw,
+            void *mem_ctx,
+            const struct brw_cs_prog_key *key,
+            struct brw_cs_prog_data *prog_data,
+            struct gl_compute_program *cp,
+            struct gl_shader_program *prog,
+            unsigned *final_assembly_size)
+{
+   bool start_busy = false;
+   double start_time = 0;
+
+   if (unlikely(brw->perf_debug)) {
+      start_busy = (brw->batch.last_bo &&
+                    drm_intel_bo_busy(brw->batch.last_bo));
+      start_time = get_time();
+   }
+
+   struct brw_shader *shader = NULL;
+   if (prog)
+      shader = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
+
+   if (unlikely(INTEL_DEBUG & DEBUG_CS))
+      brw_dump_ir("compute", prog, &shader->base, &cp->Base);
+
+   /* Now the main event: Visit the shader IR and generate our CS IR for it.
+    */
+   fs_visitor v(brw, mem_ctx, key, prog_data, prog, cp, 8);
+   if (!v.run_cs()) {
+      if (prog) {
+         prog->LinkStatus = false;
+         ralloc_strcat(&prog->InfoLog, v.fail_msg);
+      }
+
+      _mesa_problem(NULL, "Failed to compile compute shader: %s\n",
+                    v.fail_msg);
+
+      return NULL;
+   }
+
+   cfg_t *simd16_cfg = NULL;
+   fs_visitor v2(brw, mem_ctx, key, prog_data, prog, cp, 16);
+   if (brw->gen >= 5 && likely(!(INTEL_DEBUG & DEBUG_NO16))) {
+      if (!v.simd16_unsupported) {
+         /* Try a SIMD16 compile */
+         v2.import_uniforms(&v);
+         if (!v2.run_cs()) {
+            perf_debug("SIMD16 shader failed to compile, falling back to "
+                       "SIMD8 at a 10-20%% performance cost: %s", v2.fail_msg);
+         } else {
+            simd16_cfg = v2.cfg;
+         }
+      } else {
+         perf_debug("SIMD16 shader unsupported, falling back to "
+                    "SIMD8 at a 10-20%% performance cost: %s", v.no16_msg);
+      }
+   }
+
+   prog_data->local_size[0] = cp->LocalSize[0];
+   prog_data->local_size[1] = cp->LocalSize[1];
+   prog_data->local_size[2] = cp->LocalSize[2];
+
+   cfg_t *simd8_cfg;
+   int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || brw->no_simd8;
+   if (no_simd8 && simd16_cfg) {
+      simd8_cfg = NULL;
+      prog_data->no_8 = true;
+   } else {
+      simd8_cfg = v.cfg;
+      prog_data->no_8 = false;
+   }
+
+   fs_generator g(brw, mem_ctx, (void*) key, &prog_data->base, &cp->Base,
+                  v.runtime_check_aads_emit, "CS");
+   if (INTEL_DEBUG & DEBUG_CS) {
+      /* prog may be NULL (see the checks above), so guard it here as well. */
+      char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d",
+                                   (prog && prog->Label) ? prog->Label
+                                                         : "unnamed",
+                                   prog ? prog->Name : 0);
+      g.enable_debug(name);
+   }
+   if (simd16_cfg) {
+      prog_data->simd_size = 16;
+      g.generate_code(simd16_cfg, 16);
+   } else if (simd8_cfg) {
+      prog_data->simd_size = 8;
+      g.generate_code(simd8_cfg, 8);
+   }
+
+   if (unlikely(brw->perf_debug) && shader) {
+      if (shader->compiled_once) {
+         _mesa_problem(&brw->ctx, "CS programs shouldn't need recompiles");
+      }
+      shader->compiled_once = true;
+
+      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+         perf_debug("CS compile took %.03f ms and stalled the GPU\n",
+                    (get_time() - start_time) * 1000);
+      }
+   }
+
+   return g.get_assembly(final_assembly_size);
+}
+
+/**
+ * Compile the compute program for \p key and hand the result to the state
+ * cache.
+ *
+ * The compiled program and its prog_data go to brw_upload_cache() along with
+ * &brw->cs.base.prog_offset and &brw->cs.prog_data.
+ *
+ * \return false if brw_cs_emit() produced no program, true otherwise.
+ */
+static bool
+do_cs_prog(struct brw_context *brw,
+           struct gl_shader_program *prog,
+           struct brw_compute_program *cp,
+           struct brw_cs_prog_key *key)
+{
+   struct gl_context *ctx = &brw->ctx;
+   const GLuint *program;
+   void *mem_ctx = ralloc_context(NULL);
+   GLuint program_size;
+   struct brw_cs_prog_data prog_data;
+   struct gl_shader *cs = NULL;
+
+   if (prog)
+      cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE];
+
+   memset(&prog_data, 0, sizeof(prog_data));
+
+   /* Allocate the references to the uniforms that will end up in the
+    * prog_data associated with the compiled program, and which will be freed
+    * by the state cache.
+    */
+   int param_count;
+   if (cs) {
+      param_count = cs->num_uniform_components;
+   } else {
+      param_count = cp->program.Base.Parameters->NumParameters * 4;
+   }
+
+   /* The backend also sometimes adds params for texture size. */
+   param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
+   prog_data.base.param =
+      rzalloc_array(NULL, const gl_constant_value *, param_count);
+   prog_data.base.pull_param =
+      rzalloc_array(NULL, const gl_constant_value *, param_count);
+   prog_data.base.nr_params = param_count;
+
+   program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
+                         &cp->program, prog, &program_size);
+   if (program == NULL) {
+      /* Nothing reached the state cache, so the param arrays allocated above
+       * still belong to us and must be released here.
+       */
+      ralloc_free(prog_data.base.param);
+      ralloc_free(prog_data.base.pull_param);
+      ralloc_free(mem_ctx);
+      return false;
+   }
+
+   // if (prog_data.total_scratch) {
+   //    brw_get_scratch_bo(brw, &brw->wm.base.scratch_bo,
+   //                       prog_data.total_scratch * brw->max_wm_threads);
+   // }
+
+   if (unlikely(INTEL_DEBUG & DEBUG_CS))
+      fprintf(stderr, "\n");
+
+   brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG,
+                    key, sizeof(*key),
+                    program, program_size,
+                    &prog_data, sizeof(prog_data),
+                    &brw->cs.base.prog_offset, &brw->cs.prog_data);
+   ralloc_free(mem_ctx);
+
+   return true;
+}
+
+
+/**
+ * Build the cache key for brw->compute_program: \p key is zeroed and then
+ * tagged with the program's id.
+ */
+static void
+brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key)
+{
+   /* BRW_NEW_COMPUTE_PROGRAM */
+   const struct brw_compute_program *cp =
+      (struct brw_compute_program *) brw->compute_program;
+
+   memset(key, 0, sizeof(*key));
+
+   /* The unique compute program ID */
+   key->program_string_id = cp->id;
+}
+
+
+/**
+ * Look up, and if necessary compile, the code for brw->compute_program.
+ *
+ * Returns early when no compute program is set or when brw_state_dirty()
+ * reports nothing for BRW_NEW_COMPUTE_PROGRAM.  On a state cache miss the
+ * program is compiled through do_cs_prog().
+ */
+extern "C"
+void
+brw_upload_cs_prog(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->ctx;
+   struct brw_cs_prog_key key;
+   struct brw_compute_program *cp = (struct brw_compute_program *)
+      brw->compute_program;
+
+   if (!cp)
+      return;
+
+   if (!brw_state_dirty(brw, 0, BRW_NEW_COMPUTE_PROGRAM))
+      return;
+
+   brw_cs_populate_key(brw, &key);
+
+   if (!brw_search_cache(&brw->cache, BRW_CACHE_CS_PROG,
+                         &key, sizeof(key),
+                         &brw->cs.base.prog_offset, &brw->cs.prog_data)) {
+      bool success = do_cs_prog(brw, ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE], cp,
+                                &key);
+      (void) success;
+      assert(success);
+   }
+   brw->cs.base.prog_data = &brw->cs.prog_data->base;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c
b/src/mesa/drivers/dri/i965/brw_state_upload.c index 25f2ff0..952d3c1 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -40,6 +40,7 @@ #include "brw_ff_gs.h" #include "brw_gs.h" #include "brw_wm.h" +#include "brw_cs.h" static const struct brw_tracked_state *gen4_atoms[] = { @@ -606,6 +607,8 @@ brw_upload_programs(struct brw_context *brw) brw_upload_gs_prog(brw); brw_upload_wm_prog(brw); + + brw_upload_cs_prog(brw); } static inline void -- 2.1.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev