With the advent of SPIR-V subgroup operations, compute shaders will have to be slightly different depending on the SIMD size at which they execute. In order to allow us to do dispatch-width specific things in NIR, we re-run the final NIR stages for each sIMD width.
One side-effect of this change is that we start rallocing fs_visitors which means we need DECLARE_RALLOC_CXX_OPERATORS. --- src/intel/compiler/brw_fs.cpp | 103 ++++++++++++++++++++++++++---------------- src/intel/compiler/brw_fs.h | 2 + 2 files changed, 66 insertions(+), 39 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 5a35a33..a548df7 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6768,6 +6768,20 @@ cs_set_simd_size(struct brw_cs_prog_data *cs_prog_data, unsigned size) cs_prog_data->threads = (group_size + size - 1) / size; } +static nir_shader * +compile_cs_to_nir(const struct brw_compiler *compiler, + void *mem_ctx, + const struct brw_cs_prog_key *key, + struct brw_cs_prog_data *prog_data, + const nir_shader *src_shader, + unsigned dispatch_width) +{ + nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); + shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true); + brw_nir_lower_cs_intrinsics(shader); + return brw_postprocess_nir(shader, compiler, true); +} + const unsigned * brw_compile_cs(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, @@ -6778,17 +6792,12 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, unsigned *final_assembly_size, char **error_str) { - nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); - shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true); - brw_nir_lower_cs_intrinsics(shader); - shader = brw_postprocess_nir(shader, compiler, true); - - prog_data->local_size[0] = shader->info.cs.local_size[0]; - prog_data->local_size[1] = shader->info.cs.local_size[1]; - prog_data->local_size[2] = shader->info.cs.local_size[2]; + prog_data->local_size[0] = src_shader->info.cs.local_size[0]; + prog_data->local_size[1] = src_shader->info.cs.local_size[1]; + prog_data->local_size[2] = src_shader->info.cs.local_size[2]; unsigned local_workgroup_size = - shader->info.cs.local_size[0] * shader->info.cs.local_size[1] * - shader->info.cs.local_size[2]; + src_shader->info.cs.local_size[0] * src_shader->info.cs.local_size[1] * + src_shader->info.cs.local_size[2]; unsigned min_dispatch_width = DIV_ROUND_UP(local_workgroup_size, compiler->devinfo->max_cs_threads); @@ -6796,71 +6805,87 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, min_dispatch_width = util_next_power_of_two(min_dispatch_width); assert(min_dispatch_width <= 32); + + fs_visitor *v8 = NULL, *v16 = NULL, *v32 = NULL; cfg_t *cfg = NULL; const char *fail_msg = NULL; + unsigned promoted_constants; /* Now the main event: Visit the shader IR and generate our CS IR for it. */ - fs_visitor v8(compiler, log_data, mem_ctx, key, &prog_data->base, - NULL, /* Never used in core profile */ - shader, 8, shader_time_index); if (min_dispatch_width <= 8) { - if (!v8.run_cs(min_dispatch_width)) { - fail_msg = v8.fail_msg; + nir_shader *nir8 = compile_cs_to_nir(compiler, mem_ctx, key, + prog_data, src_shader, 8); + v8 = new(mem_ctx) fs_visitor(compiler, log_data, mem_ctx, key, + &prog_data->base, + NULL, /* Never used in core profile */ + nir8, 8, shader_time_index); + if (!v8->run_cs(min_dispatch_width)) { + fail_msg = v8->fail_msg; } else { - cfg = v8.cfg; + cfg = v8->cfg; cs_set_simd_size(prog_data, 8); cs_fill_push_const_info(compiler->devinfo, prog_data); + promoted_constants = v8->promoted_constants; } } - fs_visitor v16(compiler, log_data, mem_ctx, key, &prog_data->base, - NULL, /* Never used in core profile */ - shader, 16, shader_time_index); if (likely(!(INTEL_DEBUG & DEBUG_NO16)) && !fail_msg && min_dispatch_width <= 16) { /* Try a SIMD16 compile */ - if (min_dispatch_width <= 8) - v16.import_uniforms(&v8); - if (!v16.run_cs(min_dispatch_width)) { + nir_shader *nir16 = compile_cs_to_nir(compiler, mem_ctx, key, + prog_data, src_shader, 16); + v16 = new(mem_ctx) fs_visitor(compiler, log_data, mem_ctx, key, + &prog_data->base, + NULL, /* Never used in core profile */ + nir16, 16, shader_time_index); + if (v8) + v16->import_uniforms(v8); + + if (!v16->run_cs(min_dispatch_width)) { compiler->shader_perf_log(log_data, "SIMD16 shader failed to compile: %s", - v16.fail_msg); + v16->fail_msg); if (!cfg) { fail_msg = "Couldn't generate SIMD16 program and not " "enough threads for SIMD8"; } } else { - cfg = v16.cfg; + cfg = v16->cfg; cs_set_simd_size(prog_data, 16); cs_fill_push_const_info(compiler->devinfo, prog_data); + promoted_constants = v16->promoted_constants; } } - fs_visitor v32(compiler, log_data, mem_ctx, key, &prog_data->base, - NULL, /* Never used in core profile */ - shader, 32, shader_time_index); if (!fail_msg && (min_dispatch_width > 16 || (INTEL_DEBUG & DEBUG_DO32))) { /* Try a SIMD32 compile */ - if (min_dispatch_width <= 8) - v32.import_uniforms(&v8); - else if (min_dispatch_width <= 16) - v32.import_uniforms(&v16); - - if (!v32.run_cs(min_dispatch_width)) { + nir_shader *nir32 = compile_cs_to_nir(compiler, mem_ctx, key, + prog_data, src_shader, 32); + v32 = new(mem_ctx) fs_visitor(compiler, log_data, mem_ctx, key, + &prog_data->base, + NULL, /* Never used in core profile */ + nir32, 32, shader_time_index); + if (v8) + v32->import_uniforms(v8); + else if (v16) + v32->import_uniforms(v16); + + if (!v32->run_cs(min_dispatch_width)) { compiler->shader_perf_log(log_data, "SIMD32 shader failed to compile: %s", - v16.fail_msg); + v16->fail_msg); if (!cfg) { fail_msg = "Couldn't generate SIMD32 program and not " "enough threads for SIMD16"; } } else { - cfg = v32.cfg; + cfg = v32->cfg; cs_set_simd_size(prog_data, 32); cs_fill_push_const_info(compiler->devinfo, prog_data); + promoted_constants = v32->promoted_constants; } } @@ -6873,12 +6898,12 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, } fs_generator g(compiler, log_data, mem_ctx, (void*) key, &prog_data->base, - v8.promoted_constants, false, MESA_SHADER_COMPUTE); + promoted_constants, false, MESA_SHADER_COMPUTE); if (INTEL_DEBUG & DEBUG_CS) { char *name = ralloc_asprintf(mem_ctx, "%s compute shader %s", - shader->info.label ? shader->info.label : - "unnamed", - shader->info.name); + src_shader->info.label ? + src_shader->info.label : "unnamed", + src_shader->info.name); g.enable_debug(name); } diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index f51a4d8..d3ab385 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -60,6 +60,8 @@ offset(const fs_reg ®, const brw::fs_builder &bld, unsigned delta) class fs_visitor : public backend_shader { public: + DECLARE_RALLOC_CXX_OPERATORS(fs_reg) + fs_visitor(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const void *key, -- 2.5.0.400.gff86faf _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev