From: Marek Olšák <marek.ol...@amd.com> --- src/compiler/nir/nir.h | 8 +++++ src/compiler/nir/nir_opt_intrinsics.c | 40 +++++++++++++++++++++-- src/gallium/drivers/radeonsi/si_get.c | 1 + src/mesa/state_tracker/st_glsl_to_nir.cpp | 1 + 4 files changed, 48 insertions(+), 2 deletions(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 09950bf3398..d3874705235 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2283,20 +2283,28 @@ typedef struct nir_shader_compiler_options { * * This depends on some possibly hw implementation details, which may * not be true for all hw. In particular that the FS is only executed * for covered samples or for helper invocations. So, do not blindly * enable this option. * * Note: See also issue #22 in ARB_shader_image_load_store */ bool lower_helper_invocation; + /** + * Convert gl_SampleMaskIn to gl_HelperInvocation as follows: + * + * gl_SampleMaskIn == 0 ---> gl_HelperInvocation + * gl_SampleMaskIn != 0 ---> !gl_HelperInvocation + */ + bool optimize_sample_mask_in; + bool lower_cs_local_index_from_id; bool lower_cs_local_id_from_index; bool lower_device_index_to_zero; /* Set if nir_lower_wpos_ytransform() should also invert gl_PointCoord. */ bool lower_wpos_pntc; bool lower_hadd; bool lower_add_sat; diff --git a/src/compiler/nir/nir_opt_intrinsics.c b/src/compiler/nir/nir_opt_intrinsics.c index 7b054faa204..2f9e4e466c9 100644 --- a/src/compiler/nir/nir_opt_intrinsics.c +++ b/src/compiler/nir/nir_opt_intrinsics.c @@ -22,21 +22,22 @@ */ #include "nir.h" #include "nir_builder.h" /** * \file nir_opt_intrinsics.c */ static bool -opt_intrinsics_impl(nir_function_impl *impl) +opt_intrinsics_impl(nir_function_impl *impl, + const struct nir_shader_compiler_options *options) { nir_builder b; nir_builder_init(&b, impl); bool progress = false; nir_foreach_block(block, impl) { nir_foreach_instr_safe(instr, block) { if (instr->type != nir_instr_type_intrinsic) continue; @@ -48,20 +49,55 @@ opt_intrinsics_impl(nir_function_impl *impl) case nir_intrinsic_vote_any: case nir_intrinsic_vote_all: if (nir_src_is_const(intrin->src[0])) replacement = nir_ssa_for_src(&b, intrin->src[0], 1); break; case nir_intrinsic_vote_feq: case nir_intrinsic_vote_ieq: if (nir_src_is_const(intrin->src[0])) replacement = nir_imm_true(&b); break; + case nir_intrinsic_load_sample_mask_in: + /* Transform: + * gl_SampleMaskIn == 0 ---> gl_HelperInvocation + * gl_SampleMaskIn != 0 ---> !gl_HelperInvocation + */ + if (!options->optimize_sample_mask_in) + continue; + + nir_foreach_use_safe(use_src, &intrin->dest.ssa) { + if (use_src->parent_instr->type == nir_instr_type_alu) { + nir_alu_instr *alu = nir_instr_as_alu(use_src->parent_instr); + + if (alu->op == nir_op_ieq || + alu->op == nir_op_ine) { + /* Check for 0 in either operand. */ + nir_const_value *const_val = + nir_src_as_const_value(alu->src[0].src); + if (!const_val) + const_val = nir_src_as_const_value(alu->src[1].src); + if (!const_val || const_val->i32[0] != 0) + continue; + + nir_ssa_def *new_expr = nir_load_helper_invocation(&b, 1); + + if (alu->op == nir_op_ine32) + new_expr = nir_inot(&b, new_expr); + + nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, + nir_src_for_ssa(new_expr)); + nir_instr_remove(&alu->instr); + continue; + } + } + } + continue; default: break; } if (!replacement) continue; nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(replacement)); nir_instr_remove(instr); @@ -74,19 +110,19 @@ opt_intrinsics_impl(nir_function_impl *impl) bool nir_opt_intrinsics(nir_shader *shader) { bool progress = false; nir_foreach_function(function, shader) { if (!function->impl) continue; - if (opt_intrinsics_impl(function->impl)) { + if (opt_intrinsics_impl(function->impl, shader->options)) { progress = true; nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance); } } return progress; } diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 58b56b34d13..2142d5a33f2 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -494,20 +494,21 @@ static const struct nir_shader_compiler_options nir_options = { .lower_pack_snorm_2x16 = true, .lower_pack_snorm_4x8 = true, .lower_pack_unorm_2x16 = true, .lower_pack_unorm_4x8 = true, .lower_unpack_snorm_2x16 = true, .lower_unpack_snorm_4x8 = true, .lower_unpack_unorm_2x16 = true, .lower_unpack_unorm_4x8 = true, .lower_extract_byte = true, .lower_extract_word = true, + .optimize_sample_mask_in = true, .max_unroll_iterations = 32, .native_integers = true, }; static const void * si_get_compiler_options(struct pipe_screen *screen, enum pipe_shader_ir ir, enum pipe_shader_type shader) { assert(ir == PIPE_SHADER_IR_NIR); diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index fb10869c9f9..8028ccf7110 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -497,20 +497,21 @@ st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog, /* This has to be done last. Any operation the can cause * prog->ParameterValues to get reallocated (e.g., anything that adds a * program constant) has to happen before creating this linkage. */ _mesa_associate_uniform_storage(st->ctx, shader_program, prog, true); st_set_prog_affected_state_flags(prog); NIR_PASS_V(nir, st_nir_lower_builtin); NIR_PASS_V(nir, gl_nir_lower_atomics, shader_program, true); + NIR_PASS_V(nir, nir_opt_intrinsics); nir_variable_mode mask = nir_var_function_temp; nir_remove_dead_variables(nir, mask); if (st->ctx->_Shader->Flags & GLSL_DUMP) { _mesa_log("\n"); _mesa_log("NIR IR for linked %s program %d:\n", _mesa_shader_stage_to_string(prog->info.stage), shader_program->Name); nir_print_shader(nir, _mesa_get_log_file()); -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev