For now, this pass can only combine SSBO loads within the same block. This is useful, for example, to make code such as this:

   buffer SSBO { mat4 sm4; };
   uniform mat4 um4;

   void main() {
      sm4 *= um4;
   }

go from 16 SSBO loads down to only 4.
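
As with other NIR optimizations, drivers would be expected to call this from their optimization loop; a hypothetical sketch (the exact loop and pass ordering vary per driver):

   bool progress;
   do {
      progress = false;
      progress |= nir_opt_cse(nir);
      progress |= nir_opt_load_combine(nir);
   } while (progress);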
---
 src/glsl/Makefile.sources           |   1 +
 src/glsl/nir/nir.h                  |   2 +
 src/glsl/nir/nir_opt_load_combine.c | 357 ++++++++++++++++++++++++++++++++++++
 3 files changed, 360 insertions(+)
 create mode 100644 src/glsl/nir/nir_opt_load_combine.c

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index ca87036..6f12434 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -67,6 +67,7 @@ NIR_FILES = \
 	nir/nir_opt_dead_cf.c \
 	nir/nir_opt_gcm.c \
 	nir/nir_opt_global_to_local.c \
+	nir/nir_opt_load_combine.c \
 	nir/nir_opt_peephole_ffma.c \
 	nir/nir_opt_peephole_select.c \
 	nir/nir_opt_remove_phis.c \
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 04a21a7..8881fc7 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -2015,6 +2015,8 @@ bool nir_opt_dead_cf(nir_shader *shader);
 
 void nir_opt_gcm(nir_shader *shader);
 
+bool nir_opt_load_combine(nir_shader *shader);
+
 bool nir_opt_peephole_select(nir_shader *shader);
 bool nir_opt_peephole_ffma(nir_shader *shader);
 
diff --git a/src/glsl/nir/nir_opt_load_combine.c b/src/glsl/nir/nir_opt_load_combine.c
new file mode 100644
index 0000000..926b1ab
--- /dev/null
+++ b/src/glsl/nir/nir_opt_load_combine.c
@@ -0,0 +1,357 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Iago Toral Quiroga <ito...@igalia.com>
+ *
+ */
+
+/*
+ * Implements a load-combine pass for load/store instructions. Similar to a
+ * CSE pass, but it needs to consider invalidation of cached loads by stores
+ * or memory barriers. It only works on local blocks for now.
+ */
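+
+/*
+ * For example (illustrative sketch, not actual NIR output), two direct
+ * loads from the same block and offset with no conflicting store in
+ * between:
+ *
+ *    vec4 ssa_2 = intrinsic load_ssbo (ssa_1) (0)
+ *    ...
+ *    vec4 ssa_7 = intrinsic load_ssbo (ssa_1) (0)
+ *
+ * are combined so that the second load is removed and its uses are
+ * rewritten to ssa_2.
+ */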
+
+#include "nir_instr_set.h"
+
+/*
+ * SSBO stores won't invalidate image loads, for example, so we classify
+ * load/store operations in groups and only invalidate / reuse intrinsics
+ * within the same group.
+ */
+enum intrinsic_groups {
+   INTRINSIC_GROUP_NONE = 0,
+   INTRINSIC_GROUP_SSBO
+};
+
+/* SSBO load/store */
+static bool
+is_indirect_store_ssbo(nir_intrinsic_instr *intrinsic)
+{
+   switch (intrinsic->intrinsic) {
+   case nir_intrinsic_store_ssbo_indirect:
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_ssbo_atomic_xor:
+   case nir_intrinsic_ssbo_atomic_exchange:
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+      return true;
+   default:
+      return false;
+   }
+}
+
+static bool
+is_direct_store_ssbo(nir_intrinsic_instr *intrinsic)
+{
+   return intrinsic->intrinsic == nir_intrinsic_store_ssbo;
+}
+
+static bool
+is_indirect_load_ssbo(nir_intrinsic_instr *intrinsic)
+{
+   return intrinsic->intrinsic == nir_intrinsic_load_ssbo_indirect;
+}
+
+static bool
+is_direct_load_ssbo(nir_intrinsic_instr *intrinsic)
+{
+   return intrinsic->intrinsic == nir_intrinsic_load_ssbo;
+}
+
+/*
+ * General load/store helpers: we'll add more groups to these as needed.
+ * For now we only support SSBOs.
+ */
+static bool
+is_indirect_store(nir_intrinsic_instr *intrinsic)
+{
+   return is_indirect_store_ssbo(intrinsic);
+}
+
+static bool
+is_direct_store(nir_intrinsic_instr *intrinsic)
+{
+   return is_direct_store_ssbo(intrinsic);
+}
+
+static bool
+is_store(nir_intrinsic_instr *intrinsic)
+{
+   return is_direct_store(intrinsic) || is_indirect_store(intrinsic);
+}
+
+static bool
+is_indirect_load(nir_intrinsic_instr *intrinsic)
+{
+   return is_indirect_load_ssbo(intrinsic);
+}
+
+static bool
+is_direct_load(nir_intrinsic_instr *intrinsic)
+{
+   return is_direct_load_ssbo(intrinsic);
+}
+
+static bool
+is_load(nir_intrinsic_instr *intrinsic)
+{
+   return is_direct_load(intrinsic) || is_indirect_load(intrinsic);
+}
+
+static bool
+is_memory_barrier(nir_intrinsic_instr *intrinsic)
+{
+   return intrinsic->intrinsic == nir_intrinsic_memory_barrier;
+}
+
+/* Removes all cached entries from the instruction set. */
+static void
+set_clear(struct nir_instr_set *instr_set)
+{
+   struct set_entry *entry;
+
+   set_foreach(instr_set->set, entry)
+      _mesa_set_remove(instr_set->set, entry);
+}
+
+static unsigned
+intrinsic_group(nir_intrinsic_instr *intrinsic)
+{
+   if (is_direct_load_ssbo(intrinsic) ||
+       is_indirect_load_ssbo(intrinsic) ||
+       is_direct_store_ssbo(intrinsic) ||
+       is_indirect_store_ssbo(intrinsic))
+      return INTRINSIC_GROUP_SSBO;
+
+   return INTRINSIC_GROUP_NONE;
+}
+
+static bool
+intrinsic_group_match(nir_intrinsic_instr *intrinsic1,
+                      nir_intrinsic_instr *intrinsic2)
+{
+   return intrinsic_group(intrinsic1) == intrinsic_group(intrinsic2);
+}
+
+/*
+ * Gets the block and offset sources of a load/store instruction.
+ *
+ * @instr: the intrinsic load/store operation
+ * @block: the block (SSBO index) source
+ * @offset: the indirect offset source (only set for indirect operations)
+ * @const_offset: the direct offset (only set for direct operations)
+ *
+ * Each out parameter can be NULL if the caller is not interested in it.
+ */
+static void
+get_load_store_address(nir_intrinsic_instr *instr,
+                       nir_src **block,
+                       nir_src **offset,
+                       unsigned *const_offset)
+{
+   int block_index = -1;
+   int offset_index = -1;
+   int const_offset_index = -1;
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_ssbo:
+      block_index = 0;
+      const_offset_index = 0;
+      break;
+   case nir_intrinsic_load_ssbo_indirect:
+      block_index = 0;
+      offset_index = 1;
+      break;
+   case nir_intrinsic_store_ssbo:
+      block_index = 1;
+      const_offset_index = 0;
+      break;
+   case nir_intrinsic_store_ssbo_indirect:
+      block_index = 1;
+      offset_index = 2;
+      break;
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_ssbo_atomic_xor:
+   case nir_intrinsic_ssbo_atomic_exchange:
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+      block_index = 0;
+      offset_index = 1;
+      break;
+   default:
+      assert(!"not implemented");
+   }
+
+   assert(block_index >= 0 && (offset_index >= 0 || const_offset_index >= 0));
+
+   if (block)
+      *block = &instr->src[block_index];
+
+   if (offset && offset_index >= 0)
+      *offset = &instr->src[offset_index];
+
+   if (const_offset && const_offset_index >= 0)
+      *const_offset = instr->const_index[const_offset_index];
+}
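+
+/*
+ * For instance (illustrative only): for the direct load
+ *
+ *    intrinsic load_ssbo (ssa_1) (16)
+ *
+ * this yields src[0] (ssa_1) as the block and const_index[0] == 16 as the
+ * direct offset, while for an indirect load the offset comes back in
+ * @offset and @const_offset is left untouched.
+ */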
+
+/*
+ * Traverses the set of cached load/store intrinsics and invalidates all
+ * that conflict with @store.
+ */
+static void
+set_invalidate_for_store(struct nir_instr_set *instr_set,
+                         nir_intrinsic_instr *store)
+{
+   assert(is_store(store));
+
+   bool store_is_indirect = is_indirect_store(store);
+
+   nir_src *store_block;
+   unsigned store_offset;
+   if (!store_is_indirect)
+      get_load_store_address(store, &store_block, NULL, &store_offset);
+
+   for (struct set_entry *entry = _mesa_set_next_entry(instr_set->set, NULL);
+        entry != NULL; entry = _mesa_set_next_entry(instr_set->set, entry)) {
+
+      /* Only invalidate instructions in the same load/store group */
+      assert(((nir_instr *) entry->key)->type == nir_instr_type_intrinsic);
+      nir_intrinsic_instr *cached =
+         nir_instr_as_intrinsic((nir_instr *) entry->key);
+      if (!intrinsic_group_match(store, cached))
+         continue;
+
+      /* If either side is indirect we cannot disambiguate the addresses,
+       * so invalidate conservatively.
+       */
+      bool cached_is_indirect =
+         is_indirect_load(cached) || is_indirect_store(cached);
+      if (store_is_indirect || cached_is_indirect) {
+         nir_instr_set_remove(instr_set, (nir_instr *) entry->key);
+      } else {
+         /* Both the store and the cached instruction are direct: they only
+          * conflict if both the block and the offset match.
+          */
+         nir_src *cached_block;
+         unsigned cached_offset;
+         get_load_store_address(cached, &cached_block, NULL, &cached_offset);
+
+         if (store_offset != cached_offset)
+            continue;
+
+         /* If the block sources are distinct values produced by the same
+          * kind of instruction, assume they address different blocks and
+          * keep the cached entry.
+          */
+         if (!nir_srcs_equal(*store_block, *cached_block) &&
+             store_block->ssa->parent_instr->type ==
+             cached_block->ssa->parent_instr->type)
+            continue;
+
+         nir_instr_set_remove(instr_set, (nir_instr *) entry->key);
+      }
+   }
+}
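+
+/*
+ * Worked example (illustrative): with a cached load_ssbo (ssa_1) (16),
+ * a later store_ssbo to (ssa_1) (16) removes the cached load, while a
+ * store to (ssa_1) (32) leaves it cached because the direct offsets
+ * differ. An indirect store invalidates every cached entry in its group,
+ * since its offset is unknown at compile time.
+ */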
+
+static bool
+load_combine_block(nir_block *block)
+{
+   bool progress = false;
+
+   /* This pass only works on local blocks for now, so we create and
+    * destroy the instruction set with each block.
+    */
+   struct nir_instr_set *instr_set = nir_instr_set_create(NULL, true);
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
+      if (is_load(intrinsic)) {
+         /* Try to rewrite with a previous load */
+         if (nir_instr_set_add_or_rewrite(instr_set, instr)) {
+            progress = true;
+            nir_instr_remove(instr);
+         }
+      } else if (is_store(intrinsic)) {
+         /* Invalidate conflicting loads/stores */
+         set_invalidate_for_store(instr_set, intrinsic);
+      } else if (is_memory_barrier(intrinsic)) {
+         /* If we see a memory barrier we have to invalidate all cached
+          * load/store operations.
+          */
+         set_clear(instr_set);
+      }
+   }
+
+   nir_instr_set_destroy(instr_set);
+
+   for (unsigned i = 0; i < block->num_dom_children; i++) {
+      nir_block *child = block->dom_children[i];
+      progress |= load_combine_block(child);
+   }
+
+   return progress;
+}
+
+static bool
+nir_opt_load_combine_impl(nir_function_impl *impl)
+{
+   nir_metadata_require(impl, nir_metadata_dominance);
+
+   bool progress = load_combine_block(nir_start_block(impl));
+
+   if (progress)
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+
+   return progress;
+}
+
+bool
+nir_opt_load_combine(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         progress |= nir_opt_load_combine_impl(overload->impl);
+   }
+
+   return progress;
+}
-- 
1.9.1