--- src/compiler/nir/nir_opt_load_combine.c | 155 ++++++++++++++++++++++++++------ 1 file changed, 129 insertions(+), 26 deletions(-)
diff --git a/src/compiler/nir/nir_opt_load_combine.c b/src/compiler/nir/nir_opt_load_combine.c index 09a34c6..3e3d066 100644 --- a/src/compiler/nir/nir_opt_load_combine.c +++ b/src/compiler/nir/nir_opt_load_combine.c @@ -38,7 +38,8 @@ enum intrinsic_groups { INTRINSIC_GROUP_NONE = 0, INTRINSIC_GROUP_ALL, - INTRINSIC_GROUP_SSBO + INTRINSIC_GROUP_SSBO, + INTRINSIC_GROUP_SHARED }; struct cache_node { @@ -86,33 +87,71 @@ is_memory_barrier_buffer(nir_intrinsic_instr *intrinsic) return intrinsic->intrinsic == nir_intrinsic_memory_barrier_buffer; } -/* - * General load/store functions: we'll add more groups to this as needed. - * For now we only support SSBOs. - */ +/* Shared variable load/store */ +static bool +is_atomic_shared(nir_intrinsic_instr *intrinsic) +{ + switch (intrinsic->intrinsic) { + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: + return true; + default: + return false; + } +} + +static inline bool +is_store_shared(nir_intrinsic_instr *intrinsic) +{ + return intrinsic->intrinsic == nir_intrinsic_store_shared || + is_atomic_shared(intrinsic); +} + +static inline bool +is_load_shared(nir_intrinsic_instr *intrinsic) +{ + return intrinsic->intrinsic == nir_intrinsic_load_shared; +} + +static inline bool +is_memory_barrier_shared(nir_intrinsic_instr *intrinsic) +{ + return intrinsic->intrinsic == nir_intrinsic_memory_barrier_shared; +} + +/* General intrinsic classification functions */ static inline bool is_store(nir_intrinsic_instr *intrinsic) { - return is_store_ssbo(intrinsic); + return is_store_ssbo(intrinsic) || is_store_shared(intrinsic); } static inline bool is_load(nir_intrinsic_instr 
*intrinsic) { - return is_load_ssbo(intrinsic); + return is_load_ssbo(intrinsic) || is_load_shared(intrinsic); } static inline bool is_atomic(nir_intrinsic_instr *intrinsic) { - return is_atomic_ssbo(intrinsic); + return is_atomic_ssbo(intrinsic) || is_atomic_shared(intrinsic); } static inline bool is_memory_barrier(nir_intrinsic_instr *intrinsic) { return intrinsic->intrinsic == nir_intrinsic_memory_barrier || - is_memory_barrier_buffer(intrinsic); + is_memory_barrier_buffer(intrinsic) || + is_memory_barrier_shared(intrinsic); } static unsigned @@ -123,6 +162,9 @@ intrinsic_group(nir_intrinsic_instr *intrinsic) else if (is_load_ssbo(intrinsic) || is_store_ssbo(intrinsic) || is_memory_barrier_buffer(intrinsic)) return INTRINSIC_GROUP_SSBO; + else if (is_load_shared(intrinsic) || is_store_shared(intrinsic) || + is_memory_barrier_shared(intrinsic)) + return INTRINSIC_GROUP_SHARED; else return INTRINSIC_GROUP_NONE; } @@ -178,18 +220,21 @@ nir_src_is_direct(nir_src *src) * Gets the block and offset of a load/store instruction. * * @instr: the intrinsic load/store operation - * @block: the output block + * @block: the output block, can be NULL if @base is non-NULL (shared-vars) * @offset: the output offset + * @base: the output base, can be NULL if @block is non-NULL (SSBO) */ static void get_load_store_address(nir_intrinsic_instr *instr, nir_src **block, - nir_src **offset) + nir_src **offset, + unsigned *base) { int block_index = -1; int offset_index = -1; + int base_index = -1; - assert(block && offset); + assert((block || base) && offset); switch (instr->intrinsic) { /* SSBO */ @@ -213,14 +258,46 @@ get_load_store_address(nir_intrinsic_instr *instr, offset_index = 1; break; + /* Shared-variables memory access is defined by a direct + * value 'base' (const_index[0]) and an indirect SSA value 'offset'. 
+ */ + case nir_intrinsic_load_shared: + base_index = 0; + offset_index = 0; + break; + + case nir_intrinsic_store_shared: + base_index = 0; + offset_index = 1; + break; + + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: + base_index = 0; + offset_index = 0; + break; + default: assert(!"not implemented"); } - assert(block_index >= 0 && offset_index >= 0); + assert((block_index >= 0 || base_index >= 0) && offset_index >= 0); + + if (block && block_index >= 0) + *block = &instr->src[block_index]; - *block = &instr->src[block_index]; *offset = &instr->src[offset_index]; + + if (base && base_index >= 0) + *base = instr->const_index[base_index]; } /** @@ -242,10 +319,13 @@ detect_memory_access_conflict(nir_intrinsic_instr *instr1, { nir_src *instr1_block = NULL; nir_src *instr1_offset = NULL; + unsigned instr1_base = 0; nir_src *instr2_block = NULL; nir_src *instr2_offset = NULL; + unsigned instr2_base = 0; bool blocks_match = false; bool offsets_match = false; + bool bases_match = false; if (full_match) *full_match = false; @@ -254,21 +334,30 @@ detect_memory_access_conflict(nir_intrinsic_instr *instr1, if (!intrinsic_group_match(instr1, instr2)) return false; - get_load_store_address(instr1, &instr1_block, &instr1_offset); - get_load_store_address(instr2, &instr2_block, &instr2_offset); + get_load_store_address(instr1, &instr1_block, &instr1_offset, &instr1_base); + get_load_store_address(instr2, &instr2_block, &instr2_offset, &instr2_base); - /* There is conflict if the blocks and the offsets of each instruction - * are not both direct or both indirect. 
If that's not the case, then - there is conflict if the blocks and offsets match. + /* There is conflict if the blocks (or bases) and the offsets of each + * instruction are not both direct or both indirect. If that's not the + * case, then there is conflict if the blocks (or bases) and offsets + * all match. */ - /* For SSBOs the block is an SSA value, but it can still be direct, - * if defined by a load_const instruction. - */ - if (nir_src_is_direct(instr1_block) != nir_src_is_direct(instr2_block)) - return true; + /* only SSBOs have a block source, so it can be NULL for shared-var */ + if (instr1_block) { + if (!instr2_block) + return true; - blocks_match = nir_srcs_equal(*instr1_block, *instr2_block); + /* For SSBOs the block is an SSA value, but it can still be direct, + * if defined by a load_const instruction. + */ + if (nir_src_is_direct(instr1_block) != nir_src_is_direct(instr2_block)) + return true; + + blocks_match = nir_srcs_equal(*instr1_block, *instr2_block); + } else { + bases_match = instr1_base == instr2_base; + } /* For SSBOs, the offset is an SSA value, but it can still be direct, * if defined by a load_const instruction. 
@@ -279,7 +368,7 @@ detect_memory_access_conflict(nir_intrinsic_instr *instr1, offsets_match = nir_srcs_equal(*instr1_offset, *instr2_offset); /* finally, if both blocks and offsets match, it means conflict */ - if (offsets_match && blocks_match) { + if (offsets_match && (blocks_match || bases_match)) { if (full_match) *full_match = true; @@ -296,6 +385,9 @@ get_store_writemask(nir_intrinsic_instr *instr) case nir_intrinsic_store_ssbo: return instr->const_index[0]; + case nir_intrinsic_store_shared: + return instr->const_index[1]; + case nir_intrinsic_ssbo_atomic_add: case nir_intrinsic_ssbo_atomic_imin: case nir_intrinsic_ssbo_atomic_umin: @@ -306,6 +398,17 @@ get_store_writemask(nir_intrinsic_instr *instr) case nir_intrinsic_ssbo_atomic_xor: case nir_intrinsic_ssbo_atomic_exchange: case nir_intrinsic_ssbo_atomic_comp_swap: + /* fall-through to shared variable atomics */ + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: return WRITEMASK_X; default: -- 2.7.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev