---
 src/compiler/nir/nir_opt_load_combine.c | 170 +++++++++++++++++++++++++++++---
 1 file changed, 155 insertions(+), 15 deletions(-)
diff --git a/src/compiler/nir/nir_opt_load_combine.c b/src/compiler/nir/nir_opt_load_combine.c
index 3e3d066..8f3d4d6 100644
--- a/src/compiler/nir/nir_opt_load_combine.c
+++ b/src/compiler/nir/nir_opt_load_combine.c
@@ -39,7 +39,8 @@ enum intrinsic_groups {
    INTRINSIC_GROUP_NONE = 0,
    INTRINSIC_GROUP_ALL,
    INTRINSIC_GROUP_SSBO,
-   INTRINSIC_GROUP_SHARED
+   INTRINSIC_GROUP_SHARED,
+   INTRINSIC_GROUP_IMAGE
 };

 struct cache_node {
@@ -127,23 +128,64 @@ is_memory_barrier_shared(nir_intrinsic_instr *intrinsic)
    return intrinsic->intrinsic == nir_intrinsic_memory_barrier_shared;
 }

+/* Image load/store */
+static bool
+is_atomic_image(nir_intrinsic_instr *intrinsic)
+{
+   switch (intrinsic->intrinsic) {
+   case nir_intrinsic_image_atomic_add:
+   case nir_intrinsic_image_atomic_min:
+   case nir_intrinsic_image_atomic_max:
+   case nir_intrinsic_image_atomic_and:
+   case nir_intrinsic_image_atomic_or:
+   case nir_intrinsic_image_atomic_xor:
+   case nir_intrinsic_image_atomic_exchange:
+   case nir_intrinsic_image_atomic_comp_swap:
+      return true;
+   default:
+      return false;
+   }
+}
+
+static bool
+is_store_image(nir_intrinsic_instr *intrinsic)
+{
+   return intrinsic->intrinsic == nir_intrinsic_image_store ||
+          is_atomic_image(intrinsic);
+}
+
+static bool
+is_load_image(nir_intrinsic_instr *intrinsic)
+{
+   return intrinsic->intrinsic == nir_intrinsic_image_load;
+}
+
+static inline bool
+is_memory_barrier_image(nir_intrinsic_instr *intrinsic)
+{
+   return intrinsic->intrinsic == nir_intrinsic_memory_barrier_image;
+}
+
 /* General intrinsic classification functions */
 static inline bool
 is_store(nir_intrinsic_instr *intrinsic)
 {
-   return is_store_ssbo(intrinsic) || is_store_shared(intrinsic);
+   return is_store_ssbo(intrinsic) || is_store_shared(intrinsic) ||
+          is_store_image(intrinsic);
 }

 static inline bool
 is_load(nir_intrinsic_instr *intrinsic)
 {
-   return is_load_ssbo(intrinsic) || is_load_shared(intrinsic);
+   return is_load_ssbo(intrinsic) || is_load_shared(intrinsic) ||
+          is_load_image(intrinsic);
 }

 static inline bool
 is_atomic(nir_intrinsic_instr *intrinsic)
 {
-   return is_atomic_ssbo(intrinsic) || is_atomic_shared(intrinsic);
+   return is_atomic_ssbo(intrinsic) || is_atomic_shared(intrinsic)
+          || is_atomic_image(intrinsic);
 }

 static inline bool
@@ -151,7 +193,8 @@ is_memory_barrier(nir_intrinsic_instr *intrinsic)
 {
    return intrinsic->intrinsic == nir_intrinsic_memory_barrier ||
           is_memory_barrier_buffer(intrinsic) ||
-          is_memory_barrier_shared(intrinsic);
+          is_memory_barrier_shared(intrinsic) ||
+          is_memory_barrier_image(intrinsic);
 }

 static unsigned
@@ -165,6 +208,9 @@ intrinsic_group(nir_intrinsic_instr *intrinsic)
    else if (is_load_shared(intrinsic) || is_store_shared(intrinsic) ||
             is_memory_barrier_shared(intrinsic))
       return INTRINSIC_GROUP_SHARED;
+   else if (is_load_image(intrinsic) || is_store_image(intrinsic) ||
+            is_memory_barrier_image(intrinsic))
+      return INTRINSIC_GROUP_IMAGE;
    else
       return INTRINSIC_GROUP_NONE;
 }
@@ -217,6 +263,17 @@ nir_src_is_direct(nir_src *src)
 }

 /**
+ * Returns true if a nir_src represents an undefined SSA register, false
+ * otherwise.
+ */
+static inline bool
+nir_src_is_undefined(nir_src src)
+{
+   return src.is_ssa &&
+          src.ssa->parent_instr->type == nir_instr_type_ssa_undef;
+}
+
+/**
  * Gets the block and offset of a load/store instruction.
  *
  * @instr: the intrinsic load/store operation
@@ -301,6 +358,61 @@ get_load_store_address(nir_intrinsic_instr *instr,
 }

 /**
+ * Returns true if two coordinates sources of an image intrinsic match.
+ * This means that both sources are defined by ALU vec4 ops, with all
+ * 4 sources equivalent. For example, consider the following snippet:
+ *
+ *    vec1 ssa_1 = undefined
+ *    vec4 ssa_2 = vec4 ssa_0, ssa_0, ssa_1, ssa_1
+ *    vec4 ssa_3 = intrinsic image_load (ssa_2, ssa_1) (itex) ()
+ *    ...
+ *    vec1 ssa_6 = undefined
+ *    vec4 ssa_7 = vec4 ssa_0, ssa_0, ssa_1, ssa_1
+ *    vec4 ssa_8 = intrinsic image_load (ssa_7, ssa_6) (itex) ()
+ *
+ * Here, ssa_2 and ssa_7 are the coordinates inside the image, and
+ * they are two different SSA definitions, so comparing them directly
+ * won't work. This function is able to detect this and check that
+ * ssa_2 and ssa_7 are indeed "equivalent"; so the pass can tell that
+ * the two image_load instructions are a match.
+ */
+static bool
+coordinates_match(nir_src *coord1, nir_src *coord2)
+{
+   assert(coord1->is_ssa);
+   assert(coord2->is_ssa);
+
+   nir_ssa_def *ssa1 = coord1->ssa;
+   nir_ssa_def *ssa2 = coord2->ssa;
+
+   nir_instr *parent1 = ssa1->parent_instr;
+   nir_instr *parent2 = ssa2->parent_instr;
+
+   /* @FIXME: currently, all coordinates into an image load/store
+    * instruction are given by an ALU vec4 instruction. This may change in
+    * the future, in which case we should detect it here.
+    */
+   assert(parent1->type == nir_instr_type_alu);
+   assert(parent2->type == nir_instr_type_alu);
+
+   nir_alu_instr *alu1 = nir_instr_as_alu(parent1);
+   nir_alu_instr *alu2 = nir_instr_as_alu(parent2);
+
+   assert(alu1->op == nir_op_vec4);
+   assert(alu2->op == nir_op_vec4);
+
+   for (unsigned i = 0; i < 4; i++) {
+      if (! ((nir_src_is_undefined(alu1->src[i].src) &&
+              nir_src_is_undefined(alu2->src[i].src)) ||
+             nir_srcs_equal(alu1->src[i].src, alu2->src[i].src))) {
+         return false;
+      }
+   }
+
+   return true;
+}
+
+/**
  * Determines whether two instrinsic instructions conflict with each other,
  * meaning that a) they access the same memory area, or b) a non-conflict
  * cannot be determined (because at least one access is indirect).
@@ -334,6 +446,25 @@ detect_memory_access_conflict(nir_intrinsic_instr *instr1,
    if (!intrinsic_group_match(instr1, instr2))
       return false;

+   /* Since image load/store are always indirect, we should always report
+    * conflict because we are unable to determine it. However, we still want
+    * to know if the texture objects and coordinates match, to report back
+    * a full match.
+    */
+   if (intrinsic_group(instr1) == INTRINSIC_GROUP_IMAGE) {
+      if (full_match) {
+         assert(instr1->variables[0]);
+         assert(instr2->variables[0]);
+
+         if (instr1->variables[0]->var == instr2->variables[0]->var &&
+             coordinates_match(&instr1->src[0], &instr2->src[0])) {
+            *full_match = true;
+         }
+      }
+
+      return true;
+   }
+
    get_load_store_address(instr1, &instr1_block, &instr1_offset, &instr1_base);
    get_load_store_address(instr2, &instr2_block, &instr2_offset, &instr2_base);

@@ -527,21 +658,30 @@ rewrite_load_with_store(struct cache_node *cache,
       if (!blocks_and_offsets_match)
          continue;

-      /* The store must write to all the channels we are loading */
-      unsigned store_writemask = get_store_writemask(store);
-      bool writes_all_channels = true;
-      for (int i = 0; i < load->num_components; i++) {
-         if (!((1 << i) & store_writemask)) {
-            writes_all_channels = false;
-            break;
+      if (intrinsic_group(store) != INTRINSIC_GROUP_IMAGE) {
+         /* The store must write to all the channels we are loading */
+         unsigned store_writemask = get_store_writemask(store);
+         bool writes_all_channels = true;
+         for (int i = 0; i < load->num_components; i++) {
+            if (!((1 << i) & store_writemask)) {
+               writes_all_channels = false;
+               break;
+            }
          }
+         if (!writes_all_channels)
+            continue;
       }
-      if (!writes_all_channels)
-         continue;

       /* rewrite the new load with the cached store instruction */
       nir_ssa_def *def = &load->dest.ssa;
-      nir_ssa_def *new_def = store->src[0].ssa;
+      nir_ssa_def *new_def;
+      /* while the value source for SSBO and shared-vars is src[0],
+       * image stores use src[2].
+       */
+      if (intrinsic_group(store) != INTRINSIC_GROUP_IMAGE)
+         new_def = store->src[0].ssa;
+      else
+         new_def = store->src[2].ssa;
       nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def));

       return true;
-- 
2.7.0
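
For illustration, here is a minimal, self-contained C sketch of the matching rule that coordinates_match() implements: two vec4 coordinate sources are considered equivalent when every channel is either undefined in both or fed by equal sources in both. The struct and function names below are hypothetical stand-ins, not NIR API.

#include <stdbool.h>

/* Hypothetical stand-in for one channel of a vec4 coordinate source. */
struct fake_channel {
   bool is_undefined;   /* models nir_src_is_undefined() */
   int  def_id;         /* models the SSA def feeding this channel */
};

/* Mirrors the per-channel test in coordinates_match(): a channel pair
 * matches if both are undefined, or if both are defined and equal.
 */
static bool
fake_coordinates_match(const struct fake_channel a[4],
                       const struct fake_channel b[4])
{
   for (unsigned i = 0; i < 4; i++) {
      bool both_undef = a[i].is_undefined && b[i].is_undefined;
      bool both_equal = !a[i].is_undefined && !b[i].is_undefined &&
                        a[i].def_id == b[i].def_id;
      if (!both_undef && !both_equal)
         return false;
   }
   return true;
}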
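
Likewise, a sketch of how a caller would read the image-group result of detect_memory_access_conflict(): for INTRINSIC_GROUP_IMAGE a conflict is always reported, so only the full-match flag says whether the earlier access can actually be reused. This assumes the existing signature takes (instr1, instr2, bool *full_match) and would sit in nir_opt_load_combine.c; the helper name is hypothetical.

/* Sketch only: image access always conflicts, so the *full_match output
 * (same image variable, equivalent coordinates) is what decides between
 * reusing the cached value and merely invalidating the cache entry.
 */
static bool
image_access_is_reusable(nir_intrinsic_instr *instr1,
                         nir_intrinsic_instr *instr2)
{
   bool full_match = false;

   if (!detect_memory_access_conflict(instr1, instr2, &full_match))
      return false;

   return full_match;
}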
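
Finally, a possible follow-up refactor (a sketch, not part of this patch): since the stored value lives in src[0] for SSBO and shared-variable stores but in src[2] for image stores, the branch added to rewrite_load_with_store() could be factored into a small helper in nir_opt_load_combine.c.

/* Returns the value written by any store intrinsic this pass handles,
 * following the convention used above: src[0] for SSBO/shared stores,
 * src[2] for image stores.
 */
static nir_ssa_def *
get_store_value(nir_intrinsic_instr *store)
{
   if (intrinsic_group(store) == INTRINSIC_GROUP_IMAGE)
      return store->src[2].ssa;

   return store->src[0].ssa;
}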