The lowering will take an SSBO intrinsic and replace it with the new
ir3-specific version that adds an extra source. That source holds the SSA
value that results from dividing the original byte-offset source of the
intrinsic by 4 (an SHR op).
---
 src/freedreno/ir3/ir3_nir_lower_io_offsets.c | 170 ++++++++++++++++++-
 1 file changed, 164 insertions(+), 6 deletions(-)
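For illustration only (not part of the patch; the SSA names are made up and
the NIR syntax is simplified), the lowering turns a load such as:

    vec1 32 ssa_4 = intrinsic load_ssbo (ssa_0, ssa_3) (...)      /* ssa_3 = byte offset */

into roughly:

    vec1 32 ssa_5 = load_const (0x00000002)
    vec1 32 ssa_6 = ushr ssa_3, ssa_5                              /* byte offset / 4 */
    vec1 32 ssa_7 = intrinsic load_ssbo_ir3 (ssa_0, ssa_3, ssa_6) (...)

with all uses of ssa_4 rewritten to ssa_7 before the original intrinsic is
removed.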
diff --git a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
index a43b3895fd8..d03dc6048cb 100644
--- a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
+++ b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
@@ -33,6 +33,12 @@
  * compute (x*bpp) + y*y_stride + z*z_stride), and place the resulting
  * SSA value in the 4th-component of the vec4 instruction that defines
  * the offset.
+ *
+ * - Dword-offset for SSBO load, store and atomics: A new, similar intrinsic
+ *   is emitted that replaces the original one, adding a new source that
+ *   holds the result of the original byte-offset source divided by 4.
+ *   'ssbo_atomic_[f]comp_swap' are excluded because those already use
+ *   all 4 sources.
  */
@@ -65,6 +71,32 @@ intrinsic_is_image_store_or_atomic(unsigned intrinsic)
    return intrinsic_is_image_atomic(intrinsic);
 }
 
+static bool
+intrinsic_is_ssbo(unsigned intrinsic)
+{
+   switch (intrinsic) {
+   case nir_intrinsic_store_ssbo:
+   case nir_intrinsic_load_ssbo:
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_ssbo_atomic_xor:
+   case nir_intrinsic_ssbo_atomic_exchange:
+   case nir_intrinsic_ssbo_atomic_fadd:
+   case nir_intrinsic_ssbo_atomic_fmin:
+   case nir_intrinsic_ssbo_atomic_fmax:
+      return true;
+   default:
+      break;
+   }
+
+   return false;
+}
+
 /*
  * FIXME: shamelessly copied from ir3_compiler_nir until it gets factorized
  * out at some point.
@@ -279,6 +311,131 @@ lower_offset_for_image_store_or_atomic(nir_intrinsic_instr *intrinsic,
    return true;
 }
 
+/* Returns the ir3 version of a given SSBO intrinsic. It also conveniently
+ * returns the index of the offset source in 'offset_src_idx'.
+ */
+unsigned
+get_ir3_intrinsic_for_ssbo_intrinsic(unsigned intrinsic,
+                                     uint8_t *offset_src_idx)
+{
+   debug_assert(offset_src_idx);
+
+   *offset_src_idx = 1;
+
+   switch (intrinsic) {
+   case nir_intrinsic_store_ssbo:
+      *offset_src_idx = 2;
+      return nir_intrinsic_store_ssbo_ir3;
+   case nir_intrinsic_load_ssbo:
+      return nir_intrinsic_load_ssbo_ir3;
+   case nir_intrinsic_ssbo_atomic_add:
+      return nir_intrinsic_ssbo_atomic_add_ir3;
+   case nir_intrinsic_ssbo_atomic_imin:
+      return nir_intrinsic_ssbo_atomic_imin_ir3;
+   case nir_intrinsic_ssbo_atomic_umin:
+      return nir_intrinsic_ssbo_atomic_umin_ir3;
+   case nir_intrinsic_ssbo_atomic_imax:
+      return nir_intrinsic_ssbo_atomic_imax_ir3;
+   case nir_intrinsic_ssbo_atomic_umax:
+      return nir_intrinsic_ssbo_atomic_umax_ir3;
+   case nir_intrinsic_ssbo_atomic_and:
+      return nir_intrinsic_ssbo_atomic_and_ir3;
+   case nir_intrinsic_ssbo_atomic_or:
+      return nir_intrinsic_ssbo_atomic_or_ir3;
+   case nir_intrinsic_ssbo_atomic_xor:
+      return nir_intrinsic_ssbo_atomic_xor_ir3;
+   case nir_intrinsic_ssbo_atomic_exchange:
+      return nir_intrinsic_ssbo_atomic_exchange_ir3;
+   case nir_intrinsic_ssbo_atomic_fadd:
+      return nir_intrinsic_ssbo_atomic_fadd_ir3;
+   case nir_intrinsic_ssbo_atomic_fmin:
+      return nir_intrinsic_ssbo_atomic_fmin_ir3;
+   case nir_intrinsic_ssbo_atomic_fmax:
+      return nir_intrinsic_ssbo_atomic_fmax_ir3;
+   default:
+      debug_assert(!"Unhandled SSBO intrinsic");
+      break;
+   }
+
+   return 0;
+}
+
+static bool
+lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b,
+                      void *mem_ctx)
+{
+   unsigned num_srcs = nir_intrinsic_infos[intrinsic->intrinsic].num_srcs;
+   debug_assert(num_srcs < 4);
+
+   bool has_dest = nir_intrinsic_infos[intrinsic->intrinsic].has_dest;
+   nir_ssa_def *new_dest = NULL;
+
+   /* Here we create a new intrinsic and copy over all contents from the
+    * old one.
+    */
+   nir_intrinsic_instr *new_intrinsic;
+   nir_src *target_src;
+   uint8_t offset_src_idx;
+
+   unsigned ir3_intrinsic_opcode =
+      get_ir3_intrinsic_for_ssbo_intrinsic(intrinsic->intrinsic,
+                                           &offset_src_idx);
+
+   /* 'offset_src_idx' holds the index of the source that represents the
+    * offset.
+    */
+   new_intrinsic =
+      nir_intrinsic_instr_create(b->shader, ir3_intrinsic_opcode);
+
+   nir_ssa_def *offset = intrinsic->src[offset_src_idx].ssa;
+
+   /* The new source that will hold the dword-offset is always the last
+    * one for every intrinsic.
+    */
+   target_src = &new_intrinsic->src[num_srcs];
+   *target_src = nir_src_for_ssa(offset);
+
+   if (has_dest) {
+      nir_ssa_def *dest = &intrinsic->dest.ssa;
+      nir_ssa_dest_init(&new_intrinsic->instr, &new_intrinsic->dest,
+                        dest->num_components, dest->bit_size, NULL);
+      new_dest = &new_intrinsic->dest.ssa;
+   }
+
+   for (unsigned i = 0; i < num_srcs; i++)
+      new_intrinsic->src[i] = nir_src_for_ssa(intrinsic->src[i].ssa);
+
+   for (unsigned i = 0; i < NIR_INTRINSIC_MAX_CONST_INDEX; i++)
+      new_intrinsic->const_index[i] = intrinsic->const_index[i];
+
+   new_intrinsic->num_components = intrinsic->num_components;
+
+   b->cursor = nir_before_instr(&intrinsic->instr);
+   nir_ssa_def *offset_div_4 = nir_ushr(b, offset, nir_imm_int(b, 2));
+   debug_assert(offset_div_4);
+
+   /* Insert the new intrinsic right before the old one. */
+   b->cursor = nir_before_instr(&intrinsic->instr);
+   nir_builder_instr_insert(b, &new_intrinsic->instr);
+
+   /* Replace the last source of the new intrinsic with the result of
+    * the offset divided by 4.
+    */
+   nir_instr_rewrite_src(&new_intrinsic->instr,
+                         target_src,
+                         nir_src_for_ssa(offset_div_4));
+
+   if (has_dest) {
+      /* Replace the uses of the original destination with those of
+       * the new intrinsic.
+       */
+      nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa,
+                               nir_src_for_ssa(new_dest));
+   }
+
+   /* Finally, remove the original intrinsic. */
+   nir_instr_remove(&intrinsic->instr);
+
+   return true;
+}
+
 static bool
 lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx)
 {
@@ -289,12 +446,13 @@ lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx)
          continue;
 
       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-      if (!intrinsic_is_image_store_or_atomic(intr->intrinsic))
-         continue;
-
-      const nir_variable *var = nir_intrinsic_get_var(intr, 0);
-      progress |= lower_offset_for_image_store_or_atomic(intr, var, b,
-                                                         mem_ctx);
+      if (intrinsic_is_image_store_or_atomic(intr->intrinsic)) {
+         const nir_variable *var = nir_intrinsic_get_var(intr, 0);
+         progress |= lower_offset_for_image_store_or_atomic(intr, var, b,
+                                                            mem_ctx);
+      } else if (intrinsic_is_ssbo(intr->intrinsic)) {
+         progress |= lower_offset_for_ssbo(intr, b, mem_ctx);
+      }
    }
 
    return progress;
-- 
2.20.1
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev