On Wed, Feb 13, 2019 at 4:30 PM Eduardo Lima Mitev <el...@igalia.com> wrote:
>
> This pass moves to NIR some offset computations that are currently
> implemented in the IR3 backend compiler, to allow NIR to possibly
> optimize them.
>
> For now, it only supports lowering byte-offset computation for image
> store and atomics.
> ---
>  src/freedreno/Makefile.sources               |   1 +
>  src/freedreno/ir3/ir3_nir.h                  |   1 +
>  src/freedreno/ir3/ir3_nir_lower_io_offsets.c | 334 +++++++++++++++++++
>  3 files changed, 336 insertions(+)
>  create mode 100644 src/freedreno/ir3/ir3_nir_lower_io_offsets.c
>
> diff --git a/src/freedreno/Makefile.sources b/src/freedreno/Makefile.sources
> index 7fea9de39ef..235fec1c4f2 100644
> --- a/src/freedreno/Makefile.sources
> +++ b/src/freedreno/Makefile.sources
> @@ -31,6 +31,7 @@ ir3_SOURCES := \
>  	ir3/ir3_legalize.c \
>  	ir3/ir3_nir.c \
>  	ir3/ir3_nir.h \
> +	ir3/ir3_nir_lower_io_offsets.c \
>  	ir3/ir3_nir_lower_tg4_to_tex.c \
>  	ir3/ir3_print.c \
>  	ir3/ir3_ra.c \
> diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
> index 74201d34160..7983b74af2c 100644
> --- a/src/freedreno/ir3/ir3_nir.h
> +++ b/src/freedreno/ir3/ir3_nir.h
> @@ -36,6 +36,7 @@ void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_driver_const_layo
>
>  bool ir3_nir_apply_trig_workarounds(nir_shader *shader);
>  bool ir3_nir_lower_tg4_to_tex(nir_shader *shader);
> +bool ir3_nir_lower_io_offsets(nir_shader *shader);
>
>  const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler);
>  bool ir3_key_lowers_nir(const struct ir3_shader_key *key);
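Worth noting while reading: this patch only adds the pass and its
declaration; the call site is not wired up here. A minimal sketch of
what the hookup could look like -- the wrapper name and its placement
(presumably somewhere in ir3_optimize_nir() in ir3_nir.c) are
illustrative assumptions, not part of the series:

    #include "ir3_nir.h"

    /* Illustrative only: run the new lowering before the main NIR opt
     * loop, so the imul/imad24 chains it emits can still be CSE'd and
     * const-folded.
     */
    static bool
    ir3_run_offset_lowerings(nir_shader *s)
    {
            bool progress = false;

            progress |= ir3_nir_lower_io_offsets(s);

            return progress;
    }
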
> diff --git a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
> new file mode 100644
> index 00000000000..a43b3895fd8
> --- /dev/null
> +++ b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
> @@ -0,0 +1,334 @@
> +/*
> + * Copyright © 2018-2019 Igalia S.L.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "ir3_nir.h"
> +#include "compiler/nir/nir_builder.h"
> +
> +/**
> + * This pass moves certain offset computations for different I/O ops,
> + * currently implemented in the IR3 backend compiler, into NIR, to give
> + * NIR a chance to optimize them:
> + *
> + * - Byte offset for image store and atomics: emit instructions to
> + *   compute (x * bpp) + (y * y_stride) + (z * z_stride), and place the
> + *   resulting SSA value in the 4th component of the vec4 instruction
> + *   that provides the intrinsic's coordinates.
> + */
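For a concrete feel of the computation being lowered, here is the same
offset math as plain C (illustrative only; in the pass the bpp is a
compile-time constant derived from the image format, while the strides
come from the image_dims driver params):

    /* Linear byte offset of texel (x, y, z): x scaled by bytes per
     * pixel, y by the row stride, z by the slice/layer stride.
     */
    static unsigned
    linear_byte_offset(unsigned x, unsigned y, unsigned z,
                       unsigned bpp, unsigned y_stride, unsigned z_stride)
    {
            return (x * bpp) + (y * y_stride) + (z * z_stride);
    }
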
> +
> +
> +static bool
> +intrinsic_is_image_atomic(unsigned intrinsic)
> +{
> +        switch (intrinsic) {
> +        case nir_intrinsic_image_deref_atomic_add:
> +        case nir_intrinsic_image_deref_atomic_min:
> +        case nir_intrinsic_image_deref_atomic_max:
> +        case nir_intrinsic_image_deref_atomic_and:
> +        case nir_intrinsic_image_deref_atomic_or:
> +        case nir_intrinsic_image_deref_atomic_xor:
> +        case nir_intrinsic_image_deref_atomic_exchange:
> +        case nir_intrinsic_image_deref_atomic_comp_swap:
> +                return true;
> +        default:
> +                break;
> +        }
> +
> +        return false;
> +}
> +
> +static bool
> +intrinsic_is_image_store_or_atomic(unsigned intrinsic)
> +{
> +        if (intrinsic == nir_intrinsic_image_deref_store)
> +                return true;
> +        else
> +                return intrinsic_is_image_atomic(intrinsic);
> +}
> +
> +/*
> + * FIXME: shamelessly copied from ir3_compiler_nir until it gets factored
> + * out at some point.

Sorry, I'd overlooked this patchset.. blame gitlab MRs.

Anyways, the good news is these helpers were refactored out into
ir3_image.c.. the bad news is rebasing this series will be a bit
conflicty, since I broke out the image/ssbo intrinsics into
ir3_a6xx/ir3_a4xx. (They are different depending on the generation..
but offhand I think we can use the same nir->nir lowering and just
ignore one of the nir_src's on a6xx.)

BR,
-R

> + */
> +static unsigned
> +get_image_coords(const nir_variable *var)
> +{
> +        const struct glsl_type *type = glsl_without_array(var->type);
> +        unsigned coords;
> +
> +        switch (glsl_get_sampler_dim(type)) {
> +        case GLSL_SAMPLER_DIM_1D:
> +        case GLSL_SAMPLER_DIM_BUF:
> +                coords = 1;
> +                break;
> +        case GLSL_SAMPLER_DIM_2D:
> +        case GLSL_SAMPLER_DIM_RECT:
> +        case GLSL_SAMPLER_DIM_EXTERNAL:
> +        case GLSL_SAMPLER_DIM_MS:
> +                coords = 2;
> +                break;
> +        case GLSL_SAMPLER_DIM_3D:
> +        case GLSL_SAMPLER_DIM_CUBE:
> +                coords = 3;
> +                break;
> +        default:
> +                unreachable("bad sampler dim");
> +                return 0;
> +        }
> +
> +        if (glsl_sampler_type_is_array(type)) {
> +                /* adjust # of coords to include array idx: */
> +                coords++;
> +        }
> +
> +        return coords;
> +}
> +
> +/* Returns the bytes-per-pixel for the different GL formats corresponding to
> + * all supported image formats.
> + */
> +static unsigned
> +bytes_per_pixel_for_gl_format(GLuint format)
> +{
> +        switch (format) {
> +        case GL_R8I:
> +        case GL_R8UI:
> +        case GL_R8:
> +        case GL_R8_SNORM:
> +                return 1;
> +
> +        case GL_R16F:
> +        case GL_R16I:
> +        case GL_R16UI:
> +        case GL_R16:
> +        case GL_R16_SNORM:
> +        case GL_RG8I:
> +        case GL_RG8UI:
> +        case GL_RG8:
> +        case GL_RG8_SNORM:
> +                return 2;
> +
> +        case GL_R32F:
> +        case GL_R32I:
> +        case GL_R32UI:
> +        case GL_RG16F:
> +        case GL_RG16I:
> +        case GL_RG16UI:
> +        case GL_RG16:
> +        case GL_RG16_SNORM:
> +        case GL_RGBA8I:
> +        case GL_RGBA8UI:
> +        case GL_RGBA8:
> +        case GL_RGBA8_SNORM:
> +        case GL_RGB10_A2UI:
> +        case GL_RGB10_A2:
> +        case GL_R11F_G11F_B10F:
> +                return 4;
> +
> +        case GL_RG32F:
> +        case GL_RG32I:
> +        case GL_RG32UI:
> +        case GL_RGBA16F:
> +        case GL_RGBA16I:
> +        case GL_RGBA16UI:
> +        case GL_RGBA16:
> +        case GL_RGBA16_SNORM:
> +                return 8;
> +
> +        case GL_RGBA32F:
> +        case GL_RGBA32I:
> +        case GL_RGBA32UI:
> +                return 16;
> +
> +        default:
> +                debug_assert(!"Unhandled GL format");
> +        }
> +
> +        return 0;
> +}
> +
> +static nir_ssa_def *
> +insert_load_image_param(nir_builder *b, nir_ssa_def *image_deref,
> +                        unsigned dimension)
> +{
> +        nir_intrinsic_instr *load =
> +                nir_intrinsic_instr_create(b->shader,
> +                                           nir_intrinsic_image_deref_load_param_ir3);
> +        load->num_components = 1;
> +        load->const_index[0] = dimension;
> +        load->src[0] = nir_src_for_ssa(image_deref);
> +        nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
> +
> +        nir_builder_instr_insert(b, &load->instr);
> +
> +        return &load->dest.ssa;
> +}
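To picture what the function below does, here is a rough before/after
for a 2D imageStore on a GL_RGBA8 image (bpp = 4), written as a C
comment sketch; the SSA names are made up for illustration:

    /*
     * before:  ssa_7 = vec4(x, y, undef, undef)
     *          image_deref_store(img, ssa_7, ...)
     *
     * after:   ssa_8  = imul(x, 4)                    // x * bpp
     *          ssa_9  = load_param(img, Y_STRIDE)     // uniform, from image_dims[]
     *          ssa_10 = imad24_ir3(ssa_9, y, ssa_8)   // y * y_stride + x * bpp
     *          ssa_7  = vec4(x, y, undef, ssa_10)
     *          image_deref_store(img, ssa_7, ...)
     */
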
> +static bool
> +lower_offset_for_image_store_or_atomic(nir_intrinsic_instr *intrinsic,
> +                                       const nir_variable *var,
> +                                       nir_builder *b, void *mem_ctx)
> +{
> +        nir_ssa_def *image_deref;
> +
> +        /* Find the instruction that defines the coord source of the image
> +         * store/atomic intrinsic. It must be a "vec4" ALU instruction.
> +         */
> +        debug_assert(intrinsic->src[1].is_ssa);
> +        nir_ssa_def *offset_src_def = intrinsic->src[1].ssa;
> +
> +        nir_instr *offset_parent_instr = offset_src_def->parent_instr;
> +        debug_assert(offset_parent_instr->type == nir_instr_type_alu);
> +
> +        nir_alu_instr *vec4_instr = nir_instr_as_alu(offset_parent_instr);
> +        debug_assert(vec4_instr->op == nir_op_vec4);
> +
> +        unsigned coords = get_image_coords(var);
> +
> +        b->cursor = nir_before_instr(&vec4_instr->instr);
> +
> +        /* These are actually offsets into the image_dims register file
> +         * (for a given image).
> +         */
> +        enum {
> +                BYTES_PER_PIXEL = 0,
> +                Y_STRIDE = 1,
> +                Z_STRIDE = 2
> +        };
> +
> +        /* x_offset = coord.x * bytes_per_pixel */
> +        nir_ssa_def *x_coord = vec4_instr->src[0].src.ssa;
> +        unsigned bpp = bytes_per_pixel_for_gl_format(var->data.image.format);
> +        nir_ssa_def *offset = nir_imul_imm(b, x_coord, bpp);
> +        nir_alu_instr *imul = nir_instr_as_alu(offset->parent_instr);
> +        debug_assert(imul);
> +        imul->src[0].swizzle[0] = vec4_instr->src[0].swizzle[0];
> +        debug_assert(offset);
> +
> +        /* For the Y and Z dimensions, we emit a temporary
> +         * image_deref_load_param intrinsic, to be consumed by
> +         * ir3_compiler_nir::emit_intrinsic(), which will just emit a
> +         * uniform with the right value from image_dims[].
> +         */
> +
> +        if (coords > 1) {
> +                /* src0 of the intrinsic is the dereferenced image. */
> +                debug_assert(intrinsic->src[0].is_ssa);
> +                image_deref = intrinsic->src[0].ssa;
> +
> +                nir_ssa_def *y_coord = vec4_instr->src[1].src.ssa;
> +                nir_ssa_def *y_stride =
> +                        insert_load_image_param(b, image_deref, Y_STRIDE);
> +
> +                /* y_offset = coord.y * y_stride + x_offset */
> +                offset = nir_imad24_ir3(b, y_stride, y_coord, offset);
> +                debug_assert(offset);
> +                nir_alu_instr *imad = nir_instr_as_alu(offset->parent_instr);
> +                debug_assert(imad);
> +                imad->src[1].swizzle[0] = vec4_instr->src[1].swizzle[0];
> +
> +                if (coords > 2) {
> +                        nir_ssa_def *z_coord = vec4_instr->src[2].src.ssa;
> +                        nir_ssa_def *z_stride =
> +                                insert_load_image_param(b, image_deref, Z_STRIDE);
> +
> +                        /* z_offset = coord.z * z_stride + y_offset */
> +                        offset = nir_imad24_ir3(b, z_stride, z_coord, offset);
> +                        debug_assert(offset);
> +                        nir_alu_instr *imad = nir_instr_as_alu(offset->parent_instr);
> +                        debug_assert(imad);
> +                        imad->src[1].swizzle[0] = vec4_instr->src[2].swizzle[0];
> +                }
> +        }
> +
> +        if (intrinsic_is_image_atomic(intrinsic->intrinsic)) {
> +                /* Some cases, like atomics, seem to use dword offsets instead
> +                 * of byte offsets.. the blob just puts an extra shr.b in there
> +                 * in those cases:
> +                 */
> +                nir_ssa_def *two = nir_imm_int(b, 2);
> +                offset = nir_ushr(b, offset, two);
> +        }
> +
> +        /* Finally, store the computed offset in the 4th component of the
> +         * vec4 instruction, which is the only one guaranteed not to be used.
> +         * We don't want to overwrite the original coordinates because they
> +         * are still needed in the backend.
> +         */
> +        nir_instr_rewrite_src(&vec4_instr->instr,
> +                              &vec4_instr->src[3].src,
> +                              nir_src_for_ssa(offset));
> +        return true;
> +}
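One detail from the atomic path above, spelled out: the ushr by 2 is
just a bytes-to-dwords conversion. Equivalent C, for clarity
(illustrative only):

    /* Image atomics consume dword offsets, so the byte offset computed
     * by the pass is divided by four.
     */
    static inline unsigned
    byte_to_dword_offset(unsigned byte_offset)
    {
            return byte_offset >> 2;   /* e.g. 48 bytes -> dword 12 */
    }
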
> +
> +static bool
> +lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx)
> +{
> +        bool progress = false;
> +
> +        nir_foreach_instr_safe(instr, block) {
> +                if (instr->type != nir_instr_type_intrinsic)
> +                        continue;
> +
> +                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
> +                if (!intrinsic_is_image_store_or_atomic(intr->intrinsic))
> +                        continue;
> +
> +                const nir_variable *var = nir_intrinsic_get_var(intr, 0);
> +                progress |= lower_offset_for_image_store_or_atomic(intr, var, b,
> +                                                                   mem_ctx);
> +        }
> +
> +        return progress;
> +}
> +
> +static bool
> +lower_io_offsets_func(nir_function_impl *impl)
> +{
> +        void *mem_ctx = ralloc_parent(impl);
> +        nir_builder b;
> +        nir_builder_init(&b, impl);
> +
> +        bool progress = false;
> +        nir_foreach_block_safe(block, impl) {
> +                progress |= lower_io_offsets_block(block, &b, mem_ctx);
> +        }
> +
> +        if (progress) {
> +                nir_metadata_preserve(impl, nir_metadata_block_index |
> +                                            nir_metadata_dominance);
> +        }
> +
> +        return progress;
> +}
> +
> +bool
> +ir3_nir_lower_io_offsets(nir_shader *shader)
> +{
> +        bool progress = false;
> +
> +        nir_foreach_function(function, shader) {
> +                if (function->impl)
> +                        progress |= lower_io_offsets_func(function->impl);
> +        }
> +
> +        return progress;
> +}
> --
> 2.20.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev