We used the byte_scattered_read message because it allows reading from offsets that are not 32-bit aligned. Until now, we were reading one component per message.
By using a 32-bit bit size in the byte_scattered_read message, we can read up to two 16-bit components or four 8-bit components with only one message per iteration. The same applies to a 16-bit bit size when reading two 8-bit components. In the case of an int8 vec3, we read it as 32 bits and ignore the padding. Cc: Jason Ekstrand <ja...@jlekstrand.net> --- src/intel/compiler/brw_fs_nir.cpp | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 9b11b5fbd01..a1f946708ed 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -2415,24 +2415,34 @@ do_untyped_vector_read(const fs_builder &bld, num_components); } else { fs_reg read_offset = bld.vgrf(BRW_REGISTER_TYPE_UD); - for (unsigned i = 0; i < num_components; i++) { - if (i == 0) { + unsigned iters = DIV_ROUND_UP(type_sz(dest.type) * num_components, 4); + for (unsigned it = 0; it < iters; it++) { + if (it == 0) { bld.MOV(read_offset, offset_reg); } else { - bld.ADD(read_offset, offset_reg, - brw_imm_ud(i * type_sz(dest.type))); + bld.ADD(read_offset, offset_reg, brw_imm_ud(4 * it)); } + unsigned iter_components = MIN2(4 / type_sz(dest.type), + num_components); + num_components -= iter_components; + /* We adjust the bitsize_read to hold as many components we can in + * the same read message. We use 32-bit to read 8-bit vec3 but we + * ignore last padding.component. + */ + unsigned bitsize_read = util_next_power_of_two(8 * iter_components * + type_sz(dest.type)); /* Non constant offsets are not guaranteed to be aligned 32-bits - * so they are read using one byte_scattered_read message - * for each component. + * for 8/16 bit componentes. We use byte_scattered_read for + * one or multiple components up to 4-bytes for iteration.
*/ fs_reg read_result = emit_byte_scattered_read(bld, surf_index, read_offset, 1 /* dims */, 1, - type_sz(dest.type) * 8 /* bit_size */, + bitsize_read, BRW_PREDICATE_NONE); - bld.MOV(offset(dest, bld, i), - subscript (read_result, dest.type, 0)); + shuffle_from_32bit_read(bld, offset(dest, bld, + it * 4 / type_sz(dest.type)), + read_result, 0, iter_components); } } } else if (type_sz(dest.type) == 4) { -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev