On Wed, May 13, 2015 at 12:07 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > On Tue, May 5, 2015 at 2:48 PM, Francisco Jerez <curroje...@riseup.net> wrote: >> Define a few transformations on register arrays which will be used >> frequently during the construction of typed and untyped surface >> message payloads. Their purpose is simple but the implementation is >> rather messy, so it makes a lot of sense to factor them out as >> separate functions. >> >> v2: Drop VEC4 suport. >> --- >> src/mesa/drivers/dri/i965/Makefile.sources | 2 + >> .../drivers/dri/i965/brw_fs_surface_builder.cpp | 218 >> +++++++++++++++++++++ >> src/mesa/drivers/dri/i965/brw_fs_surface_builder.h | 31 +++ >> 3 files changed, 251 insertions(+) >> create mode 100644 src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp >> create mode 100644 src/mesa/drivers/dri/i965/brw_fs_surface_builder.h >> >> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources >> b/src/mesa/drivers/dri/i965/Makefile.sources >> index 260e448..f299913 100644 >> --- a/src/mesa/drivers/dri/i965/Makefile.sources >> +++ b/src/mesa/drivers/dri/i965/Makefile.sources >> @@ -59,6 +59,8 @@ i965_FILES = \ >> brw_fs_register_coalesce.cpp \ >> brw_fs_saturate_propagation.cpp \ >> brw_fs_sel_peephole.cpp \ >> + brw_fs_surface_builder.cpp \ >> + brw_fs_surface_builder.h \ >> brw_fs_vector_splitting.cpp \ >> brw_fs_visitor.cpp \ >> brw_gs.c \ >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp >> new file mode 100644 >> index 0000000..007d5f4 >> --- /dev/null >> +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp >> @@ -0,0 +1,218 @@ >> +/* >> + * Copyright © 2013-2015 Intel Corporation >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the >> "Software"), >> + * to deal in the Software without restriction, including without limitation >> + * the rights to use, copy, modify, merge, publish, distribute, sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice (including the next >> + * paragraph) shall be included in all copies or substantial portions of the >> + * Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS >> OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR >> OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING >> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER >> DEALINGS >> + * IN THE SOFTWARE. >> + */ >> + >> +#include "brw_fs_surface_builder.h" >> + >> +using namespace brw; >> + >> +namespace { >> + namespace array_utils { >> + /** >> + * A plain contiguous region of memory in your register file, >> + * with well-defined size and no fancy addressing modes, >> + * swizzling or striding. >> + */ >> + struct array_reg : public backend_reg { >> + array_reg() : backend_reg(), size(0) >> + { >> + } >> + >> + explicit >> + array_reg(const backend_reg ®, unsigned size = 1) : >> + backend_reg(reg), size(size) >> + { >> + } >> + >> + /** Size of the region in 32B registers. */ >> + unsigned size; >> + }; >> + >> + /** >> + * Increase the register base offset by the specified amount >> + * given in 32B registers. >> + */ >> + array_reg >> + offset(array_reg reg, unsigned delta) >> + { >> + assert(delta == 0 || (reg.file != HW_REG && reg.file != IMM)); >> + reg.reg_offset += delta; >> + return reg; >> + } >> + >> + /** >> + * Create a register of natural vector size and SIMD width >> + * using array \p reg as storage. >> + */ >> + fs_reg >> + natural_reg(const fs_builder &bld, const array_reg ®) >> + { >> + return fs_reg(reg, bld.dispatch_width()); >> + } >> + >> + /** >> + * Allocate a raw chunk of memory from the virtual GRF file >> + * with no special vector size or SIMD width. \p n is given >> + * in units of 32B registers. >> + */ >> + array_reg >> + alloc_array_reg(const fs_builder &bld, enum brw_reg_type type, >> unsigned n) >> + { >> + return array_reg( >> + bld.vgrf(type, >> + DIV_ROUND_UP(n * REG_SIZE, >> + type_sz(type) * bld.dispatch_width())), >> + n); >> + } >> + >> + /** >> + * Fetch the i-th logical component of an array of registers and >> return >> + * it as a natural-width register according to the current SIMD mode. >> + * >> + * Each logical component may be in fact a vector with a number of >> + * per-channel values depending on the dispatch width and SIMD mode. >> + * E.g. a single physical 32B register contains 4, 1, or 0.5 logical >> + * 32-bit components depending on whether we're building SIMD4x2, >> SIMD8 >> + * or SIMD16 code respectively. >> + */ >> + fs_reg >> + index(const fs_builder &bld, const array_reg ®, unsigned i) >> + { >> + return offset(natural_reg(bld, reg), i); >> + } >> + >> + /** >> + * "Flatten" a vector of \p size components into a simple array of >> + * registers, getting rid of funky regioning modes. >> + */ >> + array_reg >> + emit_flatten(const fs_builder &bld, const fs_reg &src, unsigned size) >> + { >> + if (src.file == BAD_FILE || size == 0) { >> + return array_reg(); >> + >> + } else { >> + const array_reg dst = >> + alloc_array_reg(bld, src.type, size * bld.dispatch_width() / >> 8); >> + >> + for (unsigned c = 0; c < size; ++c) >> + bld.MOV(index(bld, dst, c), offset(src, c)); >> + >> + return dst; >> + } >> + } >> + >> + /** >> + * Copy one every \p src_stride logical components of the argument >> into >> + * one every \p dst_stride logical components of the result. >> + */ >> + array_reg >> + emit_stride(const fs_builder &bld, const array_reg &src, unsigned >> size, >> + unsigned dst_stride, unsigned src_stride) >> + { >> + if (src.file == BAD_FILE || size == 0) { >> + return array_reg(); >> + >> + } else if (dst_stride == 1 && src_stride == 1) { >> + return src; >> + >> + } else { >> + const array_reg dst = alloc_array_reg( >> + bld, src.type, >> + size * dst_stride * bld.dispatch_width() / 8); >> + >> + for (unsigned i = 0; i < size; ++i) >> + bld.MOV(index(bld, dst, i * dst_stride), >> + index(bld, src, i * src_stride)); >> + >> + return dst; >> + } >> + } >> + >> + /** >> + * Interleave logical components from the given arguments. If two >> + * arguments are provided \p size components will be copied from each >> to >> + * the even and odd components of the result respectively. >> + * >> + * It may be safely used to merge the two halves of a value calculated >> + * separately. >> + */ >> + array_reg >> + emit_zip(const fs_builder &bld, >> + const array_reg &src0, const array_reg &src1, >> + unsigned size) >> + { >> + const unsigned n = (src0.file != BAD_FILE) + (src1.file != >> BAD_FILE); >> + const array_reg srcs[] = { src0, src1 }; >> + const array_reg dst = size * n == 0 ? array_reg() : >> + alloc_array_reg(bld, src0.type, >> + size * n * bld.dispatch_width() / 8); >> + >> + for (unsigned i = 0; i < size; ++i) { >> + for (unsigned j = 0; j < n; ++j) >> + exec_all(bld.MOV(index(bld, dst, j + i * n), >> + index(bld, srcs[j], i))); >> + } >> + >> + return dst; >> + } >> + >> + /** >> + * Concatenate a number of register arrays passed in as arguments. >> + */ >> + array_reg >> + emit_collect(const fs_builder &bld, >> + const array_reg &src0 = array_reg(), >> + const array_reg &src1 = array_reg(), >> + const array_reg &src2 = array_reg(), >> + const array_reg &src3 = array_reg()) > > I promised you a comment on emit_collect and here it is. I *really* > like the idea of having a helper that creates the payload register > array, fills it out, emits the LOAD_PAYLOAD and returns the > destination register. I do, however, have two comments. First is the > name. Calling it emit_collect makes it sound like another "collect" > instruction is being emitted. I'd like to see it have a different > name. Maybe it would be better just as a second LOAD_PAYLOAD helper?
I wouldn't even mind if we changed the name of load_payload to collect. I think that's what the nv50 compiler calls it, and I've never liked the name "load_payload". _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev