On Tue, May 5, 2015 at 2:48 PM, Francisco Jerez <curroje...@riseup.net> wrote: > Define a few transformations on register arrays which will be used > frequently during the construction of typed and untyped surface > message payloads. Their purpose is simple but the implementation is > rather messy, so it makes a lot of sense to factor them out as > separate functions. > > v2: Drop VEC4 suport. > --- > src/mesa/drivers/dri/i965/Makefile.sources | 2 + > .../drivers/dri/i965/brw_fs_surface_builder.cpp | 218 > +++++++++++++++++++++ > src/mesa/drivers/dri/i965/brw_fs_surface_builder.h | 31 +++ > 3 files changed, 251 insertions(+) > create mode 100644 src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp > create mode 100644 src/mesa/drivers/dri/i965/brw_fs_surface_builder.h > > diff --git a/src/mesa/drivers/dri/i965/Makefile.sources > b/src/mesa/drivers/dri/i965/Makefile.sources > index 260e448..f299913 100644 > --- a/src/mesa/drivers/dri/i965/Makefile.sources > +++ b/src/mesa/drivers/dri/i965/Makefile.sources > @@ -59,6 +59,8 @@ i965_FILES = \ > brw_fs_register_coalesce.cpp \ > brw_fs_saturate_propagation.cpp \ > brw_fs_sel_peephole.cpp \ > + brw_fs_surface_builder.cpp \ > + brw_fs_surface_builder.h \ > brw_fs_vector_splitting.cpp \ > brw_fs_visitor.cpp \ > brw_gs.c \ > diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp > b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp > new file mode 100644 > index 0000000..007d5f4 > --- /dev/null > +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp > @@ -0,0 +1,218 @@ > +/* > + * Copyright © 2013-2015 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#include "brw_fs_surface_builder.h" > + > +using namespace brw; > + > +namespace { > + namespace array_utils { > + /** > + * A plain contiguous region of memory in your register file, > + * with well-defined size and no fancy addressing modes, > + * swizzling or striding. > + */ > + struct array_reg : public backend_reg { > + array_reg() : backend_reg(), size(0) > + { > + } > + > + explicit > + array_reg(const backend_reg ®, unsigned size = 1) : > + backend_reg(reg), size(size) > + { > + } > + > + /** Size of the region in 32B registers. */ > + unsigned size; > + }; > + > + /** > + * Increase the register base offset by the specified amount > + * given in 32B registers. > + */ > + array_reg > + offset(array_reg reg, unsigned delta) > + { > + assert(delta == 0 || (reg.file != HW_REG && reg.file != IMM)); > + reg.reg_offset += delta; > + return reg; > + } > + > + /** > + * Create a register of natural vector size and SIMD width > + * using array \p reg as storage. > + */ > + fs_reg > + natural_reg(const fs_builder &bld, const array_reg ®) > + { > + return fs_reg(reg, bld.dispatch_width()); > + } > + > + /** > + * Allocate a raw chunk of memory from the virtual GRF file > + * with no special vector size or SIMD width. \p n is given > + * in units of 32B registers. > + */ > + array_reg > + alloc_array_reg(const fs_builder &bld, enum brw_reg_type type, > unsigned n) > + { > + return array_reg( > + bld.vgrf(type, > + DIV_ROUND_UP(n * REG_SIZE, > + type_sz(type) * bld.dispatch_width())), > + n); > + } > + > + /** > + * Fetch the i-th logical component of an array of registers and return > + * it as a natural-width register according to the current SIMD mode. > + * > + * Each logical component may be in fact a vector with a number of > + * per-channel values depending on the dispatch width and SIMD mode. > + * E.g. a single physical 32B register contains 4, 1, or 0.5 logical > + * 32-bit components depending on whether we're building SIMD4x2, SIMD8 > + * or SIMD16 code respectively. > + */ > + fs_reg > + index(const fs_builder &bld, const array_reg ®, unsigned i) > + { > + return offset(natural_reg(bld, reg), i); > + } > + > + /** > + * "Flatten" a vector of \p size components into a simple array of > + * registers, getting rid of funky regioning modes. > + */ > + array_reg > + emit_flatten(const fs_builder &bld, const fs_reg &src, unsigned size) > + { > + if (src.file == BAD_FILE || size == 0) { > + return array_reg(); > + > + } else { > + const array_reg dst = > + alloc_array_reg(bld, src.type, size * bld.dispatch_width() / > 8); > + > + for (unsigned c = 0; c < size; ++c) > + bld.MOV(index(bld, dst, c), offset(src, c)); > + > + return dst; > + } > + } > + > + /** > + * Copy one every \p src_stride logical components of the argument into > + * one every \p dst_stride logical components of the result. > + */ > + array_reg > + emit_stride(const fs_builder &bld, const array_reg &src, unsigned size, > + unsigned dst_stride, unsigned src_stride) > + { > + if (src.file == BAD_FILE || size == 0) { > + return array_reg(); > + > + } else if (dst_stride == 1 && src_stride == 1) { > + return src; > + > + } else { > + const array_reg dst = alloc_array_reg( > + bld, src.type, > + size * dst_stride * bld.dispatch_width() / 8); > + > + for (unsigned i = 0; i < size; ++i) > + bld.MOV(index(bld, dst, i * dst_stride), > + index(bld, src, i * src_stride)); > + > + return dst; > + } > + } > + > + /** > + * Interleave logical components from the given arguments. If two > + * arguments are provided \p size components will be copied from each > to > + * the even and odd components of the result respectively. > + * > + * It may be safely used to merge the two halves of a value calculated > + * separately. > + */ > + array_reg > + emit_zip(const fs_builder &bld, > + const array_reg &src0, const array_reg &src1, > + unsigned size) > + { > + const unsigned n = (src0.file != BAD_FILE) + (src1.file != > BAD_FILE); > + const array_reg srcs[] = { src0, src1 }; > + const array_reg dst = size * n == 0 ? array_reg() : > + alloc_array_reg(bld, src0.type, > + size * n * bld.dispatch_width() / 8); > + > + for (unsigned i = 0; i < size; ++i) { > + for (unsigned j = 0; j < n; ++j) > + exec_all(bld.MOV(index(bld, dst, j + i * n), > + index(bld, srcs[j], i))); > + } > + > + return dst; > + } > + > + /** > + * Concatenate a number of register arrays passed in as arguments. > + */ > + array_reg > + emit_collect(const fs_builder &bld, > + const array_reg &src0 = array_reg(), > + const array_reg &src1 = array_reg(), > + const array_reg &src2 = array_reg(), > + const array_reg &src3 = array_reg())
I promised you a comment on emit_collect and here it is. I *really* like the idea of having a helper that creates the payload register array, fills it out, emits the LOAD_PAYLOAD and returns the destination register. I do, however, have two comments. First is the name. Calling it emit_collect makes it sound like another "collect" instruction is being emitted. I'd like to see it have a different name. Maybe it would be better just as a second LOAD_PAYLOAD helper? Second, I think it would work better if the first source were explicitly a header source and the others were not. If the header source is a BAD_FILE, this would indicate that there is no header. The other three sources would be regular non-header payload sources. This would make it match much better with LOAD_PAYLOAD. I know that you and I disagreed on how LOAD_PAYLOAD works and this function reflects that difference. However, for the sake of consistency, I'd like to see them match up better. I don't know how that change would interact with the rest of the code, but let's table that until we get some of these other discussions out of the way. --Jason > + { > + const array_reg srcs[] = { src0, src1, src2, src3 }; > + const unsigned size = src0.size + src1.size + src2.size + src3.size; > + const array_reg dst = size == 0 ? array_reg() : > + alloc_array_reg(bld, BRW_REGISTER_TYPE_UD, size); > + fs_reg *const components = new fs_reg[size]; > + unsigned n = 0; > + > + for (unsigned i = 0; i < ARRAY_SIZE(srcs); ++i) { > + /* Split the array in m elements of maximal width. */ > + const unsigned width = > + (srcs[i].size * 8 % bld.dispatch_width() == 0 ? > + bld.dispatch_width() : bld.dispatch_width() / 2); > + const unsigned m = srcs[i].size * 8 / width; > + > + /* Get a builder of maximal width. */ > + const fs_builder ubld = > + (width == bld.dispatch_width() ? bld : bld.half(0)); > + > + for (unsigned j = 0; j < m; ++j) > + components[n++] = retype(index(ubld, srcs[i], j), > + BRW_REGISTER_TYPE_UD); > + } > + > + bld.LOAD_PAYLOAD(natural_reg(bld, dst), components, n); > + > + delete[] components; > + return dst; > + } > + } > +} > diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h > b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h > new file mode 100644 > index 0000000..ad79e76 > --- /dev/null > +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h > @@ -0,0 +1,31 @@ > +/* -*- c++ -*- */ > +/* > + * Copyright © 2013-2015 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#ifndef BRW_FS_SURFACE_BUILDER_H > +#define BRW_FS_SURFACE_BUILDER_H > + > +#include "brw_fs_builder.h" > +#include "brw_context.h" > + > +#endif > -- > 2.3.5 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev