On Tue, Jul 21, 2015 at 9:38 AM, Francisco Jerez <curroje...@riseup.net> wrote: > Define bitfield packing, unpacking and type conversion operations in > terms of which the image format conversion code will be implemented. > These don't directly know about image formats: The packing and > unpacking functions take a 4-tuple of bit shifts and a 4-tuple of bit > widths as arguments, determining the bitfield position of each > component. Most of the remaining functions perform integer, fixed > point normalized, and floating point type conversions, mapping between > a target type with per-component bit widths given by a parameter and a > matching native representation of the same type. > > v2: Drop VEC4 suport. > v3: Rebase. > --- > .../drivers/dri/i965/brw_fs_surface_builder.cpp | 263 > +++++++++++++++++++++ > 1 file changed, 263 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp > b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp > index 0c879db..ea1c4aa 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp > @@ -323,4 +323,267 @@ namespace { > return dst; > } > } > + > + namespace image_format_conversion { > + using image_format_info::color_u; > + > + namespace { > + /** > + * Maximum representable value in an unsigned integer with the given > + * number of bits. > + */ > + inline unsigned > + scale(unsigned n) > + { > + return (1 << n) - 1; > + } > + } > + > + /** > + * Pack the vector \p src in a bitfield given the per-component bit > + * shifts and widths.
You should comment in here that it assumes that either everything fits in 32 bits or that it is homogeneous with a power-of-two width. More specifically, it only works if no component spans a 32-bit boundary. > + */ > + fs_reg > + emit_pack(const fs_builder &bld, const fs_reg &src, > + const color_u &shifts, const color_u &widths) > + { > + const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4); > + bool seen[4] = {}; > + > + for (unsigned c = 0; c < 4; ++c) { > + if (widths[c]) { > + const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD); > + > + /* Shift each component left to the correct bitfield > position. */ > + bld.SHL(tmp, offset(src, bld, c), fs_reg(shifts[c] % 32)); > + > + /* Add everything up. */ > + if (seen[shifts[c] / 32]) { > + bld.OR(offset(dst, bld, shifts[c] / 32), > + offset(dst, bld, shifts[c] / 32), tmp); > + } else { > + bld.MOV(offset(dst, bld, shifts[c] / 32), tmp); > + seen[shifts[c] / 32] = true; > + } > + } > + } > + > + return dst; > + } > + > + /** > + * Unpack a vector from the bitfield \p src given the per-component bit > + * shifts and widths. Same comment here. > + */ > + fs_reg > + emit_unpack(const fs_builder &bld, const fs_reg &src, > + const color_u &shifts, const color_u &widths) > + { > + const fs_reg dst = bld.vgrf(src.type, 4); > + > + for (unsigned c = 0; c < 4; ++c) { > + if (widths[c]) { > + /* Shift left to discard the most significant bits. */ > + bld.SHL(offset(dst, bld, c), > + offset(src, bld, shifts[c] / 32), > + fs_reg(32 - shifts[c] % 32 - widths[c])); > + > + /* Shift back to the least significant bits using an > arithmetic > + * shift to get sign extension on signed types. > + */ > + bld.ASR(offset(dst, bld, c), > + offset(dst, bld, c), fs_reg(32 - widths[c])); > + } > + } > + > + return dst; > + } > + > + /** > + * Convert a vector into an integer vector of the specified signedness > + * and bit widths, properly handling overflow. 
> + */ > + fs_reg > + emit_convert_to_integer(const fs_builder &bld, const fs_reg &src, > + const color_u &widths, bool is_signed) > + { > + const unsigned s = (is_signed ? 1 : 0); > + const fs_reg dst = bld.vgrf( > + is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4); > + > + for (unsigned c = 0; c < 4; ++c) { > + if (widths[c]) { > + bld.MOV(offset(dst, bld, c), offset(src, bld, c)); > + > + /* Clamp to the minimum value. */ > + if (is_signed) > + bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c), > + fs_reg(-(int)scale(widths[c] - s) - 1), > + BRW_CONDITIONAL_G); If it isn't signed, shouldn't you still do a min/max with zero? Also, I think you want to do the minmax while it's still a float. Otherwise, floating-point values bigger than, say, 2^32-1 may roll over. Unless, of course, our hardware does clamping as part of float -> int conversion. > + > + /* Clamp to the maximum value. */ > + bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c), > + fs_reg((int)scale(widths[c] - s)), > + BRW_CONDITIONAL_L); > + } > + } > + > + return dst; > + } > + > + /** > + * Convert a normalized fixed-point vector of the specified signedness > + * and bit widths into a floating point vector. > + */ > + fs_reg > + emit_convert_from_scaled(const fs_builder &bld, const fs_reg &src, > + const color_u &widths, bool is_signed) > + { > + const unsigned s = (is_signed ? 1 : 0); > + const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_F, 4); > + > + for (unsigned c = 0; c < 4; ++c) { > + if (widths[c]) { > + /* Convert to float. */ > + bld.MOV(offset(dst, bld, c), offset(src, bld, c)); > + > + /* Divide by the normalization constants. */ > + bld.MUL(offset(dst, bld, c), offset(dst, bld, c), > + fs_reg(1.0f / scale(widths[c] - s))); > + > + /* Clamp to the minimum value. 
*/ > + if (is_signed) > + bld.emit_minmax(offset(dst, bld, c), > + offset(dst, bld, c), fs_reg(-1.0f), > + BRW_CONDITIONAL_G); > + } > + } > + return dst; > + } > + > + /** > + * Convert a floating point vector into a normalized fixed-point vector > + * of the specified signedness and bit widths. > + */ > + fs_reg > + emit_convert_to_scaled(const fs_builder &bld, const fs_reg &src, > + const color_u &widths, bool is_signed) > + { > + const unsigned s = (is_signed ? 1 : 0); > + const fs_reg dst = bld.vgrf( > + is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4); > + const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F); > + > + for (unsigned c = 0; c < 4; ++c) { > + if (widths[c]) { > + bld.MOV(offset(fdst, bld, c), offset(src, bld, c)); > + > + /* Clamp to the minimum value. */ > + if (is_signed) > + bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c), > + fs_reg(-1.0f), BRW_CONDITIONAL_G); Again, clamp to 0 for unsigned? > + > + /* Clamp to the maximum value. */ > + bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c), > + fs_reg(1.0f), BRW_CONDITIONAL_L); > + > + /* Multiply by the normalization constants. */ > + bld.MUL(offset(fdst, bld, c), offset(fdst, bld, c), > + fs_reg((float)scale(widths[c] - s))); > + > + /* Convert to integer. */ > + bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c)); > + bld.MOV(offset(dst, bld, c), offset(fdst, bld, c)); > + } > + } > + > + return dst; > + } > + > + /** > + * Convert a floating point vector of the specified bit widths into a > + * 32-bit floating point vector. > + */ > + fs_reg > + emit_convert_from_float(const fs_builder &bld, const fs_reg &src, > + const color_u &widths) > + { > + const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4); > + const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F); > + > + for (unsigned c = 0; c < 4; ++c) { > + if (widths[c]) { > + bld.MOV(offset(dst, bld, c), offset(src, bld, c)); > + > + /* Extend 10-bit and 11-bit floating point numbers to 15 bits. 
> + * This works because they have a 5-bit exponent just like the > + * 16-bit floating point format, and they have no sign bit. > + */ > + if (widths[c] < 16) > + bld.SHL(offset(dst, bld, c), > + offset(dst, bld, c), fs_reg(15 - widths[c])); > + > + /* Convert to 32-bit floating point. */ > + bld.F16TO32(offset(fdst, bld, c), offset(dst, bld, c)); > + } > + } > + > + return fdst; > + } > + > + /** > + * Convert a vector into a floating point vector of the specified bit > + * widths. > + */ > + fs_reg > + emit_convert_to_float(const fs_builder &bld, const fs_reg &src, > + const color_u &widths) > + { > + const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4); > + const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F); > + > + for (unsigned c = 0; c < 4; ++c) { > + if (widths[c]) { > + bld.MOV(offset(fdst, bld, c), offset(src, bld, c)); > + > + /* Clamp to the minimum value. */ > + if (widths[c] < 16) > + bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c), > + fs_reg(0.0f), BRW_CONDITIONAL_G); > + > + /* Convert to 16-bit floating-point. */ > + bld.F32TO16(offset(dst, bld, c), offset(fdst, bld, c)); > + > + /* Discard the least significant bits to get floating point > + * numbers of the requested width. This works because the > + * 10-bit and 11-bit floating point formats have a 5-bit > + * exponent just like the 16-bit format, and they have no sign > + * bit. > + */ > + if (widths[c] < 16) > + bld.SHR(offset(dst, bld, c), offset(dst, bld, c), > + fs_reg(15 - widths[c])); > + } > + } > + > + return dst; > + } > + > + /** > + * Fill missing components of a vector with 0, 0, 0, 1. > + */ > + fs_reg > + emit_pad(const fs_builder &bld, const fs_reg &src, > + const color_u &widths) > + { > + const fs_reg dst = bld.vgrf(src.type, 4); > + const unsigned pad[] = { 0, 0, 0, 1 }; > + > + for (unsigned c = 0; c < 4; ++c) > + bld.MOV(offset(dst, bld, c), > + widths[c] ? 
offset(src, bld, c) : fs_reg(pad[c])); > + > + return dst; > + } > + } > } > -- > 2.4.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev