On Wed, Jun 14, 2017 at 10:26 PM, Timothy Arceri <tarc...@itsqueeze.com> wrote:
> If all the swizzles are inside the src channels range then we can just > grab the srcs we need rather than converting everything. > > perf report convert_float() going from ~10% -> ~7% when > running the following glean test: > > glean -o -v -v -v -t +pointAtten > > Cc: Jason Ekstrand <ja...@jlekstrand.net> > --- > > Hi Jason, > > I've only perf tested the above glean test. What did you use to benchmark > this when you wrote it? > The teximage-colors test has a benchmark flag which I added at the time. I trust that a lot more than some random glean test. :-) --Jason > Thanks, > Tim > > src/mesa/main/format_utils.c | 84 ++++++++++++++++++++++++++++++ > +++----------- > 1 file changed, 63 insertions(+), 21 deletions(-) > > diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c > index 65e65d4..1649ac0 100644 > --- a/src/mesa/main/format_utils.c > +++ b/src/mesa/main/format_utils.c > @@ -799,41 +799,83 @@ swizzle_convert_try_memcpy(void *dst, > * > * \param DST_TYPE the C datatype of the destination > * \param DST_CHANS the number of destination channels > * \param SRC_TYPE the C datatype of the source > * \param SRC_CHANS the number of source channels > * \param CONV an expression for converting from the source data, > * storred in the variable "src", to the destination > * format > */ > #define SWIZZLE_CONVERT_LOOP(DST_TYPE, DST_CHANS, SRC_TYPE, SRC_CHANS, > CONV) \ > - do { \ > - int s, j; \ > - for (s = 0; s < count; ++s) { \ > - for (j = 0; j < SRC_CHANS; ++j) { \ > - SRC_TYPE src = typed_src[j]; \ > - tmp[j] = CONV; \ > - } \ > - \ > - typed_dst[0] = tmp[swizzle_x]; \ > - if (DST_CHANS > 1) { \ > - typed_dst[1] = tmp[swizzle_y]; \ > - if (DST_CHANS > 2) { \ > - typed_dst[2] = tmp[swizzle_z]; \ > - if (DST_CHANS > 3) { \ > - typed_dst[3] = tmp[swizzle_w]; \ > - } \ > - } \ > - } \ > - typed_src += SRC_CHANS; \ > - typed_dst += DST_CHANS; \ > - } \ > + do { \ > + bool fast_path = false; \ > + if (DST_CHANS == 1 && swizzle_x < SRC_CHANS) 
\ > + fast_path = true; \ > + if (DST_CHANS == 2 && swizzle_x < SRC_CHANS && \ > + swizzle_y < SRC_CHANS) \ > + fast_path = true; \ > + if (DST_CHANS == 3 && swizzle_x < SRC_CHANS && \ > + swizzle_y < SRC_CHANS && swizzle_z < SRC_CHANS)\ > + fast_path = true; \ > + if (DST_CHANS == 4 && swizzle_x < SRC_CHANS && \ > + swizzle_y < SRC_CHANS && \ > + swizzle_z < SRC_CHANS && \ > + swizzle_w < SRC_CHANS) \ > + fast_path = true; \ > + \ > + /* The fast path avoids copying/converting srcs we \ > + * will never use. \ > + */ \ > + if (fast_path) { \ > + for (int s = 0; s < count; ++s) { \ > + SRC_TYPE src = typed_src[swizzle_x]; \ > + tmp[swizzle_x] = CONV; \ > + typed_dst[0] = tmp[swizzle_x]; \ > + if (DST_CHANS > 1) { \ > + SRC_TYPE src = typed_src[swizzle_y]; \ > + tmp[swizzle_y] = CONV; \ > + typed_dst[1] = tmp[swizzle_y]; \ > + if (DST_CHANS > 2) { \ > + SRC_TYPE src = typed_src[swizzle_z]; \ > + tmp[swizzle_z] = CONV; \ > + typed_dst[2] = tmp[swizzle_z]; \ > + if (DST_CHANS > 3) { \ > + SRC_TYPE src = typed_src[swizzle_w];\ > + tmp[swizzle_w] = CONV; \ > + typed_dst[3] = tmp[swizzle_w]; \ > + } \ > + } \ > + } \ > + typed_src += SRC_CHANS; \ > + typed_dst += DST_CHANS; \ > + } \ > + } else { \ > + for (int s = 0; s < count; ++s) { \ > + for (unsigned j = 0; j < SRC_CHANS; ++j) { \ > + SRC_TYPE src = typed_src[j]; \ > + tmp[j] = CONV; \ > + } \ > + \ > + typed_dst[0] = tmp[swizzle_x]; \ > + if (DST_CHANS > 1) { \ > + typed_dst[1] = tmp[swizzle_y]; \ > + if (DST_CHANS > 2) { \ > + typed_dst[2] = tmp[swizzle_z]; \ > + if (DST_CHANS > 3) { \ > + typed_dst[3] = tmp[swizzle_w]; \ > + } \ > + } \ > + } \ > + typed_src += SRC_CHANS; \ > + typed_dst += DST_CHANS; \ > + } \ > + } \ > } while (0) > > /** > * Represents a single swizzle-and-convert operation > * > * This macro represents everything done in a single swizzle-and-convert > * operation. The actual work is done by the SWIZZLE_CONVERT_LOOP macro. 
> * This macro acts as a wrapper that uses a nested switch to ensure that > * all looping parameters get unrolled. > * > -- > 2.9.4 > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev