On 15/06/17 15:34, Jason Ekstrand wrote:
On Wed, Jun 14, 2017 at 10:26 PM, Timothy Arceri <tarc...@itsqueeze.com <mailto:tarc...@itsqueeze.com>> wrote:

    If all the swizzles are inside the src channels range then we can just
    grab the srcs we need rather than converting everything.

    perf report shows convert_float() going from ~10% -> ~7% when
    running the following glean test:

    glean -o -v -v -v -t +pointAtten

    Cc: Jason Ekstrand <ja...@jlekstrand.net <mailto:ja...@jlekstrand.net>>
    ---

      Hi Jason,

      I've only perf tested the above glean test. What did you use to
    benchmark
      this when you wrote it?


The teximage-colors test has a benchmark flag which I added at the time. I trust that a lot more than some random glean test. :-)

Cool, thanks :) I'm seeing up to a 5x improvement in some tests; the rest are largely unchanged :)


--Jason

      Thanks,
      Tim

      src/mesa/main/format_utils.c | 84
    +++++++++++++++++++++++++++++++++-----------
      1 file changed, 63 insertions(+), 21 deletions(-)

    diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c
    index 65e65d4..1649ac0 100644
    --- a/src/mesa/main/format_utils.c
    +++ b/src/mesa/main/format_utils.c
    @@ -799,41 +799,83 @@ swizzle_convert_try_memcpy(void *dst,
       *
       * \param   DST_TYPE    the C datatype of the destination
       * \param   DST_CHANS   the number of destination channels
       * \param   SRC_TYPE    the C datatype of the source
       * \param   SRC_CHANS   the number of source channels
       * \param   CONV        an expression for converting from the
    source data,
       *                      storred in the variable "src", to the
    destination
       *                      format
       */
      #define SWIZZLE_CONVERT_LOOP(DST_TYPE, DST_CHANS, SRC_TYPE,
    SRC_CHANS, CONV) \
    -   do {                                           \
    -      int s, j;                                   \
    -      for (s = 0; s < count; ++s) {               \
    -         for (j = 0; j < SRC_CHANS; ++j) {        \
    -            SRC_TYPE src = typed_src[j];          \
    -            tmp[j] = CONV;                        \
    -         }                                        \
    -                                                  \
    -         typed_dst[0] = tmp[swizzle_x];           \
    -         if (DST_CHANS > 1) {                     \
    -            typed_dst[1] = tmp[swizzle_y];        \
    -            if (DST_CHANS > 2) {                  \
    -               typed_dst[2] = tmp[swizzle_z];     \
    -               if (DST_CHANS > 3) {               \
    -                  typed_dst[3] = tmp[swizzle_w];  \
    -               }                                  \
    -            }                                     \
    -         }                                        \
    -         typed_src += SRC_CHANS;                  \
    -         typed_dst += DST_CHANS;                  \
    -      }                                           \
    +   do {                                                  \
    +      bool fast_path = false;                            \
    +      if (DST_CHANS == 1 && swizzle_x < SRC_CHANS)       \
    +         fast_path = true;                               \
    +      if (DST_CHANS == 2 && swizzle_x < SRC_CHANS &&     \
    +          swizzle_y < SRC_CHANS)                         \
    +         fast_path = true;                               \
    +      if (DST_CHANS == 3 && swizzle_x < SRC_CHANS &&     \
    +          swizzle_y < SRC_CHANS && swizzle_z < SRC_CHANS)\
    +         fast_path = true;                               \
    +      if (DST_CHANS == 4 && swizzle_x < SRC_CHANS &&     \
    +          swizzle_y < SRC_CHANS &&                       \
    +          swizzle_z < SRC_CHANS &&                       \
    +          swizzle_w < SRC_CHANS)                         \
    +         fast_path = true;                               \
    +                                                         \
    +      /* The fast path avoids copying/converting srcs we \
    +       * will never use.                                 \
    +       */                                                \
    +      if (fast_path) {                                   \
    +         for (int s = 0; s < count; ++s) {               \
    +            SRC_TYPE src = typed_src[swizzle_x];         \
    +            tmp[swizzle_x] = CONV;                       \
    +            typed_dst[0] = tmp[swizzle_x];               \
    +            if (DST_CHANS > 1) {                         \
    +               SRC_TYPE src = typed_src[swizzle_y];      \
    +               tmp[swizzle_y] = CONV;                    \
    +               typed_dst[1] = tmp[swizzle_y];            \
    +               if (DST_CHANS > 2) {                      \
    +                  SRC_TYPE src = typed_src[swizzle_z];   \
    +                  tmp[swizzle_z] = CONV;                 \
    +                  typed_dst[2] = tmp[swizzle_z];         \
    +                  if (DST_CHANS > 3) {                   \
    +                     SRC_TYPE src = typed_src[swizzle_w];\
    +                     tmp[swizzle_w] = CONV;              \
    +                     typed_dst[3] = tmp[swizzle_w];      \
    +                  }                                      \
    +               }                                         \
    +            }                                            \
    +            typed_src += SRC_CHANS;                      \
    +            typed_dst += DST_CHANS;                      \
    +         }                                               \
    +      } else {                                           \
    +         for (int s = 0; s < count; ++s) {               \
    +            for (unsigned j = 0; j < SRC_CHANS; ++j) {   \
    +               SRC_TYPE src = typed_src[j];              \
    +               tmp[j] = CONV;                            \
    +            }                                            \
    +                                                         \
    +            typed_dst[0] = tmp[swizzle_x];               \
    +            if (DST_CHANS > 1) {                         \
    +               typed_dst[1] = tmp[swizzle_y];            \
    +               if (DST_CHANS > 2) {                      \
    +                  typed_dst[2] = tmp[swizzle_z];         \
    +                  if (DST_CHANS > 3) {                   \
    +                     typed_dst[3] = tmp[swizzle_w];      \
    +                  }                                      \
    +               }                                         \
    +            }                                            \
    +            typed_src += SRC_CHANS;                      \
    +            typed_dst += DST_CHANS;                      \
    +         }                                               \
    +      }                                                  \
         } while (0)

      /**
       * Represents a single swizzle-and-convert operation
       *
       * This macro represents everything done in a single
    swizzle-and-convert
       * operation.  The actual work is done by the SWIZZLE_CONVERT_LOOP
    macro.
       * This macro acts as a wrapper that uses a nested switch to
    ensure that
       * all looping parameters get unrolled.
       *
    --
    2.9.4


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to