Re: [Mesa-dev] [PATCH 4/4] softpipe: add tgsi_exec support for new bit manipulation opcodes

Brian Paul Fri, 25 Apr 2014 10:59:47 -0700

The subject line should be "tgsi: ..."


On 04/25/2014 11:41 AM, Ilia Mirkin wrote:

Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---
  src/gallium/auxiliary/tgsi/tgsi_exec.c | 188 +++++++++++++++++++++++++++++++++
  src/gallium/auxiliary/util/u_math.h    |  11 ++
  2 files changed, 199 insertions(+)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 55da60a..2cc7884 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2603,6 +2603,40 @@ exec_vector_trinary(struct tgsi_exec_machine *mach,
     }
  }

+typedef void (* micro_quaternary_op)(union tgsi_exec_channel *dst,
+                                     const union tgsi_exec_channel *src0,
+                                     const union tgsi_exec_channel *src1,
+                                     const union tgsi_exec_channel *src2,
+                                     const union tgsi_exec_channel *src3);
+
+static void
+exec_vector_quaternary(struct tgsi_exec_machine *mach,
+                       const struct tgsi_full_instruction *inst,
+                       micro_quaternary_op op,
+                       enum tgsi_exec_datatype dst_datatype,
+                       enum tgsi_exec_datatype src_datatype)
+{
+   unsigned int chan;
+   struct tgsi_exec_vector dst;
+
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+         union tgsi_exec_channel src[4];
+
+         fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
+         fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
+         fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
+         fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype);
+         op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]);
+      }
+   }
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+         store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, 
dst_datatype);
+      }
+   }
+}
+
  static void
  exec_dp3(struct tgsi_exec_machine *mach,
           const struct tgsi_full_instruction *inst)
@@ -3571,6 +3605,135 @@ micro_ucmp(union tgsi_exec_channel *dst,
  }

I'd have to look up what ibfe means, so can you put a comment on these functions to explain them?

  static void
+micro_ibfe(union tgsi_exec_channel *dst,
+           const union tgsi_exec_channel *src0,
+           const union tgsi_exec_channel *src1,
+           const union tgsi_exec_channel *src2)
+{
+   int i;
+   for (i = 0; i < 4; i++) {
+      int width = src2->i[i] & 0x1f;
+      int offset = src1->i[i] & 0x1f;
+      if (width == 0)
+         dst->i[i] = 0;
+      else if (width + offset < 32)
+         dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width);
+      else
+         dst->i[i] = src0->i[i] >> offset;
+   }
+}
+
+static void
+micro_ubfe(union tgsi_exec_channel *dst,
+           const union tgsi_exec_channel *src0,
+           const union tgsi_exec_channel *src1,
+           const union tgsi_exec_channel *src2)
+{
+   int i;
+   for (i = 0; i < 4; i++) {
+      int width = src2->u[i] & 0x1f;
+      int offset = src1->u[i] & 0x1f;
+      if (width == 0)
+         dst->u[i] = 0;
+      else if (width + offset < 32)
+         dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width);
+      else
+         dst->u[i] = src0->u[i] >> offset;
+   }
+}
+
+static void
+micro_bfi(union tgsi_exec_channel *dst,
+          const union tgsi_exec_channel *src0,
+          const union tgsi_exec_channel *src1,
+          const union tgsi_exec_channel *src2,
+          const union tgsi_exec_channel *src3)
+{
+   int i;
+   for (i = 0; i < 4; i++) {
+      int width = src3->u[i] & 0x1f;
+      int offset = src2->u[i] & 0x1f;
+      int bitmask = ((1 << width) - 1) << offset;
+      dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask);
+   }
+}
+
+static void
+micro_brev(union tgsi_exec_channel *dst,
+           const union tgsi_exec_channel *src)
+{
+   int i;
+   static const unsigned reverse[16] = {
+      [0x0] = 0x0,
+      [0x1] = 0x8,
+      [0x2] = 0x4,
+      [0x3] = 0xc,
+      [0x4] = 0x2,
+      [0x5] = 0xa,
+      [0x6] = 0x6,
+      [0x7] = 0xe,
+      [0x8] = 0x1,
+      [0x9] = 0x9,
+      [0xa] = 0x5,
+      [0xb] = 0xd,
+      [0xc] = 0x3,
+      [0xd] = 0xb,
+      [0xe] = 0x7,
+      [0xf] = 0xf,
+   };
+   for (i = 0; i < 4; i++) {
+      dst->u[i] = (reverse[(src->u[i] >> 0) & 0xf] << 28 |
+                   reverse[(src->u[i] >> 4) & 0xf] << 24 |
+                   reverse[(src->u[i] >> 8) & 0xf] << 20 |
+                   reverse[(src->u[i] >> 12) & 0xf] << 16 |
+                   reverse[(src->u[i] >> 16) & 0xf] << 12 |
+                   reverse[(src->u[i] >> 20) & 0xf] << 8 |
+                   reverse[(src->u[i] >> 24) & 0xf] << 4 |
+                   reverse[(src->u[i] >> 28) & 0xf] << 0);
+   }

I think that code could be put into a u_math.c function since reversing a 32-bit uint might be useful elsewhere someday.

+}
+
+static void
+micro_popc(union tgsi_exec_channel *dst,
+           const union tgsi_exec_channel *src)
+{
+   dst->u[0] = util_bitcount(src->u[0]);
+   dst->u[1] = util_bitcount(src->u[1]);
+   dst->u[2] = util_bitcount(src->u[2]);
+   dst->u[3] = util_bitcount(src->u[3]);
+}
+
+static void
+micro_lsb(union tgsi_exec_channel *dst,
+          const union tgsi_exec_channel *src)
+{
+   dst->i[0] = ffs(src->u[0]) - 1;
+   dst->i[1] = ffs(src->u[1]) - 1;
+   dst->i[2] = ffs(src->u[2]) - 1;
+   dst->i[3] = ffs(src->u[3]) - 1;
+}
+
+static void
+micro_imsb(union tgsi_exec_channel *dst,
+           const union tgsi_exec_channel *src)
+{
+   dst->i[0] = util_last_bit_signed(src->i[0]) - 1;
+   dst->i[1] = util_last_bit_signed(src->i[1]) - 1;
+   dst->i[2] = util_last_bit_signed(src->i[2]) - 1;
+   dst->i[3] = util_last_bit_signed(src->i[3]) - 1;
+}
+
+static void
+micro_umsb(union tgsi_exec_channel *dst,
+           const union tgsi_exec_channel *src)
+{
+   dst->i[0] = util_last_bit(src->u[0]) - 1;
+   dst->i[1] = util_last_bit(src->u[1]) - 1;
+   dst->i[2] = util_last_bit(src->u[2]) - 1;
+   dst->i[3] = util_last_bit(src->u[3]) - 1;
+}
+
+static void
  exec_instruction(
     struct tgsi_exec_machine *mach,
     const struct tgsi_full_instruction *inst,
@@ -4417,6 +4580,31 @@ exec_instruction(
        /* src[2] = sampler unit */
        exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2);
        break;
+
+   case TGSI_OPCODE_IBFE:
+      exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT, 
TGSI_EXEC_DATA_INT);
+      break;
+   case TGSI_OPCODE_UBFE:
+      exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT, 
TGSI_EXEC_DATA_UINT);
+      break;
+   case TGSI_OPCODE_BFI:
+      exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT, 
TGSI_EXEC_DATA_UINT);
+      break;
+   case TGSI_OPCODE_BREV:
+      exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT, 
TGSI_EXEC_DATA_UINT);
+      break;
+   case TGSI_OPCODE_POPC:
+      exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT, 
TGSI_EXEC_DATA_UINT);
+      break;
+   case TGSI_OPCODE_LSB:
+      exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_INT, 
TGSI_EXEC_DATA_UINT);
+      break;
+   case TGSI_OPCODE_IMSB:
+      exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT, 
TGSI_EXEC_DATA_INT);
+      break;
+   case TGSI_OPCODE_UMSB:
+      exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_INT, 
TGSI_EXEC_DATA_UINT);
+      break;
     default:
        assert( 0 );
     }
diff --git a/src/gallium/auxiliary/util/u_math.h 
b/src/gallium/auxiliary/util/u_math.h
index ec03e4e..5b811e3 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -567,6 +567,17 @@ static INLINE unsigned util_last_bit(unsigned u)
  #endif
  }


Function comment?

+static INLINE unsigned util_last_bit_signed(int i)
+{
+#if defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 407)
+   return 31 - __builtin_clrsb(i);
+#else
+   if (i >= 0)
+      return util_last_bit(i);
+   else
+      return util_last_bit(~(unsigned)i);
+#endif
+}

  /* Destructively loop over all of the bits in a mask as in:
   *


I'd put this change in a separate commit.

-Brian

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/4] softpipe: add tgsi_exec support for new bit manipulation opcodes

Reply via email to