The subject line should be "tgsi: ..."
On 04/25/2014 11:41 AM, Ilia Mirkin wrote:
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 188 +++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_math.h | 11 ++ 2 files changed, 199 insertions(+) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 55da60a..2cc7884 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2603,6 +2603,40 @@ exec_vector_trinary(struct tgsi_exec_machine *mach, } } +typedef void (* micro_quaternary_op)(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3); + +static void +exec_vector_quaternary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_quaternary_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src[4]; + + fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); + fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); + fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype); + op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]); + } + } + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + static void exec_dp3(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) @@ -3571,6 +3605,135 @@ micro_ucmp(union tgsi_exec_channel *dst, }
I'd have to look up what ibfe means so can you put a comment on these functions to explain them?
static void +micro_ibfe(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + int i; + for (i = 0; i < 4; i++) { + int width = src2->i[i] & 0x1f; + int offset = src1->i[i] & 0x1f; + if (width == 0) + dst->i[i] = 0; + else if (width + offset < 32) + dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width); + else + dst->i[i] = src0->i[i] >> offset; + } +} + +static void +micro_ubfe(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + int i; + for (i = 0; i < 4; i++) { + int width = src2->u[i] & 0x1f; + int offset = src1->u[i] & 0x1f; + if (width == 0) + dst->u[i] = 0; + else if (width + offset < 32) + dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width); + else + dst->u[i] = src0->u[i] >> offset; + } +} + +static void +micro_bfi(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3) +{ + int i; + for (i = 0; i < 4; i++) { + int width = src3->u[i] & 0x1f; + int offset = src2->u[i] & 0x1f; + int bitmask = ((1 << width) - 1) << offset; + dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask); + } +} + +static void +micro_brev(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + int i; + static const unsigned reverse[16] = { + [0x0] = 0x0, + [0x1] = 0x8, + [0x2] = 0x4, + [0x3] = 0xc, + [0x4] = 0x2, + [0x5] = 0xa, + [0x6] = 0x6, + [0x7] = 0xe, + [0x8] = 0x1, + [0x9] = 0x9, + [0xa] = 0x5, + [0xb] = 0xd, + [0xc] = 0x3, + [0xd] = 0xb, + [0xe] = 0x7, + [0xf] = 0xf, + }; + for (i = 0; i < 4; i++) { + dst->u[i] = (reverse[(src->u[i] >> 0) & 0xf] << 28 | + reverse[(src->u[i] >> 4) & 0xf] << 24 | + reverse[(src->u[i] >> 8) & 0xf] << 20 | + reverse[(src->u[i] >> 12) & 0xf] << 16 | + reverse[(src->u[i] >> 16) & 0xf] << 12 | + reverse[(src->u[i] >> 20) & 0xf] << 8 | + reverse[(src->u[i] >> 24) & 0xf] << 4 | + reverse[(src->u[i] >> 28) & 0xf] << 0); + }
I think that code could be put into a u_math.c function since reversing a 32-bit uint might be useful elsewhere someday.
+} + +static void +micro_popc(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = util_bitcount(src->u[0]); + dst->u[1] = util_bitcount(src->u[1]); + dst->u[2] = util_bitcount(src->u[2]); + dst->u[3] = util_bitcount(src->u[3]); +} + +static void +micro_lsb(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = ffs(src->u[0]) - 1; + dst->i[1] = ffs(src->u[1]) - 1; + dst->i[2] = ffs(src->u[2]) - 1; + dst->i[3] = ffs(src->u[3]) - 1; +} + +static void +micro_imsb(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = util_last_bit_signed(src->i[0]) - 1; + dst->i[1] = util_last_bit_signed(src->i[1]) - 1; + dst->i[2] = util_last_bit_signed(src->i[2]) - 1; + dst->i[3] = util_last_bit_signed(src->i[3]) - 1; +} + +static void +micro_umsb(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = util_last_bit(src->u[0]) - 1; + dst->i[1] = util_last_bit(src->u[1]) - 1; + dst->i[2] = util_last_bit(src->u[2]) - 1; + dst->i[3] = util_last_bit(src->u[3]) - 1; +} + +static void exec_instruction( struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, @@ -4417,6 +4580,31 @@ exec_instruction( /* src[2] = sampler unit */ exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2); break; + + case TGSI_OPCODE_IBFE: + exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + case TGSI_OPCODE_UBFE: + exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + case TGSI_OPCODE_BFI: + exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + case TGSI_OPCODE_BREV: + exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + case TGSI_OPCODE_POPC: + exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + case TGSI_OPCODE_LSB: + exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); + break; + case TGSI_OPCODE_IMSB: + exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + case TGSI_OPCODE_UMSB: + exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); + break; default: assert( 0 ); } diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index ec03e4e..5b811e3 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -567,6 +567,17 @@ static INLINE unsigned util_last_bit(unsigned u) #endif }
Function comment?
+static INLINE unsigned util_last_bit_signed(int i) +{ +#if defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 407) + return 31 - __builtin_clrsb(i); +#else + if (i >= 0) + return util_last_bit(i); + else + return util_last_bit(~(unsigned)i); +#endif +} /* Destructively loop over all of the bits in a mask as in: *
I'd put this change in a separate commit. -Brian _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev