From: Dave Airlie <airl...@redhat.com> This adds support to TGSI for 64-bit integer immediates.
Signed-off-by: Dave Airlie <airl...@redhat.com> --- src/gallium/auxiliary/tgsi/tgsi_dump.c | 14 ++ src/gallium/auxiliary/tgsi/tgsi_exec.c | 244 ++++++++++++++++++++++++++++- src/gallium/auxiliary/tgsi/tgsi_parse.c | 2 + src/gallium/auxiliary/tgsi/tgsi_text.c | 44 ++++++ src/gallium/auxiliary/tgsi/tgsi_ureg.c | 45 +++++- src/gallium/auxiliary/tgsi/tgsi_ureg.h | 10 ++ src/gallium/include/pipe/p_shader_tokens.h | 2 + 7 files changed, 358 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index d59b7ff..614bcb2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -254,6 +254,20 @@ dump_imm_data(struct tgsi_iterate_context *iter, i++; break; } + case TGSI_IMM_INT64: { + union di d; + d.i = data[i].Uint | (uint64_t)data[i+1].Uint << 32; + UID( d.i ); + i++; + break; + } + case TGSI_IMM_UINT64: { + union di d; + d.ui = data[i].Uint | (uint64_t)data[i+1].Uint << 32; + UID( d.ui ); + i++; + break; + } case TGSI_IMM_FLOAT32: if (ctx->dump_float_as_hex) HFLT( data[i].Float ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 1457c06..c929475 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -77,6 +77,8 @@ union tgsi_double_channel { double d[TGSI_QUAD_SIZE]; unsigned u[TGSI_QUAD_SIZE][2]; + uint64_t u64[TGSI_QUAD_SIZE]; + int64_t i64[TGSI_QUAD_SIZE]; }; struct tgsi_double_vector { @@ -692,11 +694,251 @@ micro_u2d(union tgsi_double_channel *dst, dst->d[3] = (double)src->u[3]; } +static void +micro_i64abs(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0]; + dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1]; + dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2]; + dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3]; +} + +static void +micro_i64sgn(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0; + dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0; + dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0; + dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0; +} + +static void +micro_i64neg(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = -src->i64[0]; + dst->i64[1] = -src->i64[1]; + dst->i64[2] = -src->i64[2]; + dst->i64[3] = -src->i64[3]; +} + +static void +micro_u64seq(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U; + dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U; + dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U; + dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U; +} + +static void +micro_u64sne(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U; + dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U; + dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U; + dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U; +} + +static void +micro_i64slt(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U; + dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U; + dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U; + dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U; +} + +static void +micro_u64slt(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U; + dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U; + dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U; + dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U; +} + +static void +micro_i64sge(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U; + dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U; + dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U; + dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U; +} + +static void +micro_u64sge(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U; + dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U; + dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U; + dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U; +} + +static void +micro_u64max(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; + dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; + dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; + dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; +} + +static void +micro_i64max(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; + dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; + dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; + dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3]; +} + +static void +micro_u64min(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; + dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; + dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; + dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; +} + +static void +micro_i64min(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; + dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; + dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; + dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3]; +} + +static void +micro_u64add(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u64[0] = src[0].u64[0] + src[1].u64[0]; + dst->u64[1] = src[0].u64[1] + src[1].u64[1]; + dst->u64[2] = src[0].u64[2] + src[1].u64[2]; + dst->u64[3] = src[0].u64[3] + src[1].u64[3]; +} + +static void +micro_u64mul(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u64[0] = src[0].u64[0] * src[1].u64[0]; + dst->u64[1] = src[0].u64[1] * src[1].u64[1]; + dst->u64[2] = src[0].u64[2] * src[1].u64[2]; + dst->u64[3] = src[0].u64[3] * src[1].u64[3]; +} + +static void +micro_u64div(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u64[0] = src[0].u64[0] / src[1].u64[0]; + dst->u64[1] = src[0].u64[1] / src[1].u64[1]; + dst->u64[2] = src[0].u64[2] / src[1].u64[2]; + dst->u64[3] = src[0].u64[3] / src[1].u64[3]; +} + +static void +micro_i64div(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = src[0].i64[0] / src[1].i64[0]; + dst->i64[1] = src[0].i64[1] / src[1].i64[1]; + dst->i64[2] = src[0].i64[2] / src[1].i64[2]; + dst->i64[3] = src[0].i64[3] / src[1].i64[3]; +} + +static void +micro_u64mod(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u64[0] = src[0].u64[0] % src[1].u64[0]; + dst->u64[1] = src[0].u64[1] % src[1].u64[1]; + dst->u64[2] = src[0].u64[2] % src[1].u64[2]; + dst->u64[3] = src[0].u64[3] % src[1].u64[3]; +} + +static void +micro_i64mod(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = src[0].i64[0] % src[1].i64[0]; + dst->i64[1] = src[0].i64[1] % src[1].i64[1]; + dst->i64[2] = src[0].i64[2] % src[1].i64[2]; + dst->i64[3] = src[0].i64[3] % src[1].i64[3]; +} + +static void +micro_u64shl(union tgsi_double_channel *dst, + const union tgsi_double_channel *src0, + union tgsi_exec_channel *src1) +{ + unsigned masked_count; + masked_count = src1->u[0] & 0x3f; + dst->u64[0] = src0->u64[0] << masked_count; + masked_count = src1->u[1] & 0x3f; + dst->u64[1] = src0->u64[1] << masked_count; + masked_count = src1->u[2] & 0x3f; + dst->u64[2] = src0->u64[2] << masked_count; + masked_count = src1->u[3] & 0x3f; + dst->u64[3] = src0->u64[3] << masked_count; +} + +static void +micro_i64shr(union tgsi_double_channel *dst, + const union tgsi_double_channel *src0, + union tgsi_exec_channel *src1) +{ + unsigned masked_count; + masked_count = src1->u[0] & 0x3f; + dst->i64[0] = src0->i64[0] >> masked_count; + masked_count = src1->u[1] & 0x3f; + dst->i64[1] = src0->i64[1] >> masked_count; + masked_count = src1->u[2] & 0x3f; + dst->i64[2] = src0->i64[2] >> masked_count; + masked_count = src1->u[3] & 0x3f; + dst->i64[3] = src0->i64[3] >> masked_count; +} + +static void +micro_u64shr(union tgsi_double_channel *dst, + const union tgsi_double_channel *src0, + union tgsi_exec_channel *src1) +{ + unsigned masked_count; + masked_count = src1->u[0] & 0x3f; + dst->u64[0] = src0->u64[0] >> masked_count; + masked_count = src1->u[1] & 0x3f; + dst->u64[1] = src0->u64[1] >> masked_count; + masked_count = src1->u[2] & 0x3f; + dst->u64[2] = src0->u64[2] >> masked_count; + masked_count = src1->u[3] & 0x3f; + dst->u64[3] = src0->u64[3] >> masked_count; +} + enum tgsi_exec_datatype { TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT, - TGSI_EXEC_DATA_DOUBLE + TGSI_EXEC_DATA_DOUBLE, + TGSI_EXEC_DATA_INT64, + TGSI_EXEC_DATA_UINT64, }; /* diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 16564dd..940af7d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -155,12 +155,14 @@ tgsi_parse_token( break; case TGSI_IMM_UINT32: + case TGSI_IMM_UINT64: for (i = 0; i < imm_count; i++) { next_token(ctx, &imm->u[i].Uint); } break; case TGSI_IMM_INT32: + case TGSI_IMM_INT64: for (i = 0; i < imm_count; i++) { next_token(ctx, &imm->u[i].Int); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 8bdec06..be80842 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -295,6 +295,42 @@ static boolean parse_double( const char **pcur, uint32_t *val0, uint32_t *val1) return TRUE; } +static boolean parse_int64( const char **pcur, uint32_t *val0, uint32_t *val1) +{ + const char *cur = *pcur; + union { + int64_t i64val; + uint32_t uval[2]; + } v; + + v.i64val = strtoll(cur, (char**)pcur, 0); + if (*pcur == cur) + return FALSE; + + *val0 = v.uval[0]; + *val1 = v.uval[1]; + + return TRUE; +} + +static boolean parse_uint64( const char **pcur, uint32_t *val0, uint32_t *val1) +{ + const char *cur = *pcur; + union { + uint64_t u64val; + uint32_t uval[2]; + } v; + + v.u64val = strtoull(cur, (char**)pcur, 0); + if (*pcur == cur) + return FALSE; + + *val0 = v.uval[0]; + *val1 = v.uval[1]; + + return TRUE; +} + struct translate_ctx { const char *text; @@ -1228,6 +1264,14 @@ static boolean parse_immediate_data(struct translate_ctx *ctx, unsigned type, ret = parse_double(&ctx->cur, &values[i].Uint, &values[i+1].Uint); i++; break; + case TGSI_IMM_INT64: + ret = parse_int64(&ctx->cur, &values[i].Uint, &values[i+1].Uint); + i++; + break; + case TGSI_IMM_UINT64: + ret = parse_uint64(&ctx->cur, &values[i].Uint, &values[i+1].Uint); + i++; + break; case TGSI_IMM_FLOAT32: ret = parse_float(&ctx->cur, &values[i].Float); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index b67c383..6ad514d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -792,7 +792,9 @@ match_or_expand_immediate( const unsigned *v, unsigned nr2 = *pnr2; unsigned i, j; - if (type == TGSI_IMM_FLOAT64) + if (type == TGSI_IMM_FLOAT64 || + type == TGSI_IMM_UINT64 || + type == TGSI_IMM_INT64) return match_or_expand_immediate64(v, type, nr, v2, pnr2, swizzle); *swizzle = 0; @@ -871,7 +873,9 @@ out: /* Make sure that all referenced elements are from this immediate. * Has the effect of making size-one immediates into scalars. */ - if (type == TGSI_IMM_FLOAT64) { + if (type == TGSI_IMM_FLOAT64 || + type == TGSI_IMM_UINT64 || + type == TGSI_IMM_INT64) { for (j = nr; j < 4; j+=2) { swizzle |= (swizzle & 0xf) << (j * 2); } @@ -971,6 +975,43 @@ ureg_DECL_immediate_int( struct ureg_program *ureg, return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32); } +struct ureg_src +ureg_DECL_immediate_uint64( struct ureg_program *ureg, + const uint64_t *v, + unsigned nr ) +{ + union { + unsigned u[4]; + uint64_t u64[2]; + } fu; + unsigned int i; + + assert((nr / 2) < 3); + for (i = 0; i < nr / 2; i++) { + fu.u64[i] = v[i]; + } + + return decl_immediate(ureg, fu.u, nr, TGSI_IMM_UINT64); +} + +struct ureg_src +ureg_DECL_immediate_int64( struct ureg_program *ureg, + const int64_t *v, + unsigned nr ) +{ + union { + unsigned u[4]; + int64_t i64[2]; + } fu; + unsigned int i; + + assert((nr / 2) < 3); + for (i = 0; i < nr / 2; i++) { + fu.i64[i] = v[i]; + } + + return decl_immediate(ureg, fu.u, nr, TGSI_IMM_INT64); +} void ureg_emit_src( struct ureg_program *ureg, diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index b4258fd..c2c2f1a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -271,6 +271,16 @@ ureg_DECL_immediate_int( struct ureg_program *, const int *v, unsigned nr ); +struct ureg_src +ureg_DECL_immediate_uint64( struct ureg_program *, + const uint64_t *v, + unsigned nr ); + +struct ureg_src +ureg_DECL_immediate_int64( struct ureg_program *, + const int64_t *v, + unsigned nr ); + void ureg_DECL_constant2D(struct ureg_program *ureg, unsigned first, diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 37ce771..38e06bf 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -244,6 +244,8 @@ enum tgsi_imm_type { TGSI_IMM_UINT32, TGSI_IMM_INT32, TGSI_IMM_FLOAT64, + TGSI_IMM_UINT64, + TGSI_IMM_INT64, }; struct tgsi_immediate -- 2.5.5 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev