From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c | 52 +++++++++++++++++++++++++++++++- src/gallium/drivers/radeonsi/si_shader.h | 12 ++++++-- src/gallium/drivers/radeonsi/si_state.c | 44 ++++++++++++++++++--------- 3 files changed, 90 insertions(+), 18 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index dfba9d4..5fb5f43 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -427,21 +427,25 @@ static void declare_input_vs( LP_FUNC_ATTR_READNONE); /* Break up the vec4 into individual components */ for (chan = 0; chan < 4; chan++) { LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan); out[chan] = LLVMBuildExtractElement(gallivm->builder, input, llvm_chan, ""); } fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (4 * input_index)) & 0xf; - if (fix_fetch) { + + switch (fix_fetch) { + case SI_FIX_FETCH_A2_SNORM: + case SI_FIX_FETCH_A2_SSCALED: + case SI_FIX_FETCH_A2_SINT: { /* The hardware returns an unsigned value; convert it to a * signed one. */ LLVMValueRef tmp = out[3]; LLVMValueRef c30 = LLVMConstInt(ctx->i32, 30, 0); /* First, recover the sign-extended signed integer value. */ if (fix_fetch == SI_FIX_FETCH_A2_SSCALED) tmp = LLVMBuildFPToUI(gallivm->builder, tmp, ctx->i32, ""); else @@ -463,20 +467,66 @@ static void declare_input_vs( LLVMValueRef clamp; LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0); tmp = LLVMBuildSIToFP(gallivm->builder, tmp, ctx->f32, ""); clamp = LLVMBuildFCmp(gallivm->builder, LLVMRealULT, tmp, neg_one, ""); tmp = LLVMBuildSelect(gallivm->builder, clamp, neg_one, tmp, ""); } else if (fix_fetch == SI_FIX_FETCH_A2_SSCALED) { tmp = LLVMBuildSIToFP(gallivm->builder, tmp, ctx->f32, ""); } out[3] = tmp; + break; + } + case SI_FIX_FETCH_RGBA_32_UNORM: + case SI_FIX_FETCH_RGBX_32_UNORM: + for (chan = 0; chan < 4; chan++) { + out[chan] = LLVMBuildBitCast(gallivm->builder, out[chan], + ctx->i32, ""); + out[chan] = LLVMBuildUIToFP(gallivm->builder, + out[chan], ctx->f32, ""); + out[chan] = LLVMBuildFMul(gallivm->builder, out[chan], + LLVMConstReal(ctx->f32, 1.0 / UINT_MAX), ""); + } + /* RGBX UINT returns 1 in alpha, which would be rounded to 0 by normalizing. */ + if (fix_fetch == SI_FIX_FETCH_RGBX_32_UNORM) + out[3] = LLVMConstReal(ctx->f32, 1); + break; + case SI_FIX_FETCH_RGBA_32_SNORM: + case SI_FIX_FETCH_RGBX_32_SNORM: + for (chan = 0; chan < 4; chan++) { + out[chan] = LLVMBuildBitCast(gallivm->builder, out[chan], + ctx->i32, ""); + out[chan] = LLVMBuildSIToFP(gallivm->builder, + out[chan], ctx->f32, ""); + out[chan] = LLVMBuildFMul(gallivm->builder, out[chan], + LLVMConstReal(ctx->f32, 1.0 / INT_MAX), ""); + } + /* RGBX SINT returns 1 in alpha, which would be rounded to 0 by normalizing. */ + if (fix_fetch == SI_FIX_FETCH_RGBX_32_SNORM) + out[3] = LLVMConstReal(ctx->f32, 1); + break; + case SI_FIX_FETCH_RGBA_32_USCALED: + for (chan = 0; chan < 4; chan++) { + out[chan] = LLVMBuildBitCast(gallivm->builder, out[chan], + ctx->i32, ""); + out[chan] = LLVMBuildUIToFP(gallivm->builder, + out[chan], ctx->f32, ""); + } + break; + case SI_FIX_FETCH_RGBA_32_SSCALED: + for (chan = 0; chan < 4; chan++) { + out[chan] = LLVMBuildBitCast(gallivm->builder, out[chan], + ctx->i32, ""); + out[chan] = LLVMBuildSIToFP(gallivm->builder, + out[chan], ctx->f32, ""); + } + break; } } static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base, unsigned swizzle) { struct si_shader_context *ctx = si_shader_context(bld_base); if (swizzle > 0) return bld_base->uint_bld.zero; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 89f9628..5e554d9 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -229,23 +229,29 @@ enum { /* SI-specific system values. */ enum { TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI = TGSI_SEMANTIC_COUNT, TGSI_SEMANTIC_DEFAULT_TESSINNER_SI, }; /* For VS shader key fix_fetch. */ enum { SI_FIX_FETCH_NONE = 0, - SI_FIX_FETCH_A2_SNORM = 1, - SI_FIX_FETCH_A2_SSCALED = 2, - SI_FIX_FETCH_A2_SINT = 3, + SI_FIX_FETCH_A2_SNORM, + SI_FIX_FETCH_A2_SSCALED, + SI_FIX_FETCH_A2_SINT, + SI_FIX_FETCH_RGBA_32_UNORM, + SI_FIX_FETCH_RGBX_32_UNORM, + SI_FIX_FETCH_RGBA_32_SNORM, + SI_FIX_FETCH_RGBX_32_SNORM, + SI_FIX_FETCH_RGBA_32_USCALED, + SI_FIX_FETCH_RGBA_32_SSCALED, }; struct si_shader; /* A shader selector is a gallium CSO and contains shader variants and * binaries for one TGSI program. This can be shared by multiple contexts. */ struct si_shader_selector { struct si_screen *screen; struct util_queue_fence ready; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index fa78a56..c8d1099 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1739,28 +1739,20 @@ static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, case 1: return V_008F0C_BUF_DATA_FORMAT_16; case 2: return V_008F0C_BUF_DATA_FORMAT_16_16; case 3: case 4: return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; } break; case 32: - /* From the Southern Islands ISA documentation about MTBUF: - * 'Memory reads of data in memory that is 32 or 64 bits do not - * undergo any format conversion.' - */ - if (type != UTIL_FORMAT_TYPE_FLOAT && - !desc->channel[first_non_void].pure_integer) - return V_008F0C_BUF_DATA_FORMAT_INVALID; - switch (desc->nr_channels) { case 1: return V_008F0C_BUF_DATA_FORMAT_32; case 2: return V_008F0C_BUF_DATA_FORMAT_32_32; case 3: return V_008F0C_BUF_DATA_FORMAT_32_32_32; case 4: return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; } @@ -1774,32 +1766,34 @@ static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, const struct util_format_description *desc, int first_non_void) { if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) return V_008F0C_BUF_NUM_FORMAT_FLOAT; assert(first_non_void >= 0); switch (desc->channel[first_non_void].type) { case UTIL_FORMAT_TYPE_SIGNED: - if (desc->channel[first_non_void].normalized) - return V_008F0C_BUF_NUM_FORMAT_SNORM; - else if (desc->channel[first_non_void].pure_integer) + if (desc->channel[first_non_void].size >= 32 || + desc->channel[first_non_void].pure_integer) return V_008F0C_BUF_NUM_FORMAT_SINT; + else if (desc->channel[first_non_void].normalized) + return V_008F0C_BUF_NUM_FORMAT_SNORM; else return V_008F0C_BUF_NUM_FORMAT_SSCALED; break; case UTIL_FORMAT_TYPE_UNSIGNED: - if (desc->channel[first_non_void].normalized) - return V_008F0C_BUF_NUM_FORMAT_UNORM; - else if (desc->channel[first_non_void].pure_integer) + if (desc->channel[first_non_void].size >= 32 || + desc->channel[first_non_void].pure_integer) return V_008F0C_BUF_NUM_FORMAT_UINT; + else if (desc->channel[first_non_void].normalized) + return V_008F0C_BUF_NUM_FORMAT_UNORM; else return V_008F0C_BUF_NUM_FORMAT_USCALED; break; case UTIL_FORMAT_TYPE_FLOAT: default: return V_008F0C_BUF_NUM_FORMAT_FLOAT; } } static unsigned si_is_vertex_format_supported(struct pipe_screen *screen, @@ -3335,27 +3329,29 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element); int i; assert(count <= SI_MAX_ATTRIBS); if (!v) return NULL; v->count = count; for (i = 0; i < count; ++i) { const struct util_format_description *desc; + const struct util_format_channel_description *channel; unsigned data_format, num_format; int first_non_void; desc = util_format_description(elements[i].src_format); first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); + channel = &desc->channel[first_non_void]; v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format); v->format_size[i] = desc->block.bits / 8; /* The hardware always treats the 2-bit alpha channel as @@ -3363,20 +3359,40 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, */ if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) { if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) { v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SNORM << (4 * i); } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) { v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SSCALED << (4 * i); } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) { /* This isn't actually used in OpenGL. */ v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SINT << (4 * i); } + } else if (channel->size == 32 && !channel->pure_integer) { + if (channel->type == UTIL_FORMAT_TYPE_SIGNED) { + if (channel->normalized) { + if (desc->swizzle[3] == PIPE_SWIZZLE_1) + v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_SNORM << (4 * i); + else + v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SNORM << (4 * i); + } else { + v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SSCALED << (4 * i); + } + } else if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) { + if (channel->normalized) { + if (desc->swizzle[3] == PIPE_SWIZZLE_1) + v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_UNORM << (4 * i); + else + v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_UNORM << (4 * i); + } else { + v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_USCALED << (4 * i); + } + } } /* We work around the fact that 8_8_8 and 16_16_16 data formats * do not exist by using the corresponding 4-component formats. * This requires a fixup of the descriptor for bounds checks. */ if (desc->block.bits == 3 * 8 || desc->block.bits == 3 * 16) { v->fix_size3 |= (desc->block.bits / 24) << (2 * i); } -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev