From: Marek Olšák <marek.ol...@amd.com> so that we can disable u_vbuf for GL core profiles. --- src/gallium/drivers/radeonsi/si_shader.c | 78 ++++++++++++++++++++++++++------ src/gallium/drivers/radeonsi/si_shader.h | 4 ++ src/gallium/drivers/radeonsi/si_state.c | 56 ++++++++++++++++++++--- 3 files changed, 117 insertions(+), 21 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 7b89014..1c84e8d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -320,70 +320,99 @@ static LLVMValueRef get_instance_index_for_fetch( /* The division must be done before START_INSTANCE is added. */ if (divisor > 1) result = LLVMBuildUDiv(gallivm->builder, result, lp_build_const_int32(gallivm, divisor), ""); return LLVMBuildAdd(gallivm->builder, result, LLVMGetParam(radeon_bld->main_fn, param_start_instance), ""); } +/* Bitcast <4 x float> to <2 x double>, extract the component, and convert + * to float. */ +static LLVMValueRef extract_double_to_float(struct si_shader_context *ctx, + LLVMValueRef vec4, + unsigned double_index) +{ + LLVMBuilderRef builder = ctx->gallivm.builder; + LLVMTypeRef f64 = LLVMDoubleTypeInContext(ctx->gallivm.context); + LLVMValueRef dvec2 = LLVMBuildBitCast(builder, vec4, + LLVMVectorType(f64, 2), ""); + LLVMValueRef index = LLVMConstInt(ctx->i32, double_index, 0); + LLVMValueRef value = LLVMBuildExtractElement(builder, dvec2, index, ""); + return LLVMBuildFPTrunc(builder, value, ctx->f32, ""); +} + static void declare_input_vs( struct si_shader_context *ctx, unsigned input_index, const struct tgsi_full_declaration *decl, LLVMValueRef out[4]) { struct lp_build_context *base = &ctx->bld_base.base; struct gallivm_state *gallivm = base->gallivm; unsigned chan; unsigned fix_fetch; + unsigned num_fetches; + unsigned fetch_stride; LLVMValueRef t_list_ptr; LLVMValueRef t_offset; LLVMValueRef t_list; - LLVMValueRef attribute_offset; - LLVMValueRef buffer_index; + LLVMValueRef vertex_index; LLVMValueRef args[3]; - LLVMValueRef input; + LLVMValueRef input[3]; /* Load the T list */ t_list_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_VERTEX_BUFFERS); t_offset = lp_build_const_int32(gallivm, input_index); t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset); - /* Build the attribute offset */ - attribute_offset = lp_build_const_int32(gallivm, 0); - - buffer_index = LLVMGetParam(ctx->main_fn, + vertex_index = LLVMGetParam(ctx->main_fn, ctx->param_vertex_index0 + input_index); + fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (4 * input_index)) & 0xf; + + /* Do multiple loads for double formats. */ + if (fix_fetch == SI_FIX_FETCH_RGB_64_FLOAT) { + num_fetches = 3; /* 3 2-dword loads */ + fetch_stride = 8; + } else if (fix_fetch == SI_FIX_FETCH_RGBA_64_FLOAT) { + num_fetches = 2; /* 2 4-dword loads */ + fetch_stride = 16; + } else { + num_fetches = 1; + fetch_stride = 0; + } + args[0] = t_list; - args[1] = attribute_offset; - args[2] = buffer_index; - input = lp_build_intrinsic(gallivm->builder, - "llvm.SI.vs.load.input", ctx->v4f32, args, 3, - LP_FUNC_ATTR_READNONE); + args[2] = vertex_index; + + for (unsigned i = 0; i < num_fetches; i++) { + args[1] = LLVMConstInt(ctx->i32, fetch_stride * i, 0); + + input[i] = lp_build_intrinsic(gallivm->builder, + "llvm.SI.vs.load.input", ctx->v4f32, args, 3, + LP_FUNC_ATTR_READNONE); + } /* Break up the vec4 into individual components */ for (chan = 0; chan < 4; chan++) { LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan); out[chan] = LLVMBuildExtractElement(gallivm->builder, - input, llvm_chan, ""); + input[0], llvm_chan, ""); } - fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (4 * input_index)) & 0xf; - switch (fix_fetch) { case SI_FIX_FETCH_A2_SNORM: case SI_FIX_FETCH_A2_SSCALED: case SI_FIX_FETCH_A2_SINT: { /* The hardware returns an unsigned value; convert it to a * signed one. */ LLVMValueRef tmp = out[3]; LLVMValueRef c30 = LLVMConstInt(ctx->i32, 30, 0); @@ -465,20 +494,39 @@ static void declare_input_vs( } break; case SI_FIX_FETCH_RGBA_32_SSCALED: for (chan = 0; chan < 4; chan++) { out[chan] = LLVMBuildBitCast(gallivm->builder, out[chan], ctx->i32, ""); out[chan] = LLVMBuildSIToFP(gallivm->builder, out[chan], ctx->f32, ""); } break; + case SI_FIX_FETCH_RG_64_FLOAT: + for (chan = 0; chan < 2; chan++) + out[chan] = extract_double_to_float(ctx, input[0], chan); + + out[2] = LLVMConstReal(ctx->f32, 0); + out[3] = LLVMConstReal(ctx->f32, 1); + break; + case SI_FIX_FETCH_RGB_64_FLOAT: + for (chan = 0; chan < 3; chan++) + out[chan] = extract_double_to_float(ctx, input[chan], 0); + + out[3] = LLVMConstReal(ctx->f32, 1); + break; + case SI_FIX_FETCH_RGBA_64_FLOAT: + for (chan = 0; chan < 4; chan++) { + out[chan] = extract_double_to_float(ctx, input[chan / 2], + chan % 2); + } + break; } } static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base, unsigned swizzle) { struct si_shader_context *ctx = si_shader_context(bld_base); if (swizzle > 0) return bld_base->uint_bld.zero; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 5464d67..6398b39 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -240,20 +240,24 @@ enum { SI_FIX_FETCH_A2_SSCALED, SI_FIX_FETCH_A2_SINT, SI_FIX_FETCH_RGBA_32_UNORM, SI_FIX_FETCH_RGBX_32_UNORM, SI_FIX_FETCH_RGBA_32_SNORM, SI_FIX_FETCH_RGBX_32_SNORM, SI_FIX_FETCH_RGBA_32_USCALED, SI_FIX_FETCH_RGBA_32_SSCALED, SI_FIX_FETCH_RGBA_32_FIXED, SI_FIX_FETCH_RGBX_32_FIXED, + SI_FIX_FETCH_RG_64_FLOAT, + SI_FIX_FETCH_RGB_64_FLOAT, + SI_FIX_FETCH_RGBA_64_FLOAT, + SI_FIX_FETCH_RESERVED_15, /* maximum */ }; struct si_shader; /* State of the context creating the shader object. */ struct si_compiler_ctx_state { /* Should only be used by si_init_shader_selector_async and * si_build_shader_variant if thread_index == -1 (non-threaded). */ LLVMTargetMachineRef tm; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 1e0729c..107bc06 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1755,20 +1755,33 @@ static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, case 1: return V_008F0C_BUF_DATA_FORMAT_32; case 2: return V_008F0C_BUF_DATA_FORMAT_32_32; case 3: return V_008F0C_BUF_DATA_FORMAT_32_32_32; case 4: return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; } break; + case 64: + /* Legacy double formats. */ + switch (desc->nr_channels) { + case 1: /* 1 load */ + return V_008F0C_BUF_DATA_FORMAT_32_32; + case 2: /* 1 load */ + return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; + case 3: /* 3 loads */ + return V_008F0C_BUF_DATA_FORMAT_32_32; + case 4: /* 2 loads */ + return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; + } + break; } return V_008F0C_BUF_DATA_FORMAT_INVALID; } static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, const struct util_format_description *desc, int first_non_void) { if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) @@ -3352,43 +3365,39 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, v->count = count; v->desc_list_byte_size = align(count * 16, SI_CPDMA_ALIGNMENT); for (i = 0; i < count; ++i) { const struct util_format_description *desc; const struct util_format_channel_description *channel; unsigned data_format, num_format; int first_non_void; unsigned vbo_index = elements[i].vertex_buffer_index; + unsigned char swizzle[4]; if (vbo_index >= SI_NUM_VERTEX_BUFFERS) { FREE(v); return NULL; } if (!used[vbo_index]) { v->first_vb_use_mask |= 1 << i; used[vbo_index] = true; } desc = util_format_description(elements[i].src_format); first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL; + memcpy(swizzle, desc->swizzle, sizeof(swizzle)); - v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | - S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | - S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | - S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | - S_008F0C_NUM_FORMAT(num_format) | - S_008F0C_DATA_FORMAT(data_format); v->format_size[i] = desc->block.bits / 8; /* The hardware always treats the 2-bit alpha channel as * unsigned, so a shader workaround is needed. */ if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) { if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) { v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SNORM << (4 * i); } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) { v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SSCALED << (4 * i); @@ -3414,22 +3423,57 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, } else if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) { if (channel->normalized) { if (desc->swizzle[3] == PIPE_SWIZZLE_1) v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_UNORM << (4 * i); else v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_UNORM << (4 * i); } else { v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_USCALED << (4 * i); } } + } else if (channel && channel->size == 64 && + channel->type == UTIL_FORMAT_TYPE_FLOAT) { + switch (desc->nr_channels) { + case 1: + case 2: + v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RG_64_FLOAT << (4 * i); + swizzle[0] = PIPE_SWIZZLE_X; + swizzle[1] = PIPE_SWIZZLE_Y; + swizzle[2] = desc->nr_channels == 2 ? PIPE_SWIZZLE_Z : PIPE_SWIZZLE_0; + swizzle[3] = desc->nr_channels == 2 ? PIPE_SWIZZLE_W : PIPE_SWIZZLE_0; + break; + case 3: + v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGB_64_FLOAT << (4 * i); + swizzle[0] = PIPE_SWIZZLE_X; /* 3 loads */ + swizzle[1] = PIPE_SWIZZLE_Y; + swizzle[2] = PIPE_SWIZZLE_0; + swizzle[3] = PIPE_SWIZZLE_0; + break; + case 4: + v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_64_FLOAT << (4 * i); + swizzle[0] = PIPE_SWIZZLE_X; /* 2 loads */ + swizzle[1] = PIPE_SWIZZLE_Y; + swizzle[2] = PIPE_SWIZZLE_Z; + swizzle[3] = PIPE_SWIZZLE_W; + break; + default: + assert(0); + } } + v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(swizzle[0])) | + S_008F0C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | + S_008F0C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | + S_008F0C_DST_SEL_W(si_map_swizzle(swizzle[3])) | + S_008F0C_NUM_FORMAT(num_format) | + S_008F0C_DATA_FORMAT(data_format); + /* We work around the fact that 8_8_8 and 16_16_16 data formats * do not exist by using the corresponding 4-component formats. * This requires a fixup of the descriptor for bounds checks. */ if (desc->block.bits == 3 * 8 || desc->block.bits == 3 * 16) { v->fix_size3 |= (desc->block.bits / 24) << (2 * i); } } memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count); -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev