From: Nicolai Hähnle <nicolai.haeh...@amd.com>

LLVM can still decide to hoist the loads since they're marked invariant.
---
 src/gallium/drivers/radeonsi/si_shader.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 1cda59c..d26c36a 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2178,33 +2178,20 @@ static void emit_streamout_output(struct si_shader_context *ctx, * to buffers. */ static void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_output_values *outputs, unsigned noutput) { struct pipe_stream_output_info *so = &ctx->shader->selector->so; struct gallivm_state *gallivm = &ctx->gallivm; LLVMBuilderRef builder = gallivm->builder; int i; struct lp_build_if_state if_ctx; - LLVMValueRef so_buffers[4]; - LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn, - SI_PARAM_RW_BUFFERS); - - /* Load the descriptors. */ - for (i = 0; i < 4; ++i) { - if (ctx->shader->selector->so.stride[i]) { - LLVMValueRef offset = lp_build_const_int32(gallivm, - SI_VS_STREAMOUT_BUF0 + i); - - so_buffers[i] = build_indexed_load_const(ctx, buf_ptr, offset); - } - } /* Get bits [22:16], i.e. (so_param >> 16) & 127; */ LLVMValueRef so_vtx_count = unpack_param(ctx, ctx->param_streamout_config, 16, 7); LLVMValueRef tid = get_thread_id(ctx); /* can_emit = tid < so_vtx_count; */ LLVMValueRef can_emit = LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, ""); @@ -2223,26 +2210,36 @@ static void si_llvm_emit_streamout(struct si_shader_context *ctx, * attrib_offset */ LLVMValueRef so_write_index = LLVMGetParam(ctx->main_fn, ctx->param_streamout_write_index); /* Compute (streamout_write_index + thread_id). */ so_write_index = LLVMBuildAdd(builder, so_write_index, tid, ""); - /* Compute the write offset for each enabled buffer. */ + /* Load the descriptor and compute the write offset for each + * enabled buffer. 
*/ LLVMValueRef so_write_offset[4] = {}; + LLVMValueRef so_buffers[4]; + LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn, + SI_PARAM_RW_BUFFERS); + for (i = 0; i < 4; i++) { if (!so->stride[i]) continue; + LLVMValueRef offset = lp_build_const_int32(gallivm, + SI_VS_STREAMOUT_BUF0 + i); + + so_buffers[i] = build_indexed_load_const(ctx, buf_ptr, offset); + LLVMValueRef so_offset = LLVMGetParam(ctx->main_fn, ctx->param_streamout_offset[i]); so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(ctx->i32, 4, 0), ""); so_write_offset[i] = LLVMBuildMul(builder, so_write_index, LLVMConstInt(ctx->i32, so->stride[i]*4, 0), ""); so_write_offset[i] = LLVMBuildAdd(builder, so_write_offset[i], so_offset, ""); } /* Write streamout data. */ -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev