From: Nicolai Hähnle <nicolai.haeh...@amd.com>

---
 src/gallium/drivers/radeonsi/si_shader.c | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index d26c36a..fd2ed42 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2137,20 +2137,22 @@ static void emit_streamout_output(struct si_shader_context *ctx, unsigned start = stream_out->start_component; unsigned num_comps = stream_out->num_components; LLVMValueRef out[4]; assert(num_comps && num_comps <= 4); if (!num_comps || num_comps > 4) return; /* Load the output as int. */ for (int j = 0; j < num_comps; j++) { + assert(stream_out->stream == shader_out->vertex_stream[start + j]); + out[j] = LLVMBuildBitCast(builder, shader_out->values[start + j], ctx->i32, ""); } /* Pack the output. */ LLVMValueRef vdata = NULL; switch (num_comps) { case 1: /* as i32 */ @@ -2173,21 +2175,22 @@ static void emit_streamout_output(struct si_shader_context *ctx, LLVMConstInt(ctx->i32, 0, 0), stream_out->dst_offset * 4); } /* On SI, the vertex shader is responsible for writing streamout data * to buffers. */ static void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_output_values *outputs, unsigned noutput) { - struct pipe_stream_output_info *so = &ctx->shader->selector->so; + struct si_shader_selector *sel = ctx->shader->selector; + struct pipe_stream_output_info *so = &sel->so; struct gallivm_state *gallivm = &ctx->gallivm; LLVMBuilderRef builder = gallivm->builder; int i; struct lp_build_if_state if_ctx; /* Get bits [22:16], i.e. (so_param >> 16) & 127; */ LLVMValueRef so_vtx_count = unpack_param(ctx, ctx->param_streamout_config, 16, 7); LLVMValueRef tid = get_thread_id(ctx); @@ -2196,21 +2199,34 @@ static void si_llvm_emit_streamout(struct si_shader_context *ctx, LLVMValueRef can_emit = LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, ""); LLVMValueRef stream_id = unpack_param(ctx, ctx->param_streamout_config, 24, 2); /* Emit the streamout code conditionally. 
This actually avoids * out-of-bounds buffer access. The hw tells us via the SGPR * (so_vtx_count) which threads are allowed to emit streamout data. */ lp_build_if(&if_ctx, gallivm, can_emit); - { + + for (int stream = 0; stream < 4; ++stream) { + struct lp_build_if_state if_ctx_stream; + + if (!sel->info.num_stream_output_components[stream]) + continue; + + LLVMValueRef is_stream = + LLVMBuildICmp(builder, LLVMIntEQ, + stream_id, + lp_build_const_int32(gallivm, stream), ""); + + lp_build_if(&if_ctx_stream, gallivm, is_stream); + /* The buffer offset is computed as follows: * ByteOffset = streamout_offset[buffer_id]*4 + * (streamout_write_index + thread_id)*stride[buffer_id] + * attrib_offset */ LLVMValueRef so_write_index = LLVMGetParam(ctx->main_fn, ctx->param_streamout_write_index); @@ -2238,36 +2254,32 @@ static void si_llvm_emit_streamout(struct si_shader_context *ctx, so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(ctx->i32, 4, 0), ""); so_write_offset[i] = LLVMBuildMul(builder, so_write_index, LLVMConstInt(ctx->i32, so->stride[i]*4, 0), ""); so_write_offset[i] = LLVMBuildAdd(builder, so_write_offset[i], so_offset, ""); } /* Write streamout data. 
*/ for (i = 0; i < so->num_outputs; i++) { unsigned reg = so->output[i].register_index; - unsigned stream = so->output[i].stream; - struct lp_build_if_state if_ctx_stream; if (reg >= noutput) continue; - LLVMValueRef can_emit_stream = - LLVMBuildICmp(builder, LLVMIntEQ, - stream_id, - lp_build_const_int32(gallivm, stream), ""); + if (stream != so->output[i].stream) + continue; - lp_build_if(&if_ctx_stream, gallivm, can_emit_stream); emit_streamout_output(ctx, so_buffers, so_write_offset, &so->output[i], &outputs[reg]); - lp_build_endif(&if_ctx_stream); } + + lp_build_endif(&if_ctx_stream); } lp_build_endif(&if_ctx); } /* Generate export instructions for hardware VS shader stage */ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base, struct si_shader_output_values *outputs, unsigned noutput) { -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev