From: Nicolai Hähnle <nicolai.haeh...@amd.com>

---
 src/gallium/drivers/radeonsi/si_shader.c | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index d26c36a..fd2ed42 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2137,20 +2137,22 @@ static void emit_streamout_output(struct si_shader_context *ctx,
        unsigned start = stream_out->start_component;
        unsigned num_comps = stream_out->num_components;
        LLVMValueRef out[4];
 
        assert(num_comps && num_comps <= 4);
        if (!num_comps || num_comps > 4)
                return;
 
        /* Load the output as int. */
        for (int j = 0; j < num_comps; j++) {
+               assert(stream_out->stream == shader_out->vertex_stream[start + 
j]);
+
                out[j] = LLVMBuildBitCast(builder,
                                          shader_out->values[start + j],
                                ctx->i32, "");
        }
 
        /* Pack the output. */
        LLVMValueRef vdata = NULL;
 
        switch (num_comps) {
        case 1: /* as i32 */
@@ -2173,21 +2175,22 @@ static void emit_streamout_output(struct si_shader_context *ctx,
                                   LLVMConstInt(ctx->i32, 0, 0),
                                   stream_out->dst_offset * 4);
 }
 
 /* On SI, the vertex shader is responsible for writing streamout data
  * to buffers. */
 static void si_llvm_emit_streamout(struct si_shader_context *ctx,
                                   struct si_shader_output_values *outputs,
                                   unsigned noutput)
 {
-       struct pipe_stream_output_info *so = &ctx->shader->selector->so;
+       struct si_shader_selector *sel = ctx->shader->selector;
+       struct pipe_stream_output_info *so = &sel->so;
        struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        int i;
        struct lp_build_if_state if_ctx;
 
        /* Get bits [22:16], i.e. (so_param >> 16) & 127; */
        LLVMValueRef so_vtx_count =
                unpack_param(ctx, ctx->param_streamout_config, 16, 7);
 
        LLVMValueRef tid = get_thread_id(ctx);
@@ -2196,21 +2199,34 @@ static void si_llvm_emit_streamout(struct si_shader_context *ctx,
        LLVMValueRef can_emit =
                LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
 
        LLVMValueRef stream_id =
                unpack_param(ctx, ctx->param_streamout_config, 24, 2);
 
        /* Emit the streamout code conditionally. This actually avoids
         * out-of-bounds buffer access. The hw tells us via the SGPR
         * (so_vtx_count) which threads are allowed to emit streamout data. */
        lp_build_if(&if_ctx, gallivm, can_emit);
-       {
+
+       for (int stream = 0; stream < 4; ++stream) {
+               struct lp_build_if_state if_ctx_stream;
+
+               if (!sel->info.num_stream_output_components[stream])
+                       continue;
+
+               LLVMValueRef is_stream =
+                       LLVMBuildICmp(builder, LLVMIntEQ,
+                                     stream_id,
+                                     lp_build_const_int32(gallivm, stream), 
"");
+
+               lp_build_if(&if_ctx_stream, gallivm, is_stream);
+
                /* The buffer offset is computed as follows:
                 *   ByteOffset = streamout_offset[buffer_id]*4 +
                 *                (streamout_write_index + 
thread_id)*stride[buffer_id] +
                 *                attrib_offset
                  */
 
                LLVMValueRef so_write_index =
                        LLVMGetParam(ctx->main_fn,
                                     ctx->param_streamout_write_index);
 
@@ -2238,36 +2254,32 @@ static void si_llvm_emit_streamout(struct si_shader_context *ctx,
                        so_offset = LLVMBuildMul(builder, so_offset, 
LLVMConstInt(ctx->i32, 4, 0), "");
 
                        so_write_offset[i] = LLVMBuildMul(builder, 
so_write_index,
                                                          
LLVMConstInt(ctx->i32, so->stride[i]*4, 0), "");
                        so_write_offset[i] = LLVMBuildAdd(builder, 
so_write_offset[i], so_offset, "");
                }
 
                /* Write streamout data. */
                for (i = 0; i < so->num_outputs; i++) {
                        unsigned reg = so->output[i].register_index;
-                       unsigned stream = so->output[i].stream;
-                       struct lp_build_if_state if_ctx_stream;
 
                        if (reg >= noutput)
                                continue;
 
-                       LLVMValueRef can_emit_stream =
-                               LLVMBuildICmp(builder, LLVMIntEQ,
-                                             stream_id,
-                                             lp_build_const_int32(gallivm, 
stream), "");
+                       if (stream != so->output[i].stream)
+                               continue;
 
-                       lp_build_if(&if_ctx_stream, gallivm, can_emit_stream);
                        emit_streamout_output(ctx, so_buffers, so_write_offset,
                                              &so->output[i], &outputs[reg]);
-                       lp_build_endif(&if_ctx_stream);
                }
+
+               lp_build_endif(&if_ctx_stream);
        }
        lp_build_endif(&if_ctx);
 }
 
 
 /* Generate export instructions for hardware VS shader stage */
 static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
                              struct si_shader_output_values *outputs,
                              unsigned noutput)
 {
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to