From: Nicolai Hähnle <nicolai.haeh...@amd.com>

---
 src/gallium/drivers/radeonsi/si_shader.c | 91 ++++++++++++++++++--------------
 1 file changed, 52 insertions(+), 39 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 4e61d73..1cda59c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2118,30 +2118,79 @@ static void si_dump_streamout(struct 
pipe_stream_output_info *so)
                        i, so->output[i].output_buffer,
                        so->output[i].dst_offset, so->output[i].dst_offset + 
so->output[i].num_components - 1,
                        so->output[i].register_index,
                        mask & 1 ? "x" : "",
                        mask & 2 ? "y" : "",
                        mask & 4 ? "z" : "",
                        mask & 8 ? "w" : "");
        }
 }
 
+static void emit_streamout_output(struct si_shader_context *ctx,
+                                 LLVMValueRef const *so_buffers,
+                                 LLVMValueRef const *so_write_offsets,
+                                 struct pipe_stream_output *stream_out,
+                                 struct si_shader_output_values *shader_out)
+{
+       struct gallivm_state *gallivm = &ctx->gallivm;
+       LLVMBuilderRef builder = gallivm->builder;
+       unsigned buf_idx = stream_out->output_buffer;
+       unsigned start = stream_out->start_component;
+       unsigned num_comps = stream_out->num_components;
+       LLVMValueRef out[4];
+
+       assert(num_comps && num_comps <= 4);
+       if (!num_comps || num_comps > 4)
+               return;
+
+       /* Load the output as int. */
+       for (int j = 0; j < num_comps; j++) {
+               out[j] = LLVMBuildBitCast(builder,
+                                         shader_out->values[start + j],
+                               ctx->i32, "");
+       }
+
+       /* Pack the output. */
+       LLVMValueRef vdata = NULL;
+
+       switch (num_comps) {
+       case 1: /* as i32 */
+               vdata = out[0];
+               break;
+       case 2: /* as v2i32 */
+       case 3: /* as v4i32 (aligned to 4) */
+       case 4: /* as v4i32 */
+               vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, 
util_next_power_of_two(num_comps)));
+               for (int j = 0; j < num_comps; j++) {
+                       vdata = LLVMBuildInsertElement(builder, vdata, out[j],
+                                                      LLVMConstInt(ctx->i32, 
j, 0), "");
+               }
+               break;
+       }
+
+       build_tbuffer_store_dwords(ctx, so_buffers[buf_idx],
+                                  vdata, num_comps,
+                                  so_write_offsets[buf_idx],
+                                  LLVMConstInt(ctx->i32, 0, 0),
+                                  stream_out->dst_offset * 4);
+}
+
 /* On SI, the vertex shader is responsible for writing streamout data
  * to buffers. */
 static void si_llvm_emit_streamout(struct si_shader_context *ctx,
                                   struct si_shader_output_values *outputs,
                                   unsigned noutput)
 {
        struct pipe_stream_output_info *so = &ctx->shader->selector->so;
        struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
-       int i, j;
+       int i;
        struct lp_build_if_state if_ctx;
        LLVMValueRef so_buffers[4];
        LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
                                            SI_PARAM_RW_BUFFERS);
 
        /* Load the descriptors. */
        for (i = 0; i < 4; ++i) {
                if (ctx->shader->selector->so.stride[i]) {
                        LLVMValueRef offset = lp_build_const_int32(gallivm,
                                                                   
SI_VS_STREAMOUT_BUF0 + i);
@@ -2191,71 +2240,35 @@ static void si_llvm_emit_streamout(struct 
si_shader_context *ctx,
                                                              
ctx->param_streamout_offset[i]);
                        so_offset = LLVMBuildMul(builder, so_offset, 
LLVMConstInt(ctx->i32, 4, 0), "");
 
                        so_write_offset[i] = LLVMBuildMul(builder, 
so_write_index,
                                                          
LLVMConstInt(ctx->i32, so->stride[i]*4, 0), "");
                        so_write_offset[i] = LLVMBuildAdd(builder, 
so_write_offset[i], so_offset, "");
                }
 
                /* Write streamout data. */
                for (i = 0; i < so->num_outputs; i++) {
-                       unsigned buf_idx = so->output[i].output_buffer;
                        unsigned reg = so->output[i].register_index;
-                       unsigned start = so->output[i].start_component;
-                       unsigned num_comps = so->output[i].num_components;
                        unsigned stream = so->output[i].stream;
-                       LLVMValueRef out[4];
                        struct lp_build_if_state if_ctx_stream;
 
-                       assert(num_comps && num_comps <= 4);
-                       if (!num_comps || num_comps > 4)
-                               continue;
-
                        if (reg >= noutput)
                                continue;
 
-                       /* Load the output as int. */
-                       for (j = 0; j < num_comps; j++) {
-                               out[j] = LLVMBuildBitCast(builder,
-                                                         
outputs[reg].values[start+j],
-                                               ctx->i32, "");
-                       }
-
-                       /* Pack the output. */
-                       LLVMValueRef vdata = NULL;
-
-                       switch (num_comps) {
-                       case 1: /* as i32 */
-                               vdata = out[0];
-                               break;
-                       case 2: /* as v2i32 */
-                       case 3: /* as v4i32 (aligned to 4) */
-                       case 4: /* as v4i32 */
-                               vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, 
util_next_power_of_two(num_comps)));
-                               for (j = 0; j < num_comps; j++) {
-                                       vdata = LLVMBuildInsertElement(builder, 
vdata, out[j],
-                                                                      
LLVMConstInt(ctx->i32, j, 0), "");
-                               }
-                               break;
-                       }
-
                        LLVMValueRef can_emit_stream =
                                LLVMBuildICmp(builder, LLVMIntEQ,
                                              stream_id,
                                              lp_build_const_int32(gallivm, 
stream), "");
 
                        lp_build_if(&if_ctx_stream, gallivm, can_emit_stream);
-                       build_tbuffer_store_dwords(ctx, so_buffers[buf_idx],
-                                                  vdata, num_comps,
-                                                  so_write_offset[buf_idx],
-                                                  LLVMConstInt(ctx->i32, 0, 0),
-                                                  so->output[i].dst_offset*4);
+                       emit_streamout_output(ctx, so_buffers, so_write_offset,
+                                             &so->output[i], &outputs[reg]);
                        lp_build_endif(&if_ctx_stream);
                }
        }
        lp_build_endif(&if_ctx);
 }
 
 
 /* Generate export instructions for hardware VS shader stage */
 static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
                              struct si_shader_output_values *outputs,
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to