From: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c | 91 ++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 39 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 4e61d73..1cda59c 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2118,30 +2118,79 @@ static void si_dump_streamout(struct pipe_stream_output_info *so) i, so->output[i].output_buffer, so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1, so->output[i].register_index, mask & 1 ? "x" : "", mask & 2 ? "y" : "", mask & 4 ? "z" : "", mask & 8 ? "w" : ""); } } +static void emit_streamout_output(struct si_shader_context *ctx, + LLVMValueRef const *so_buffers, + LLVMValueRef const *so_write_offsets, + struct pipe_stream_output *stream_out, + struct si_shader_output_values *shader_out) +{ + struct gallivm_state *gallivm = &ctx->gallivm; + LLVMBuilderRef builder = gallivm->builder; + unsigned buf_idx = stream_out->output_buffer; + unsigned start = stream_out->start_component; + unsigned num_comps = stream_out->num_components; + LLVMValueRef out[4]; + + assert(num_comps && num_comps <= 4); + if (!num_comps || num_comps > 4) + return; + + /* Load the output as int. */ + for (int j = 0; j < num_comps; j++) { + out[j] = LLVMBuildBitCast(builder, + shader_out->values[start + j], + ctx->i32, ""); + } + + /* Pack the output. */ + LLVMValueRef vdata = NULL; + + switch (num_comps) { + case 1: /* as i32 */ + vdata = out[0]; + break; + case 2: /* as v2i32 */ + case 3: /* as v4i32 (aligned to 4) */ + case 4: /* as v4i32 */ + vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, util_next_power_of_two(num_comps))); + for (int j = 0; j < num_comps; j++) { + vdata = LLVMBuildInsertElement(builder, vdata, out[j], + LLVMConstInt(ctx->i32, j, 0), ""); + } + break; + } + + build_tbuffer_store_dwords(ctx, so_buffers[buf_idx], + vdata, num_comps, + so_write_offsets[buf_idx], + LLVMConstInt(ctx->i32, 0, 0), + stream_out->dst_offset * 4); +} + /* On SI, the vertex shader is responsible for writing streamout data * to buffers. */ static void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_output_values *outputs, unsigned noutput) { struct pipe_stream_output_info *so = &ctx->shader->selector->so; struct gallivm_state *gallivm = &ctx->gallivm; LLVMBuilderRef builder = gallivm->builder; - int i, j; + int i; struct lp_build_if_state if_ctx; LLVMValueRef so_buffers[4]; LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS); /* Load the descriptors. */ for (i = 0; i < 4; ++i) { if (ctx->shader->selector->so.stride[i]) { LLVMValueRef offset = lp_build_const_int32(gallivm, SI_VS_STREAMOUT_BUF0 + i); @@ -2191,71 +2240,35 @@ static void si_llvm_emit_streamout(struct si_shader_context *ctx, ctx->param_streamout_offset[i]); so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(ctx->i32, 4, 0), ""); so_write_offset[i] = LLVMBuildMul(builder, so_write_index, LLVMConstInt(ctx->i32, so->stride[i]*4, 0), ""); so_write_offset[i] = LLVMBuildAdd(builder, so_write_offset[i], so_offset, ""); } /* Write streamout data. */ for (i = 0; i < so->num_outputs; i++) { - unsigned buf_idx = so->output[i].output_buffer; unsigned reg = so->output[i].register_index; - unsigned start = so->output[i].start_component; - unsigned num_comps = so->output[i].num_components; unsigned stream = so->output[i].stream; - LLVMValueRef out[4]; struct lp_build_if_state if_ctx_stream; - assert(num_comps && num_comps <= 4); - if (!num_comps || num_comps > 4) - continue; - if (reg >= noutput) continue; - /* Load the output as int. */ - for (j = 0; j < num_comps; j++) { - out[j] = LLVMBuildBitCast(builder, - outputs[reg].values[start+j], - ctx->i32, ""); - } - - /* Pack the output. */ - LLVMValueRef vdata = NULL; - - switch (num_comps) { - case 1: /* as i32 */ - vdata = out[0]; - break; - case 2: /* as v2i32 */ - case 3: /* as v4i32 (aligned to 4) */ - case 4: /* as v4i32 */ - vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, util_next_power_of_two(num_comps))); - for (j = 0; j < num_comps; j++) { - vdata = LLVMBuildInsertElement(builder, vdata, out[j], - LLVMConstInt(ctx->i32, j, 0), ""); - } - break; - } - LLVMValueRef can_emit_stream = LLVMBuildICmp(builder, LLVMIntEQ, stream_id, lp_build_const_int32(gallivm, stream), ""); lp_build_if(&if_ctx_stream, gallivm, can_emit_stream); - build_tbuffer_store_dwords(ctx, so_buffers[buf_idx], - vdata, num_comps, - so_write_offset[buf_idx], - LLVMConstInt(ctx->i32, 0, 0), - so->output[i].dst_offset*4); + emit_streamout_output(ctx, so_buffers, so_write_offset, + &so->output[i], &outputs[reg]); lp_build_endif(&if_ctx_stream); } } lp_build_endif(&if_ctx); } /* Generate export instructions for hardware VS shader stage */ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base, struct si_shader_output_values *outputs, -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev