From: Nicolai Hähnle <nicolai.haeh...@amd.com>

The iteration is not needed for normal vertex shaders.
---
 src/gallium/drivers/radeonsi/si_shader.c | 62 +++++++++++++++++++-------------
 1 file changed, 37 insertions(+), 25 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index fd2ed42..cd2fd09 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2169,64 +2169,50 @@ static void emit_streamout_output(struct si_shader_context *ctx, break; } build_tbuffer_store_dwords(ctx, so_buffers[buf_idx], vdata, num_comps, so_write_offsets[buf_idx], LLVMConstInt(ctx->i32, 0, 0), stream_out->dst_offset * 4); } -/* On SI, the vertex shader is responsible for writing streamout data - * to buffers. */ +/** + * Write streamout data to buffers for vertex stream @p stream (different + * vertex streams can occur for GS copy shaders). + */ static void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_output_values *outputs, - unsigned noutput) + unsigned noutput, unsigned stream) { struct si_shader_selector *sel = ctx->shader->selector; struct pipe_stream_output_info *so = &sel->so; struct gallivm_state *gallivm = &ctx->gallivm; LLVMBuilderRef builder = gallivm->builder; int i; struct lp_build_if_state if_ctx; /* Get bits [22:16], i.e. (so_param >> 16) & 127; */ LLVMValueRef so_vtx_count = unpack_param(ctx, ctx->param_streamout_config, 16, 7); LLVMValueRef tid = get_thread_id(ctx); /* can_emit = tid < so_vtx_count; */ LLVMValueRef can_emit = LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, ""); - LLVMValueRef stream_id = - unpack_param(ctx, ctx->param_streamout_config, 24, 2); - /* Emit the streamout code conditionally. This actually avoids * out-of-bounds buffer access. The hw tells us via the SGPR * (so_vtx_count) which threads are allowed to emit streamout data. 
*/ lp_build_if(&if_ctx, gallivm, can_emit); - - for (int stream = 0; stream < 4; ++stream) { - struct lp_build_if_state if_ctx_stream; - - if (!sel->info.num_stream_output_components[stream]) - continue; - - LLVMValueRef is_stream = - LLVMBuildICmp(builder, LLVMIntEQ, - stream_id, - lp_build_const_int32(gallivm, stream), ""); - - lp_build_if(&if_ctx_stream, gallivm, is_stream); - + { /* The buffer offset is computed as follows: * ByteOffset = streamout_offset[buffer_id]*4 + * (streamout_write_index + thread_id)*stride[buffer_id] + * attrib_offset */ LLVMValueRef so_write_index = LLVMGetParam(ctx->main_fn, ctx->param_streamout_write_index); @@ -2264,22 +2250,20 @@ static void si_llvm_emit_streamout(struct si_shader_context *ctx, if (reg >= noutput) continue; if (stream != so->output[i].stream) continue; emit_streamout_output(ctx, so_buffers, so_write_offset, &so->output[i], &outputs[reg]); } - - lp_build_endif(&if_ctx_stream); } lp_build_endif(&if_ctx); } /* Generate export instructions for hardware VS shader stage */ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base, struct si_shader_output_values *outputs, unsigned noutput) { @@ -2828,21 +2812,21 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base) /* Return the primitive ID from the LLVM function. 
*/ ctx->return_value = LLVMBuildInsertValue(gallivm->builder, ctx->return_value, bitcast(bld_base, TGSI_TYPE_FLOAT, get_primitive_id(bld_base, 0)), VS_EPILOG_PRIMID_LOC, ""); if (ctx->shader->selector->so.num_outputs) - si_llvm_emit_streamout(ctx, outputs, i); + si_llvm_emit_streamout(ctx, outputs, i, 0); si_llvm_export_vs(bld_base, outputs, i); FREE(outputs); } struct si_ps_exports { unsigned num; LLVMValueRef args[10][9]; }; unsigned si_get_spi_shader_z_format(bool writes_z, bool writes_stencil, @@ -6203,20 +6187,21 @@ static void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret) /* Generate code for the hardware VS shader stage to go with a geometry shader */ struct si_shader * si_generate_gs_copy_shader(struct si_screen *sscreen, LLVMTargetMachineRef tm, struct si_shader_selector *gs_selector, struct pipe_debug_callback *debug) { struct si_shader_context ctx; struct si_shader *shader; struct gallivm_state *gallivm = &ctx.gallivm; + LLVMBuilderRef builder; struct lp_build_tgsi_context *bld_base = &ctx.soa.bld_base; struct lp_build_context *uint = &bld_base->uint_bld; struct si_shader_output_values *outputs; struct tgsi_shader_info *gsinfo = &gs_selector->info; LLVMValueRef args[9]; int i, r; outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0])); if (!outputs) @@ -6228,36 +6213,46 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, return NULL; } shader->selector = gs_selector; shader->is_gs_copy_shader = true; si_init_shader_ctx(&ctx, sscreen, shader, tm); ctx.type = PIPE_SHADER_VERTEX; + builder = gallivm->builder; + create_meta_data(&ctx); create_function(&ctx); preload_ring_buffers(&ctx); args[0] = ctx.gsvs_ring[0]; args[1] = lp_build_mul_imm(uint, LLVMGetParam(ctx.main_fn, ctx.param_vertex_id), 4); args[3] = uint->zero; args[4] = uint->one; /* OFFEN */ args[5] = uint->zero; /* IDXEN */ args[6] = uint->one; /* GLC */ args[7] = uint->one; /* SLC */ args[8] = uint->zero; /* TFE */ + /* Fetch the vertex stream ID.*/ + LLVMValueRef 
stream_id; + + if (gs_selector->so.num_outputs) + stream_id = unpack_param(&ctx, ctx.param_streamout_config, 24, 2); + else + stream_id = uint->zero; + /* Fetch vertex data from GSVS ring */ for (i = 0; i < gsinfo->num_outputs; ++i) { unsigned chan; outputs[i].semantic_name = gsinfo->output_semantic_name[i]; outputs[i].semantic_index = gsinfo->output_semantic_index[i]; for (chan = 0; chan < 4; chan++) { outputs[i].vertex_stream[chan] = (gsinfo->output_streams[i] >> (2 * chan)) & 3; @@ -6269,22 +6264,39 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, outputs[i].values[chan] = LLVMBuildBitCast(gallivm->builder, lp_build_intrinsic(gallivm->builder, "llvm.SI.buffer.load.dword.i32.i32", ctx.i32, args, 9, LP_FUNC_ATTR_READONLY), ctx.f32, ""); } } - if (gs_selector->so.num_outputs) - si_llvm_emit_streamout(&ctx, outputs, gsinfo->num_outputs); + if (gs_selector->so.num_outputs) { + for (int stream = 0; stream < 4; stream++) { + struct lp_build_if_state if_ctx_stream; + + if (!gsinfo->num_stream_output_components[stream]) + continue; + + LLVMValueRef is_stream = + LLVMBuildICmp(builder, LLVMIntEQ, + stream_id, + lp_build_const_int32(gallivm, stream), ""); + + lp_build_if(&if_ctx_stream, gallivm, is_stream); + si_llvm_emit_streamout(&ctx, outputs, + gsinfo->num_outputs, + stream); + lp_build_endif(&if_ctx_stream); + } + } si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs); LLVMBuildRetVoid(gallivm->builder); /* Dump LLVM IR before any optimization passes */ if (sscreen->b.debug_flags & DBG_PREOPT_IR && r600_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY)) LLVMDumpModule(bld_base->base.gallivm->module); si_llvm_finalize_module(&ctx, -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev