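From: Marek Olšák <marek.ol...@amd.com> This removes a lot of useless LDS stores: a TCS output is no longer written to LDS when the shader does not read outputs of that kind (per-vertex or per-patch) back.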
A few games read TESSINNER/OUTER, but not any other outputs. Most games don't read any outputs. The only app doing LDS output reads is UE4 Lightsroom Interior. --- src/gallium/drivers/radeonsi/si_shader.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index a67ac82..65e3faf 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -975,59 +975,74 @@ static LLVMValueRef fetch_input_tes( } static void store_output_tcs(struct lp_build_tgsi_context *bld_base, const struct tgsi_full_instruction *inst, const struct tgsi_opcode_info *info, LLVMValueRef dst[4]) { struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; const struct tgsi_full_dst_register *reg = &inst->Dst[0]; + const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info; unsigned chan_index; LLVMValueRef dw_addr, stride; LLVMValueRef rw_buffers, buffer, base, buf_addr; LLVMValueRef values[4]; + bool skip_lds_store; /* Only handle per-patch and per-vertex outputs here. * Vectors will be lowered to scalars and this function will be called again. 
*/ if (reg->Register.File != TGSI_FILE_OUTPUT || (dst[0] && LLVMGetTypeKind(LLVMTypeOf(dst[0])) == LLVMVectorTypeKind)) { si_llvm_emit_store(bld_base, inst, info, dst); return; } if (reg->Register.Dimension) { stride = unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8); dw_addr = get_tcs_out_current_patch_offset(ctx); dw_addr = get_dw_address(ctx, reg, NULL, stride, dw_addr); + skip_lds_store = !sh_info->reads_pervertex_outputs; } else { dw_addr = get_tcs_out_current_patch_data_offset(ctx); dw_addr = get_dw_address(ctx, reg, NULL, NULL, dw_addr); + skip_lds_store = !sh_info->reads_perpatch_outputs; + + if (!reg->Register.Indirect) { + int name = sh_info->output_semantic_name[reg->Register.Index]; + + /* Always write tess factors into LDS for the TCS epilog. */ + if (name == TGSI_SEMANTIC_TESSINNER || + name == TGSI_SEMANTIC_TESSOUTER) + skip_lds_store = false; + } } rw_buffers = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS); buffer = ac_build_indexed_load_const(&ctx->ac, rw_buffers, lp_build_const_int32(gallivm, SI_HS_RING_TESS_OFFCHIP)); base = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds); buf_addr = get_tcs_tes_buffer_address_from_reg(ctx, reg, NULL); TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { LLVMValueRef value = dst[chan_index]; if (inst->Instruction.Saturate) value = ac_emit_clamp(&ctx->ac, value); - lds_store(bld_base, chan_index, dw_addr, value); + /* Skip LDS stores if there is no LDS read of this output. */ + if (!skip_lds_store) + lds_store(bld_base, chan_index, dw_addr, value); value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, ""); values[chan_index] = value; if (inst->Dst[0].Register.WriteMask != 0xF) { ac_build_tbuffer_store_dwords(&ctx->ac, buffer, value, 1, buf_addr, base, 4 * chan_index); } } -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev