Reviewed-by: Marek Olšák <marek.ol...@amd.com> Marek
On Mon, Nov 20, 2017 at 2:57 AM, Dave Airlie <airl...@gmail.com> wrote: > From: Dave Airlie <airl...@redhat.com> > > If the TCS doesn't read back the outputs, no need to store them > to LDS in the first place. (except for tess factors). > > This seems to give about 50fps (3290->3330) with tessellation demo. > > I haven't tested if it impacts DoW3 at all. > > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > src/amd/common/ac_nir_to_llvm.c | 17 ++++++++++++++++- > 1 file changed, 16 insertions(+), 1 deletion(-) > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index 1ecdeca..b58bae9 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -149,6 +149,9 @@ struct nir_to_llvm_context { > unsigned tes_primitive_mode; > uint64_t tess_outputs_written; > uint64_t tess_patch_outputs_written; > + > + uint32_t tcs_patch_outputs_read; > + uint64_t tcs_outputs_read; > }; > > static inline struct nir_to_llvm_context * > @@ -2789,7 +2792,15 @@ store_tcs_output(struct nir_to_llvm_context *ctx, > const unsigned comp = instr->variables[0]->var->data.location_frac; > const bool per_vertex = > nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage); > const bool is_compact = instr->variables[0]->var->data.compact; > + bool store_lds = true; > > + if (instr->variables[0]->var->data.patch) { > + if (!(ctx->tcs_patch_outputs_read & (1U << > instr->variables[0]->var->data.location))) > + store_lds = false; > + } else { > + if (!(ctx->tcs_outputs_read & (1ULL << > instr->variables[0]->var->data.location))) > + store_lds = false; > + } > get_deref_offset(ctx->nir, instr->variables[0], > false, NULL, per_vertex ? &vertex_index : NULL, > &const_index, &indir_index); > @@ -2826,7 +2837,8 @@ store_tcs_output(struct nir_to_llvm_context *ctx, > continue; > LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - > comp); > > - ac_lds_store(&ctx->ac, dw_addr, value); > + if (store_lds || is_tess_factor) > + ac_lds_store(&ctx->ac, dw_addr, value); > > if (!is_tess_factor && writemask != 0xF) > ac_build_buffer_store_dword(&ctx->ac, > ctx->hs_ring_tess_offchip, value, 1, > @@ -6550,6 +6562,9 @@ LLVMModuleRef > ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, > ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, > ctx.ac.i32, "gs_next_vertex"); > > ctx.gs_max_out_vertices = > shaders[i]->info.gs.vertices_out; > + } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) { > + ctx.tcs_outputs_read = shaders[i]->info.outputs_read; > + ctx.tcs_patch_outputs_read = > shaders[i]->info.patch_outputs_read; > } else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) { > ctx.tes_primitive_mode = > shaders[i]->info.tess.primitive_mode; > } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) { > -- > 2.9.5 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev