On Mon, May 28, 2018 at 3:52 AM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haeh...@amd.com>
>
> The only effect this has is that written cache lines are immediately
> freed in L1$. We're not going to read the data again, so it's better
> to leave room for other things in the cache.
> ---
>  src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 12 +++++++++++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> index f4140bb0e2d..c06d0c6edfa 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> @@ -714,20 +714,26 @@ static void store_emit_buffer(
>                 offset = base_offset;
>                 if (start != 0) {
>                         offset = LLVMBuildAdd(
>                                 builder, offset,
>                                 LLVMConstInt(ctx->i32, start * 4, 0), "");
>                 }
>
>                 emit_data->args[0] = data;
>                 emit_data->args[3] = offset;
>
> +               if (writeonly_memory) {
> +                       /* Set GLC for write-only memory, so that we don't
> +                        * leave cache lines in L1$. */
> +                       emit_data->args[4] = ctx->ac.i1true;
> +               }
> +
>                 lp_build_intrinsic(
>                         builder, intrinsic_name, emit_data->dst_type,
>                         emit_data->args, emit_data->arg_count,
>                         ac_get_store_intr_attribs(writeonly_memory));
>         }
>  }
>
>  static void store_emit_memory(
>         struct si_shader_context *ctx,
>         struct lp_build_emit_data *emit_data)
> @@ -793,24 +799,28 @@ static void store_emit(
>                 args.opcode = ac_image_store;
>                 args.data[0] = emit_data->args[0];
>                 args.resource = emit_data->args[1];
>                 memcpy(args.coords, &emit_data->args[2],
> sizeof(args.coords));
>                 args.dim = ac_image_dim_from_tgsi_target(ctx->screen,
> inst->Memory.Texture);
>                 args.attributes = ac_get_store_intr_attribs(
> writeonly_memory);
>                 args.dmask = 0xf;
>

There is one more store above this block, but anyway:

Reviewed-by: Marek Olšák <marek.ol...@amd.com>

Marek

>
>                 /* Workaround for 8bit/16bit TC L1 write corruption bug on
> SI.
>                  * All store opcodes not aligned to a dword are affected.
> +                *
> +                * Also set GLC for writeonly memory, so that we don't
> leave
> +                * cache lines in L1.
>                  */
>                 bool force_glc = ctx->screen->info.chip_class == SI;
>                 if (force_glc ||
> -                   inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT |
> TGSI_MEMORY_VOLATILE))
> +                   inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT |
> TGSI_MEMORY_VOLATILE) ||
> +                   writeonly_memory)
>                         args.cache_policy = ac_glc;
>
>                 emit_data->output[emit_data->chan] =
>                         ac_build_image_opcode(&ctx->ac, &args);
>         }
>  }
>
>  static void atomic_fetch_args(
>                 struct lp_build_tgsi_context * bld_base,
>                 struct lp_build_emit_data * emit_data)
> --
> 2.14.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev