From: Nicolai Hähnle <nicolai.haeh...@amd.com>

The only effect of setting GLC on stores to write-only memory is that
the written cache lines are immediately freed in L1$. We're not going
to read the data again, so it's better to leave room for other things
in the cache.
---
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index f4140bb0e2d..c06d0c6edfa 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -714,20 +714,26 @@ static void store_emit_buffer( offset = base_offset; if (start != 0) { offset = LLVMBuildAdd( builder, offset, LLVMConstInt(ctx->i32, start * 4, 0), ""); } emit_data->args[0] = data; emit_data->args[3] = offset; + if (writeonly_memory) { + /* Set GLC for write-only memory, so that we don't + * leave cache lines in L1$. */ + emit_data->args[3] = ctx->ac.i1true; + } + lp_build_intrinsic( builder, intrinsic_name, emit_data->dst_type, emit_data->args, emit_data->arg_count, ac_get_store_intr_attribs(writeonly_memory)); } } static void store_emit_memory( struct si_shader_context *ctx, struct lp_build_emit_data *emit_data) @@ -793,24 +799,28 @@ static void store_emit( args.opcode = ac_image_store; args.data[0] = emit_data->args[0]; args.resource = emit_data->args[1]; memcpy(args.coords, &emit_data->args[2], sizeof(args.coords)); args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture); args.attributes = ac_get_store_intr_attribs(writeonly_memory); args.dmask = 0xf; /* Workaround for 8bit/16bit TC L1 write corruption bug on SI. * All store opcodes not aligned to a dword are affected. + * + * Also set GLC for writeonly memory, so that we don't leave + * cache lines in L1. 
*/ bool force_glc = ctx->screen->info.chip_class == SI; if (force_glc || - inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE)) + inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) || + writeonly_memory) args.cache_policy = ac_glc; emit_data->output[emit_data->chan] = ac_build_image_opcode(&ctx->ac, &args); } } static void atomic_fetch_args( struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) -- 2.14.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev