From: Marek Olšák <marek.ol...@amd.com> --- .../drivers/radeonsi/si_shader_tgsi_mem.c | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index 427fead09d0..f5729acb8df 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -647,20 +647,27 @@ static void store_emit_buffer( struct si_shader_context *ctx, struct lp_build_emit_data *emit_data, bool writeonly_memory) { const struct tgsi_full_instruction *inst = emit_data->inst; LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef base_data = emit_data->args[0]; LLVMValueRef base_offset = emit_data->args[3]; unsigned writemask = inst->Dst[0].Register.WriteMask; + /* If this is write-only, don't keep data in L1 to prevent + * evicting L1 cache lines that may be needed by other + * instructions. + */ + if (writeonly_memory) + emit_data->args[4] = LLVMConstInt(ctx->i1, 1, 0); /* GLC = 1 */ + while (writemask) { int start, count; const char *intrinsic_name; LLVMValueRef data; LLVMValueRef offset; LLVMValueRef tmp; u_bit_scan_consecutive_range(&writemask, &start, &count); /* Due to an LLVM limitation, split 3-element writes @@ -762,40 +769,50 @@ static void store_emit( info->shader_buffers_atomic, info->images_load | info->images_atomic); if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { store_emit_buffer(ctx, emit_data, writeonly_memory); return; } if (target == TGSI_TEXTURE_BUFFER) { + /* If this is write-only, don't keep data in L1 to prevent + * evicting L1 cache lines that may be needed by other + * instructions. + */ + if (writeonly_memory) + emit_data->args[4] = LLVMConstInt(ctx->i1, 1, 0); /* GLC = 1 */ + emit_data->output[emit_data->chan] = ac_build_intrinsic( &ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", emit_data->dst_type, emit_data->args, emit_data->arg_count, ac_get_store_intr_attribs(writeonly_memory)); } else { struct ac_image_args args = {}; args.opcode = ac_image_store; args.data[0] = emit_data->args[0]; args.resource = emit_data->args[1]; memcpy(args.coords, &emit_data->args[2], sizeof(args.coords)); args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture); args.attributes = ac_get_store_intr_attribs(writeonly_memory); args.dmask = 0xf; /* Workaround for 8bit/16bit TC L1 write corruption bug on SI. * All store opcodes not aligned to a dword are affected. */ - bool force_glc = ctx->screen->info.chip_class == SI; - if (force_glc || + if (ctx->screen->info.chip_class == SI || + /* If this is write-only, don't keep data in L1 to prevent + * evicting L1 cache lines that may be needed by other + * instructions. */ + writeonly_memory || inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE)) args.cache_policy = ac_glc; emit_data->output[emit_data->chan] = ac_build_image_opcode(&ctx->ac, &args); } } static void atomic_fetch_args( struct lp_build_tgsi_context * bld_base, -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev