From: Marek Olšák <marek.ol...@amd.com> --- .../drivers/radeonsi/si_shader_tgsi_mem.c | 103 +++++++----------- 1 file changed, 42 insertions(+), 61 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index e7ba17048a7..f0220881995 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -567,78 +567,20 @@ static void load_emit( if (inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE)) args.cache_policy = ac_glc; args.attributes = ac_get_load_intr_attribs(can_speculate); args.dmask = 0xf; emit_data->output[emit_data->chan] = ac_build_image_opcode(&ctx->ac, &args); } } -static void store_fetch_args( - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - const struct tgsi_full_instruction * inst = emit_data->inst; - struct tgsi_full_src_register memory; - LLVMValueRef chans[4]; - LLVMValueRef data; - LLVMValueRef rsrc; - unsigned chan; - - emit_data->dst_type = ctx->voidt; - - for (chan = 0; chan < 4; ++chan) { - chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan); - } - data = ac_build_gather_values(&ctx->ac, chans, 4); - - emit_data->args[emit_data->arg_count++] = data; - - memory = tgsi_full_src_register_from_dst(&inst->Dst[0]); - - if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { - LLVMValueRef offset; - LLVMValueRef tmp; - - rsrc = shader_buffer_fetch_rsrc(ctx, &memory, false); - - tmp = lp_build_emit_fetch(bld_base, inst, 0, 0); - offset = ac_to_integer(&ctx->ac, tmp); - - buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0, - offset, false, false); - } else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE || - tgsi_is_bindless_image_file(inst->Dst[0].Register.File)) { - unsigned target = inst->Memory.Texture; - - /* 8bit/16bit TC L1 write corruption bug on SI. - * All store opcodes not aligned to a dword are affected. - * - * The only way to get unaligned stores in radeonsi is through - * shader images. - */ - bool force_glc = ctx->screen->info.chip_class == SI; - - image_fetch_rsrc(bld_base, &memory, true, target, &rsrc); - image_fetch_coords(bld_base, inst, 0, rsrc, &emit_data->args[2]); - - if (target == TGSI_TEXTURE_BUFFER) { - buffer_append_args(ctx, emit_data, rsrc, emit_data->args[2], - ctx->i32_0, false, force_glc); - } else { - emit_data->args[1] = rsrc; - } - } -} - static void store_emit_buffer( struct si_shader_context *ctx, struct lp_build_emit_data *emit_data, bool writeonly_memory) { const struct tgsi_full_instruction *inst = emit_data->inst; LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef base_data = emit_data->args[0]; LLVMValueRef base_offset = emit_data->args[3]; unsigned writemask = inst->Dst[0].Register.WriteMask; @@ -698,21 +640,21 @@ static void store_emit_buffer( if (start != 0) { offset = LLVMBuildAdd( builder, offset, LLVMConstInt(ctx->i32, start * 4, 0), ""); } emit_data->args[0] = data; emit_data->args[3] = offset; ac_build_intrinsic( - &ctx->ac, intrinsic_name, emit_data->dst_type, + &ctx->ac, intrinsic_name, ctx->voidt, emit_data->args, emit_data->arg_count, ac_get_store_intr_attribs(writeonly_memory)); } } static void store_emit_memory( struct si_shader_context *ctx, struct lp_build_emit_data *emit_data) { const struct tgsi_full_instruction *inst = emit_data->inst; @@ -735,28 +677,68 @@ static void store_emit_memory( } static void store_emit( const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); const struct tgsi_full_instruction * inst = emit_data->inst; const struct tgsi_shader_info *info = &ctx->shader->selector->info; + struct tgsi_full_src_register resource_reg = + tgsi_full_src_register_from_dst(&inst->Dst[0]); unsigned target = inst->Memory.Texture; bool writeonly_memory = false; + LLVMValueRef chans[4], rsrc; if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) { store_emit_memory(ctx, emit_data); return; } + for (unsigned chan = 0; chan < 4; ++chan) + chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan); + + emit_data->args[emit_data->arg_count++] = + ac_build_gather_values(&ctx->ac, chans, 4); + + if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { + LLVMValueRef offset, tmp; + + rsrc = shader_buffer_fetch_rsrc(ctx, &resource_reg, false); + + tmp = lp_build_emit_fetch(bld_base, inst, 0, 0); + offset = ac_to_integer(&ctx->ac, tmp); + + buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0, + offset, false, false); + } else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE || + tgsi_is_bindless_image_file(inst->Dst[0].Register.File)) { + /* 8bit/16bit TC L1 write corruption bug on SI. + * All store opcodes not aligned to a dword are affected. + * + * The only way to get unaligned stores in radeonsi is through + * shader images. + */ + bool force_glc = ctx->screen->info.chip_class == SI; + + image_fetch_rsrc(bld_base, &resource_reg, true, target, &rsrc); + image_fetch_coords(bld_base, inst, 0, rsrc, &emit_data->args[2]); + + if (target == TGSI_TEXTURE_BUFFER) { + buffer_append_args(ctx, emit_data, rsrc, emit_data->args[2], + ctx->i32_0, false, force_glc); + } else { + emit_data->args[1] = rsrc; + } + } + if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) ac_build_waitcnt(&ctx->ac, VM_CNT); writeonly_memory = is_oneway_access_only(inst, info, info->shader_buffers_load | info->shader_buffers_atomic, info->images_load | info->images_atomic); if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { @@ -767,21 +749,21 @@ static void store_emit( if (target == TGSI_TEXTURE_BUFFER) { /* If this is write-only, don't keep data in L1 to prevent * evicting L1 cache lines that may be needed by other * instructions. */ if (writeonly_memory) emit_data->args[4] = LLVMConstInt(ctx->i1, 1, 0); /* GLC = 1 */ emit_data->output[emit_data->chan] = ac_build_intrinsic( &ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", - emit_data->dst_type, emit_data->args, + ctx->voidt, emit_data->args, emit_data->arg_count, ac_get_store_intr_attribs(writeonly_memory)); } else { struct ac_image_args args = {}; args.opcode = ac_image_store; args.data[0] = emit_data->args[0]; args.resource = emit_data->args[1]; memcpy(args.coords, &emit_data->args[2], sizeof(args.coords)); args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture); args.attributes = ac_get_store_intr_attribs(writeonly_memory); @@ -1829,21 +1811,20 @@ void si_shader_context_init_mem(struct si_shader_context *ctx) bld_base->op_actions[TGSI_OPCODE_TXL2].emit = build_tex_intrinsic; bld_base->op_actions[TGSI_OPCODE_TXP].emit = build_tex_intrinsic; bld_base->op_actions[TGSI_OPCODE_TXQ].emit = resq_emit; bld_base->op_actions[TGSI_OPCODE_TG4].emit = build_tex_intrinsic; bld_base->op_actions[TGSI_OPCODE_LODQ].emit = build_tex_intrinsic; bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs; bld_base->op_actions[TGSI_OPCODE_FBFETCH].emit = si_llvm_emit_fbfetch; bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit; - bld_base->op_actions[TGSI_OPCODE_STORE].fetch_args = store_fetch_args; bld_base->op_actions[TGSI_OPCODE_STORE].emit = store_emit; bld_base->op_actions[TGSI_OPCODE_RESQ].emit = resq_emit; tmpl.fetch_args = atomic_fetch_args; tmpl.emit = atomic_emit; bld_base->op_actions[TGSI_OPCODE_ATOMUADD] = tmpl; bld_base->op_actions[TGSI_OPCODE_ATOMUADD].intr_name = "add"; bld_base->op_actions[TGSI_OPCODE_ATOMXCHG] = tmpl; bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].intr_name = "swap"; bld_base->op_actions[TGSI_OPCODE_ATOMCAS] = tmpl; -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev