From: Marek Olšák <marek.ol...@amd.com> --- .../drivers/radeonsi/si_shader_tgsi_mem.c | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index 6decedc4cce..727def56f65 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -694,79 +694,88 @@ static void store_emit( bool writeonly_memory = is_oneway_access_only(inst, info, info->shader_buffers_load | info->shader_buffers_atomic, info->images_load | info->images_atomic, info->uses_bindless_buffer_load | info->uses_bindless_buffer_atomic, info->uses_bindless_image_load | info->uses_bindless_image_atomic); - LLVMValueRef chans[4], value; + LLVMValueRef chans[4]; LLVMValueRef vindex = ctx->i32_0; LLVMValueRef voffset = ctx->i32_0; struct ac_image_args args = {}; for (unsigned chan = 0; chan < 4; ++chan) chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan); - value = ac_build_gather_values(&ctx->ac, chans, 4); - if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { args.resource = shader_buffer_fetch_rsrc(ctx, &resource_reg, false); voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 0, 0)); } else { image_fetch_rsrc(bld_base, &resource_reg, true, target, &args.resource); image_fetch_coords(bld_base, inst, 0, args.resource, args.coords); vindex = args.coords[0]; /* for buffers only */ } if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) ac_build_waitcnt(&ctx->ac, VM_CNT); bool is_image = inst->Dst[0].Register.File != TGSI_FILE_BUFFER; args.cache_policy = get_cache_policy(ctx, inst, false, /* atomic */ is_image, /* may_store_unaligned */ writeonly_memory); if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { store_emit_buffer(ctx, args.resource, inst->Dst[0].Register.WriteMask, - value, voffset, args.cache_policy, writeonly_memory); + ac_build_gather_values(&ctx->ac, chans, 4), + voffset, args.cache_policy, writeonly_memory); return; } if (target == TGSI_TEXTURE_BUFFER) { + unsigned num_channels = util_last_bit(inst->Dst[0].Register.WriteMask); + num_channels = util_next_power_of_two(num_channels); + LLVMValueRef buf_args[6] = { - value, + ac_build_gather_values(&ctx->ac, chans, 4), args.resource, vindex, ctx->i32_0, /* voffset */ }; if (HAVE_LLVM >= 0x0800) { buf_args[4] = ctx->i32_0; /* soffset */ buf_args[5] = LLVMConstInt(ctx->i1, args.cache_policy, 0); } else { buf_args[4] = LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_glc), 0); buf_args[5] = LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_slc), 0); } + const char *types[] = { "f32", "v2f32", "v4f32" }; + char name[128]; + + snprintf(name, sizeof(name), "%s.%s", + HAVE_LLVM >= 0x0800 ? "llvm.amdgcn.struct.buffer.store.format" : + "llvm.amdgcn.buffer.store.format", + types[CLAMP(num_channels, 1, 3) - 1]); + emit_data->output[emit_data->chan] = ac_build_intrinsic( &ctx->ac, - HAVE_LLVM >= 0x0800 ? "llvm.amdgcn.struct.buffer.store.format.v4f32" : - "llvm.amdgcn.buffer.store.format.v4f32", + name, ctx->voidt, buf_args, 6, ac_get_store_intr_attribs(writeonly_memory)); } else { args.opcode = ac_image_store; - args.data[0] = value; + args.data[0] = ac_build_gather_values(&ctx->ac, chans, 4); args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture); args.attributes = ac_get_store_intr_attribs(writeonly_memory); args.dmask = 0xf; emit_data->output[emit_data->chan] = ac_build_image_opcode(&ctx->ac, &args); } } static void atomic_emit_memory(struct si_shader_context *ctx, -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev