From: Marek Olšák <marek.ol...@amd.com>

---
 .../drivers/radeonsi/si_shader_tgsi_mem.c     | 25 +++++++++++++------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 6decedc4cce..727def56f65 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -694,79 +694,88 @@ static void store_emit(
 
        bool writeonly_memory = is_oneway_access_only(inst, info,
                                                      info->shader_buffers_load 
|
                                                      
info->shader_buffers_atomic,
                                                      info->images_load |
                                                      info->images_atomic,
                                                      
info->uses_bindless_buffer_load |
                                                      
info->uses_bindless_buffer_atomic,
                                                      
info->uses_bindless_image_load |
                                                      
info->uses_bindless_image_atomic);
-       LLVMValueRef chans[4], value;
+       LLVMValueRef chans[4];
        LLVMValueRef vindex = ctx->i32_0;
        LLVMValueRef voffset = ctx->i32_0;
        struct ac_image_args args = {};
 
        for (unsigned chan = 0; chan < 4; ++chan)
                chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
 
-       value = ac_build_gather_values(&ctx->ac, chans, 4);
-
        if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
                args.resource = shader_buffer_fetch_rsrc(ctx, &resource_reg, 
false);
                voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, 
inst, 0, 0));
        } else {
                image_fetch_rsrc(bld_base, &resource_reg, true, target, 
&args.resource);
                image_fetch_coords(bld_base, inst, 0, args.resource, 
args.coords);
                vindex = args.coords[0]; /* for buffers only */
        }
 
        if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
                ac_build_waitcnt(&ctx->ac, VM_CNT);
 
        bool is_image = inst->Dst[0].Register.File != TGSI_FILE_BUFFER;
        args.cache_policy = get_cache_policy(ctx, inst,
                                             false, /* atomic */
                                             is_image, /* may_store_unaligned */
                                             writeonly_memory);
 
        if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
                store_emit_buffer(ctx, args.resource, 
inst->Dst[0].Register.WriteMask,
-                                 value, voffset, args.cache_policy, 
writeonly_memory);
+                                 ac_build_gather_values(&ctx->ac, chans, 4),
+                                 voffset, args.cache_policy, writeonly_memory);
                return;
        }
 
        if (target == TGSI_TEXTURE_BUFFER) {
+               unsigned num_channels = 
util_last_bit(inst->Dst[0].Register.WriteMask);
+               num_channels = util_next_power_of_two(num_channels);
+
                LLVMValueRef buf_args[6] = {
-                       value,
+                       ac_build_gather_values(&ctx->ac, chans, 4),
                        args.resource,
                        vindex,
                        ctx->i32_0, /* voffset */
                };
 
                if (HAVE_LLVM >= 0x0800) {
                        buf_args[4] = ctx->i32_0; /* soffset */
                        buf_args[5] = LLVMConstInt(ctx->i1, args.cache_policy, 
0);
                } else {
                        buf_args[4] = LLVMConstInt(ctx->i1, 
!!(args.cache_policy & ac_glc), 0);
                        buf_args[5] = LLVMConstInt(ctx->i1, 
!!(args.cache_policy & ac_slc), 0);
                }
 
+               const char *types[] = { "f32", "v2f32", "v4f32" };
+               char name[128];
+
+               snprintf(name, sizeof(name), "%s.%s",
+                        HAVE_LLVM >= 0x0800 ? 
"llvm.amdgcn.struct.buffer.store.format" :
+                                              
"llvm.amdgcn.buffer.store.format",
+                        types[CLAMP(num_channels, 1, 3) - 1]);
+
                emit_data->output[emit_data->chan] = ac_build_intrinsic(
                        &ctx->ac,
-                       HAVE_LLVM >= 0x0800 ? 
"llvm.amdgcn.struct.buffer.store.format.v4f32" :
-                                             
"llvm.amdgcn.buffer.store.format.v4f32",
+                       name,
                        ctx->voidt, buf_args, 6,
                        ac_get_store_intr_attribs(writeonly_memory));
        } else {
                args.opcode = ac_image_store;
-               args.data[0] = value;
+               args.data[0] = ac_build_gather_values(&ctx->ac, chans, 4);
                args.dim = ac_image_dim_from_tgsi_target(ctx->screen, 
inst->Memory.Texture);
                args.attributes = ac_get_store_intr_attribs(writeonly_memory);
                args.dmask = 0xf;
 
                emit_data->output[emit_data->chan] =
                        ac_build_image_opcode(&ctx->ac, &args);
        }
 }
 
 static void atomic_emit_memory(struct si_shader_context *ctx,
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to