From: Marek Olšák <marek.ol...@amd.com>

---
 .../drivers/radeonsi/si_shader_tgsi_mem.c     | 163 ++++++++----------
 1 file changed, 71 insertions(+), 92 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 1e21cabe770..e4b29c675a5 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -340,26 +340,35 @@ static void buffer_append_args(
                emit_data->args[emit_data->arg_count++] =
                        force_glc ||
                        inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
                        i1true : i1false; /* glc */
        }
        emit_data->args[emit_data->arg_count++] = i1false; /* slc */
 }
 
 static unsigned get_cache_policy(struct si_shader_context *ctx,
                                 const struct tgsi_full_instruction *inst,
-                                bool atomic, bool force_glc)
+                                bool atomic, bool may_store_unaligned,
+                                bool writeonly_memory)
 {
        unsigned cache_policy = 0;
 
        if (!atomic &&
-           (force_glc ||
+           /* SI has a TC L1 bug causing corruption of 8bit/16bit stores.
+            * All store opcodes not aligned to a dword are affected.
+            * The only way to get unaligned stores in radeonsi is through
+            * shader images. */
+           ((may_store_unaligned && ctx->screen->info.chip_class == SI) ||
+            /* If this is write-only, don't keep data in L1 to prevent
+             * evicting L1 cache lines that may be needed by other
+             * instructions. */
+            writeonly_memory ||
             inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE)))
                cache_policy |= ac_glc;
 
        return cache_policy;
 }
 
 static void load_emit_buffer(struct si_shader_context *ctx,
                             struct lp_build_emit_data *emit_data,
                             bool can_speculate, bool allow_smem)
 {
@@ -581,44 +590,36 @@ static void load_emit(
                if (inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))
                        args.cache_policy = ac_glc;
                args.attributes = ac_get_load_intr_attribs(can_speculate);
                args.dmask = 0xf;
 
                emit_data->output[emit_data->chan] =
                        ac_build_image_opcode(&ctx->ac, &args);
        }
 }
 
-static void store_emit_buffer(
-               struct si_shader_context *ctx,
-               struct lp_build_emit_data *emit_data,
-               bool writeonly_memory)
+static void store_emit_buffer(struct si_shader_context *ctx,
+                             LLVMValueRef resource,
+                             unsigned writemask,
+                             LLVMValueRef value,
+                             LLVMValueRef voffset,
+                             unsigned cache_policy,
+                             bool writeonly_memory)
 {
-       const struct tgsi_full_instruction *inst = emit_data->inst;
        LLVMBuilderRef builder = ctx->ac.builder;
-       LLVMValueRef base_data = emit_data->args[0];
-       LLVMValueRef base_offset = emit_data->args[3];
-       unsigned writemask = inst->Dst[0].Register.WriteMask;
-
-       /* If this is write-only, don't keep data in L1 to prevent
-        * evicting L1 cache lines that may be needed by other
-        * instructions.
-        */
-       if (writeonly_memory)
-               emit_data->args[4] = LLVMConstInt(ctx->i1, 1, 0); /* GLC = 1 */
+       LLVMValueRef base_data = value;
+       LLVMValueRef base_offset = voffset;
 
        while (writemask) {
                int start, count;
                const char *intrinsic_name;
-               LLVMValueRef data;
-               LLVMValueRef offset;
-               LLVMValueRef tmp;
+               LLVMValueRef data, voff, tmp;
 
                u_bit_scan_consecutive_range(&writemask, &start, &count);
 
                /* Due to an LLVM limitation, split 3-element writes
                 * into a 2-element and a 1-element write. */
                if (count == 3) {
                        writemask |= 1 << (start + 2);
                        count = 2;
                }
 
@@ -643,34 +644,37 @@ static void store_emit_buffer(
 
                        intrinsic_name = "llvm.amdgcn.buffer.store.v2f32";
                } else {
                        assert(count == 1);
                        data = LLVMBuildExtractElement(
                                builder, base_data,
                                LLVMConstInt(ctx->i32, start, 0), "");
                        intrinsic_name = "llvm.amdgcn.buffer.store.f32";
                }
 
-               offset = base_offset;
+               voff = base_offset;
                if (start != 0) {
-                       offset = LLVMBuildAdd(
-                               builder, offset,
+                       voff = LLVMBuildAdd(
+                               builder, voff,
                                LLVMConstInt(ctx->i32, start * 4, 0), "");
                }
 
-               emit_data->args[0] = data;
-               emit_data->args[3] = offset;
-
-               ac_build_intrinsic(
-                       &ctx->ac, intrinsic_name, ctx->voidt,
-                       emit_data->args, emit_data->arg_count,
-                       ac_get_store_intr_attribs(writeonly_memory));
+               LLVMValueRef args[] = {
+                       data,
+                       resource,
+                       ctx->i32_0, /* vindex */
+                       voff,
+                       LLVMConstInt(ctx->i1, !!(cache_policy & ac_glc), 0),
+                       LLVMConstInt(ctx->i1, !!(cache_policy & ac_slc), 0),
+               };
+               ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->voidt, args, 6,
+                                  ac_get_store_intr_attribs(writeonly_memory));
        }
 }
 
 static void store_emit_memory(
                struct si_shader_context *ctx,
                struct lp_build_emit_data *emit_data)
 {
        const struct tgsi_full_instruction *inst = emit_data->inst;
        LLVMBuilderRef builder = ctx->ac.builder;
        unsigned writemask = inst->Dst[0].Register.WriteMask;
@@ -694,113 +698,88 @@ static void store_emit(
                const struct lp_build_tgsi_action *action,
                struct lp_build_tgsi_context *bld_base,
                struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        const struct tgsi_full_instruction * inst = emit_data->inst;
        const struct tgsi_shader_info *info = &ctx->shader->selector->info;
        struct tgsi_full_src_register resource_reg =
                tgsi_full_src_register_from_dst(&inst->Dst[0]);
        unsigned target = inst->Memory.Texture;
-       bool writeonly_memory = false;
-       LLVMValueRef chans[4], rsrc;
+       bool writeonly_memory = is_oneway_access_only(inst, info,
+                                                     info->shader_buffers_load |
+                                                     info->shader_buffers_atomic,
+                                                     info->images_load |
+                                                     info->images_atomic);
+       bool is_image = inst->Dst[0].Register.File == TGSI_FILE_IMAGE ||
+                       tgsi_is_bindless_image_file(inst->Dst[0].Register.File);
+       LLVMValueRef chans[4], value;
+       LLVMValueRef vindex = ctx->i32_0;
+       LLVMValueRef voffset = ctx->i32_0;
+       struct ac_image_args args = {};
 
        if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
                store_emit_memory(ctx, emit_data);
                return;
        }
 
        for (unsigned chan = 0; chan < 4; ++chan)
                chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
 
-       emit_data->args[emit_data->arg_count++] =
-               ac_build_gather_values(&ctx->ac, chans, 4);
+       value = ac_build_gather_values(&ctx->ac, chans, 4);
 
        if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
-               LLVMValueRef offset, tmp;
-
-               rsrc = shader_buffer_fetch_rsrc(ctx, &resource_reg, false);
-
-               tmp = lp_build_emit_fetch(bld_base, inst, 0, 0);
-               offset = ac_to_integer(&ctx->ac, tmp);
-
-               buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
-                                  offset, false, false);
-       } else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE ||
-                  tgsi_is_bindless_image_file(inst->Dst[0].Register.File)) {
-               /* 8bit/16bit TC L1 write corruption bug on SI.
-                * All store opcodes not aligned to a dword are affected.
-                *
-                * The only way to get unaligned stores in radeonsi is through
-                * shader images.
-                */
-               bool force_glc = ctx->screen->info.chip_class == SI;
-
-               image_fetch_rsrc(bld_base, &resource_reg, true, target, &rsrc);
-               image_fetch_coords(bld_base, inst, 0, rsrc, &emit_data->args[2]);
-
-               if (target == TGSI_TEXTURE_BUFFER) {
-                       buffer_append_args(ctx, emit_data, rsrc, emit_data->args[2],
-                                          ctx->i32_0, false, force_glc);
-               } else {
-                       emit_data->args[1] = rsrc;
-               }
+               args.resource = shader_buffer_fetch_rsrc(ctx, &resource_reg, false);
+               voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 0, 0));
+       } else if (is_image) {
+               image_fetch_rsrc(bld_base, &resource_reg, true, target, &args.resource);
+               image_fetch_coords(bld_base, inst, 0, args.resource, args.coords);
+               vindex = args.coords[0]; /* for buffers only */
+       } else {
+               unreachable("unexpected register file");
        }
 
        if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
                ac_build_waitcnt(&ctx->ac, VM_CNT);
 
-       writeonly_memory = is_oneway_access_only(inst, info,
-                                                info->shader_buffers_load |
-                                                info->shader_buffers_atomic,
-                                                info->images_load |
-                                                info->images_atomic);
+       args.cache_policy = get_cache_policy(ctx, inst,
+                                            false, /* atomic */
+                                            is_image, /* may_store_unaligned */
+                                            writeonly_memory);
 
        if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
-               store_emit_buffer(ctx, emit_data, writeonly_memory);
+               store_emit_buffer(ctx, args.resource, inst->Dst[0].Register.WriteMask,
+                                 value, voffset, args.cache_policy, writeonly_memory);
                return;
        }
 
        if (target == TGSI_TEXTURE_BUFFER) {
-               /* If this is write-only, don't keep data in L1 to prevent
-                * evicting L1 cache lines that may be needed by other
-                * instructions.
-                */
-               if (writeonly_memory)
-                       emit_data->args[4] = LLVMConstInt(ctx->i1, 1, 0); /* GLC = 1 */
+               LLVMValueRef buf_args[] = {
+                       value,
+                       args.resource,
+                       vindex,
+                       ctx->i32_0, /* voffset */
+                       LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_glc), 0),
+                       LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_slc), 0),
+               };
 
                emit_data->output[emit_data->chan] = ac_build_intrinsic(
                        &ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32",
-                       ctx->voidt, emit_data->args,
-                       emit_data->arg_count,
+                       ctx->voidt, buf_args, 6,
                        ac_get_store_intr_attribs(writeonly_memory));
        } else {
-               struct ac_image_args args = {};
                args.opcode = ac_image_store;
-               args.data[0] = emit_data->args[0];
-               args.resource = emit_data->args[1];
-               memcpy(args.coords, &emit_data->args[2], sizeof(args.coords));
+               args.data[0] = value;
                args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
                args.attributes = ac_get_store_intr_attribs(writeonly_memory);
                args.dmask = 0xf;
 
-               /* Workaround for 8bit/16bit TC L1 write corruption bug on SI.
-                * All store opcodes not aligned to a dword are affected.
-                */
-               if (ctx->screen->info.chip_class == SI ||
-                   /* If this is write-only, don't keep data in L1 to prevent
-                    * evicting L1 cache lines that may be needed by other
-                    * instructions. */
-                   writeonly_memory ||
-                   inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))
-                       args.cache_policy = ac_glc;
-
                emit_data->output[emit_data->chan] =
                        ac_build_image_opcode(&ctx->ac, &args);
        }
 }
 
 static void atomic_emit_memory(struct si_shader_context *ctx,
                                struct lp_build_emit_data *emit_data) {
        LLVMBuilderRef builder = ctx->ac.builder;
        const struct tgsi_full_instruction * inst = emit_data->inst;
        LLVMValueRef ptr, result, arg;
@@ -886,21 +865,21 @@ static void atomic_emit(
        if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
                /* llvm.amdgcn.image/buffer.atomic.cmpswap reflect the hardware order
                 * of arguments, which is reversed relative to TGSI (and GLSL)
                 */
                args.data[num_data++] =
                        ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 3, 0));
        }
 
        args.data[num_data++] =
                ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 2, 0));
-       args.cache_policy = get_cache_policy(ctx, inst, true, false);
+       args.cache_policy = get_cache_policy(ctx, inst, true, false, false);
 
        if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
                args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false);
                voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0));
        } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
                   tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
                image_fetch_rsrc(bld_base, &inst->Src[0], true,
                                inst->Memory.Texture, &args.resource);
                image_fetch_coords(bld_base, inst, 1, args.resource, args.coords);
                vindex = args.coords[0]; /* for buffers only */
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to