From: Marek Olšák <marek.ol...@amd.com>

---
 .../drivers/radeonsi/si_shader_internal.h     |  2 +
 .../drivers/radeonsi/si_shader_tgsi_mem.c     | 79 ++++++++++---------
 .../drivers/radeonsi/si_shader_tgsi_setup.c   |  2 +
 3 files changed, 47 insertions(+), 36 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 36351391d95..ac7784f7d46 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -193,20 +193,22 @@ struct si_shader_context {
        LLVMTypeRef i64;
        LLVMTypeRef i128;
        LLVMTypeRef f32;
        LLVMTypeRef v2i32;
        LLVMTypeRef v4i32;
        LLVMTypeRef v4f32;
        LLVMTypeRef v8i32;
 
        LLVMValueRef i32_0;
        LLVMValueRef i32_1;
+       LLVMValueRef i1false;
+       LLVMValueRef i1true;
 };
 
 static inline struct si_shader_context *
 si_shader_context(struct lp_build_tgsi_context *bld_base)
 {
        return (struct si_shader_context*)bld_base;
 }
 
 static inline struct si_shader_context *
 si_shader_context_from_abi(struct ac_shader_abi *abi)
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 4781526b071..1e21cabe770 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -338,20 +338,34 @@ static void buffer_append_args(
        emit_data->args[emit_data->arg_count++] = offset; /* voffset */
        if (!atomic) {
                emit_data->args[emit_data->arg_count++] =
                        force_glc ||
                        inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
                        i1true : i1false; /* glc */
        }
        emit_data->args[emit_data->arg_count++] = i1false; /* slc */
 }
 
+static unsigned get_cache_policy(struct si_shader_context *ctx,
+                                const struct tgsi_full_instruction *inst,
+                                bool atomic, bool force_glc)
+{
+       unsigned cache_policy = 0;
+
+       if (!atomic &&
+           (force_glc ||
+            inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE)))
+               cache_policy |= ac_glc;
+
+       return cache_policy;
+}
+
 static void load_emit_buffer(struct si_shader_context *ctx,
                             struct lp_build_emit_data *emit_data,
                             bool can_speculate, bool allow_smem)
 {
        const struct tgsi_full_instruction *inst = emit_data->inst;
        uint writemask = inst->Dst[0].Register.WriteMask;
        uint count = util_last_bit(writemask);
        LLVMValueRef *args = emit_data->args;
 
        /* Don't use SMEM for shader buffer loads, because LLVM doesn't
@@ -852,101 +866,94 @@ static void atomic_emit_memory(struct si_shader_context *ctx,
                LLVMBuildBitCast(builder, result, ctx->f32, "");
 }
 
 static void atomic_emit(
                const struct lp_build_tgsi_action *action,
                struct lp_build_tgsi_context *bld_base,
                struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        const struct tgsi_full_instruction * inst = emit_data->inst;
+       struct ac_image_args args = {};
+       unsigned num_data = 0;
+       LLVMValueRef vindex = ctx->i32_0;
+       LLVMValueRef voffset = ctx->i32_0;
 
        if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
                atomic_emit_memory(ctx, emit_data);
                return;
        }
 
        if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
                /* llvm.amdgcn.image/buffer.atomic.cmpswap reflect the hardware order
                 * of arguments, which is reversed relative to TGSI (and GLSL)
                 */
-               emit_data->args[emit_data->arg_count++] =
+               args.data[num_data++] =
                        ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 3, 0));
        }
 
-       emit_data->args[emit_data->arg_count++] =
+       args.data[num_data++] =
                ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 2, 0));
+       args.cache_policy = get_cache_policy(ctx, inst, true, false);
 
        if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
-               LLVMValueRef rsrc, offset;
-
-               rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false);
-               offset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0));
-
-               buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
-                                  offset, true, false);
+               args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false);
+               voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0));
        } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
                   tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
-               unsigned target = inst->Memory.Texture;
-               LLVMValueRef rsrc;
-
-               image_fetch_rsrc(bld_base, &inst->Src[0], true, target, &rsrc);
-               image_fetch_coords(bld_base, inst, 1, rsrc,
-                                  &emit_data->args[emit_data->arg_count + 1]);
-
-               if (target == TGSI_TEXTURE_BUFFER) {
-                       buffer_append_args(ctx, emit_data, rsrc,
-                                          emit_data->args[emit_data->arg_count + 1],
-                                          ctx->i32_0, true, false);
-               } else {
-                       emit_data->args[emit_data->arg_count] = rsrc;
-               }
+               image_fetch_rsrc(bld_base, &inst->Src[0], true,
+                               inst->Memory.Texture, &args.resource);
+               image_fetch_coords(bld_base, inst, 1, args.resource, args.coords);
+               vindex = args.coords[0]; /* for buffers only */
        }
 
        if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
            inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
+               LLVMValueRef buf_args[7];
+               unsigned num_args = 0;
+
+               buf_args[num_args++] = args.data[0];
+               if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
+                       buf_args[num_args++] = args.data[1];
+
+               buf_args[num_args++] = args.resource;
+               buf_args[num_args++] = vindex;
+               buf_args[num_args++] = voffset;
+               buf_args[num_args++] = args.cache_policy & ac_slc ? ctx->i1true : ctx->i1false;
+
                char intrinsic_name[40];
                snprintf(intrinsic_name, sizeof(intrinsic_name),
                         "llvm.amdgcn.buffer.atomic.%s", action->intr_name);
-               LLVMValueRef tmp = ac_build_intrinsic(
-                       &ctx->ac, intrinsic_name, ctx->i32,
-                       emit_data->args, emit_data->arg_count, 0);
-               emit_data->output[emit_data->chan] = ac_to_float(&ctx->ac, tmp);
+               emit_data->output[emit_data->chan] =
+                       ac_to_float(&ctx->ac,
+                                   ac_build_intrinsic(&ctx->ac, intrinsic_name,
+                                                      ctx->i32, buf_args, num_args, 0));
        } else {
-               unsigned num_data = inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS ? 2 : 1;
-               struct ac_image_args args = {};
-
                if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
                        args.opcode = ac_image_atomic_cmpswap;
                } else {
                        args.opcode = ac_image_atomic;
                        switch (inst->Instruction.Opcode) {
                        case TGSI_OPCODE_ATOMXCHG: args.atomic = ac_atomic_swap; break;
                        case TGSI_OPCODE_ATOMUADD: args.atomic = ac_atomic_add; break;
                        case TGSI_OPCODE_ATOMAND: args.atomic = ac_atomic_and; break;
                        case TGSI_OPCODE_ATOMOR: args.atomic = ac_atomic_or; break;
                        case TGSI_OPCODE_ATOMXOR: args.atomic = ac_atomic_xor; break;
                        case TGSI_OPCODE_ATOMUMIN: args.atomic = ac_atomic_umin; break;
                        case TGSI_OPCODE_ATOMUMAX: args.atomic = ac_atomic_umax; break;
                        case TGSI_OPCODE_ATOMIMIN: args.atomic = ac_atomic_smin; break;
                        case TGSI_OPCODE_ATOMIMAX: args.atomic = ac_atomic_smax; break;
                        default: unreachable("unhandled image atomic");
                        }
                }
 
-               for (unsigned i = 0; i < num_data; ++i)
-                       args.data[i] = emit_data->args[i];
-
-               args.resource = emit_data->args[num_data];
-               memcpy(args.coords, &emit_data->args[num_data + 1], sizeof(args.coords));
                args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
-
                emit_data->output[emit_data->chan] =
                        ac_to_float(&ctx->ac, ac_build_image_opcode(&ctx->ac, &args));
        }
 }
 
 static LLVMValueRef fix_resinfo(struct si_shader_context *ctx,
                                unsigned target, LLVMValueRef out)
 {
        LLVMBuilderRef builder = ctx->ac.builder;
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index b9ed0fc3ab0..975696d07ad 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -1014,20 +1014,22 @@ void si_llvm_context_init(struct si_shader_context *ctx,
        ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context);
        ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128);
        ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context);
        ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
        ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
        ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
        ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
 
        ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
        ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
+       ctx->i1false = LLVMConstInt(ctx->i1, 0, 0);
+       ctx->i1true = LLVMConstInt(ctx->i1, 1, 0);
 }
 
 /* Set the context to a certain TGSI shader. Can be called repeatedly
  * to change the shader. */
 void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
                              struct si_shader *shader)
 {
        const struct tgsi_shader_info *info = NULL;
        const struct tgsi_token *tokens = NULL;
 
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to