From: Nicolai Hähnle <nicolai.haeh...@amd.com>

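Turns out that emitting an s_waitcnt (via the llvm.amdgcn.s.waitcnt.all
intrinsic) for memory barriers and for volatile loads and stores is
needed after all to satisfy some strengthened coherency tests. This
depends on support in LLVM; see http://reviews.llvm.org/D19203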
---
Note that this patch applies on top of the compute shader series; without
that series you'll get a trivial merge conflict.

 src/gallium/drivers/radeonsi/si_shader.c | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index dcbdde3..0398a28 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2826,6 +2826,13 @@ static void emit_optimization_barrier(struct si_shader_context *ctx)
        LLVMBuildCall(builder, inlineasm, NULL, 0, "");
 }
 
+static void emit_waitcnt(struct si_shader_context *ctx)
+{
+       LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder;
+       lp_build_intrinsic(builder, "llvm.amdgcn.s.waitcnt.all",
+                          ctx->voidt, NULL, 0, LLVMNoUnwindAttribute);
+}
+
 static void membar_emit(
                const struct lp_build_tgsi_action *action,
                struct lp_build_tgsi_context *bld_base,
@@ -2833,14 +2840,7 @@ static void membar_emit(
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
 
-       /* Since memoryBarrier only makes guarantees about atomics and
-        * coherent image accesses (which bypass TC L1), we do not need to emit
-        * any special cache handling here.
-        *
-        * We do have to prevent LLVM from re-ordering loads across
-        * the barrier though.
-        */
-       emit_optimization_barrier(ctx);
+       emit_waitcnt(ctx);
 }
 
 static LLVMValueRef
@@ -3200,7 +3200,7 @@ static void load_emit(
        }
 
        if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
-               emit_optimization_barrier(ctx);
+               emit_waitcnt(ctx);
 
        if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
                load_emit_buffer(ctx, emit_data);
@@ -3390,6 +3390,7 @@ static void store_emit(
                struct lp_build_tgsi_context *bld_base,
                struct lp_build_emit_data *emit_data)
 {
+       struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        const struct tgsi_full_instruction * inst = emit_data->inst;
@@ -3397,11 +3398,16 @@ static void store_emit(
        char intrinsic_name[32];
        char coords_type[8];
 
-       if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
-               store_emit_buffer(si_shader_context(bld_base), emit_data);
+       if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
+               store_emit_memory(ctx, emit_data);
                return;
-       } else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
-               store_emit_memory(si_shader_context(bld_base), emit_data);
+       }
+
+       if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
+               emit_waitcnt(ctx);
+
+       if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
+               store_emit_buffer(ctx, emit_data);
                return;
        }
 
-- 
2.5.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to