From: Dave Airlie <airl...@redhat.com>

This fixes some hangs with the arb_shader_image_load_store-atomicity tests
on evergreen/cayman GPUs.

I'm not 100% sure why this helps (VPM hurts my brain); I'm running some
piglit runs to see if it has any bad side effects.

v2: only set the vpm flags when an atomic operation is done.
---
 src/gallium/drivers/r600/r600_asm.h    |  1 +
 src/gallium/drivers/r600/r600_shader.c | 19 ++++++++++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 5841044bf81..366530573de 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -224,6 +224,7 @@ struct r600_cf_stack_entry {
        struct r600_bytecode_cf         *start;
        struct r600_bytecode_cf         **mid; /* used to store the else point 
*/
        int                             num_mid;
+       bool need_vpm;
 };
 
 #define SQ_MAX_CALL_DEPTH 0x00000020
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 2229dc8fab3..e93dbd3970c 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -379,6 +379,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[],
 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
 static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
+static void fc_level_needs_vpm(struct r600_shader_ctx *ctx);
 static int tgsi_else(struct r600_shader_ctx *ctx);
 static int tgsi_endif(struct r600_shader_ctx *ctx);
 static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
@@ -9125,6 +9126,8 @@ static int tgsi_atomic_op_rat(struct r600_shader_ctx *ctx)
        unsigned immed_base;
        unsigned rat_base;
 
+       fc_level_needs_vpm(ctx);
+
        immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET;
        rat_base = ctx->shader->rat_base;
 
@@ -9284,6 +9287,8 @@ static int tgsi_atomic_op_gds(struct r600_shader_ctx *ctx)
                return -1;
        }
 
+       fc_level_needs_vpm(ctx);
+
        r = tgsi_set_gds_temp(ctx, &uav_id, &uav_index_mode);
        if (r)
                return r;
@@ -9405,6 +9410,8 @@ static int tgsi_atomic_op_lds(struct r600_shader_ctx *ctx)
        int lds_op = get_lds_op(inst->Instruction.Opcode);
        int r;
 
+       fc_level_needs_vpm(ctx);
+
        struct r600_bytecode_alu alu;
        memset(&alu, 0, sizeof(struct r600_bytecode_alu));
        alu.op = lds_op;
@@ -10433,9 +10440,16 @@ static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
        assert(ctx->bc->fc_sp < ARRAY_SIZE(ctx->bc->fc_stack));
        ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
        ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
+       ctx->bc->fc_stack[ctx->bc->fc_sp].need_vpm = false;
        ctx->bc->fc_sp++;
 }
 
+static void fc_level_needs_vpm(struct r600_shader_ctx *ctx) /* flag the innermost open flow-control level as needing VPM */
+{
+       if (ctx->bc->fc_sp) /* fc_sp == 0: nothing on the fc stack, so there is no entry to flag */
+               ctx->bc->fc_stack[ctx->bc->fc_sp - 1].need_vpm = true; /* fc_sp - 1 is the most recently pushed entry */
+}
+
 static void fc_poplevel(struct r600_shader_ctx *ctx)
 {
        struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp - 1];
@@ -10576,6 +10590,8 @@ static int tgsi_endif(struct r600_shader_ctx *ctx)
        } else {
                ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[0]->cf_addr = 
ctx->bc->cf_last->id + offset;
        }
+       if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].need_vpm)
+               ctx->bc->fc_stack[ctx->bc->fc_sp-1].start->vpm = 1;
        fc_poplevel(ctx);
 
        callstack_pop(ctx, FC_PUSH_VPM);
@@ -10587,7 +10603,6 @@ static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
        /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
         * limited to 4096 iterations, like the other LOOP_* instructions. */
        r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10);
-
        fc_pushlevel(ctx, FC_LOOP);
 
        /* check stack depth */
@@ -10612,6 +10627,8 @@ static int tgsi_endloop(struct r600_shader_ctx *ctx)
           BRK/CONT point to LOOP END CF
        */
        ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp - 
1].start->id + 2;
+       if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].need_vpm)
+               ctx->bc->fc_stack[ctx->bc->fc_sp-1].start->vpm = 1;
 
        ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = 
ctx->bc->cf_last->id + 2;
 
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to