On 04/15/2013 01:05 AM, Martin Andersson wrote:
There is a bug where a BREAK/CONTINUE followed by LOOP_STARTxxx for nested
loops may put the branch stack into a state such that ALU_PUSH_BEFORE
doesn't work as expected. Workaround this by replacing the ALU_PUSH_BEFORE
with a PUSH + ALU for nested loops.
Fixes piglit tests:
spec/!OpenGL 1.1/read-front
spec/EXT_transform_feedback/order*
spec/glsl-1.40/uniform_buffer/fs-struct-pad
No piglit regressions.
---
src/gallium/drivers/r600/r600_shader.c | 33 ++++++++++++++++++++++++++++++---
1 file changed, 30 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_shader.c
b/src/gallium/drivers/r600/r600_shader.c
index 6dbca50..aee011e 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -252,6 +252,7 @@ static int tgsi_endif(struct r600_shader_ctx *ctx);
static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
static int tgsi_endloop(struct r600_shader_ctx *ctx);
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
+static bool need_cayman_loop_bug_workaround(struct r600_shader_ctx *ctx);
/*
* bytestream -> r600 shader
@@ -5490,7 +5491,7 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx)
return 0;
}
-static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
+static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int
alu_type)
{
struct r600_bytecode_alu alu;
int r;
@@ -5510,7 +5511,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx,
int opcode)
alu.last = 1;
- r = r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE);
+ r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type);
if (r)
return r;
return 0;
@@ -5730,7 +5731,20 @@ static void break_loop_on_flag(struct r600_shader_ctx
*ctx, unsigned fc_sp)
static int tgsi_if(struct r600_shader_ctx *ctx)
{
- emit_logic_pred(ctx, ALU_OP2_PRED_SETNE_INT);
+ int alu_type = CF_OP_ALU_PUSH_BEFORE;
+
+ /*
+ There is a bug where a BREAK/CONTINUE followed by LOOP_STARTxxx for
nested
+ loops may put the branch stack into a state such that ALU_PUSH_BEFORE
+ doesn't work as expected. Workaround this by replacing the
ALU_PUSH_BEFORE
+ with a PUSH + ALU for nested loops.
+ */
+ if (ctx->bc->chip_class == CAYMAN &&
need_cayman_loop_bug_workaround(ctx)) {
We already have current loop level for the stack size computation, see
r600_bytecode::stack, so I think need_cayman_loop_bug_workaround call
may be replaced with "ctx->bc->stack.loop > 1", if I'm not missing
something.
Vadim
+ r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
+ alu_type = CF_OP_ALU;
+ }
+
+ emit_logic_pred(ctx, ALU_OP2_PRED_SETNE_INT, alu_type);
r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
@@ -5834,6 +5848,19 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx
*ctx)
return 0;
}
+static bool need_cayman_loop_bug_workaround(struct r600_shader_ctx *ctx)
+{
+ unsigned int fscp;
+ int num_loops = 0;
+ for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
+ {
+ if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
+ ++num_loops;
+ }
+
+ return num_loops >= 2;
+}
+
static int tgsi_umad(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst =
&ctx->parse.FullToken.FullInstruction;
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev