There is a bug where a BREAK/CONTINUE followed by LOOP_STARTxxx for nested loops may put the branch stack into a state such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this by replacing the ALU_PUSH_BEFORE with a PUSH + ALU for nested loops.
Fixes piglit tests: spec/!OpenGL 1.1/read-front spec/EXT_transform_feedback/order* spec/glsl-1.40/uniform_buffer/fs-struct-pad No piglit regressions. --- src/gallium/drivers/r600/r600_shader.c | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 6dbca50..aee011e 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -252,6 +252,7 @@ static int tgsi_endif(struct r600_shader_ctx *ctx); static int tgsi_bgnloop(struct r600_shader_ctx *ctx); static int tgsi_endloop(struct r600_shader_ctx *ctx); static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx); +static bool need_cayman_loop_bug_workaround(struct r600_shader_ctx *ctx); /* * bytestream -> r600 shader @@ -5490,7 +5491,7 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) return 0; } -static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) +static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type) { struct r600_bytecode_alu alu; int r; @@ -5510,7 +5511,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) alu.last = 1; - r = r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE); + r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type); if (r) return r; return 0; @@ -5730,7 +5731,20 @@ static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) static int tgsi_if(struct r600_shader_ctx *ctx) { - emit_logic_pred(ctx, ALU_OP2_PRED_SETNE_INT); + int alu_type = CF_OP_ALU_PUSH_BEFORE; + + /* + There is a bug where a BREAK/CONTINUE followed by LOOP_STARTxxx for nested + loops may put the branch stack into a state such that ALU_PUSH_BEFORE + doesn't work as expected. Workaround this by replacing the ALU_PUSH_BEFORE + with a PUSH + ALU for nested loops. + */ + if (ctx->bc->chip_class == CAYMAN && need_cayman_loop_bug_workaround(ctx)) { + r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH); + alu_type = CF_OP_ALU; + } + + emit_logic_pred(ctx, ALU_OP2_PRED_SETNE_INT, alu_type); r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP); @@ -5834,6 +5848,19 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) return 0; } +static bool need_cayman_loop_bug_workaround(struct r600_shader_ctx *ctx) +{ + unsigned int fscp; + int num_loops = 0; + for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) + { + if (FC_LOOP == ctx->bc->fc_stack[fscp].type) + ++num_loops; + } + + return num_loops >= 2; +} + static int tgsi_umad(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; -- 1.8.2.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev