From: Dave Airlie <airl...@redhat.com> So it appears R600s (except rv670) do AR handling different using a different opcode. This patch fixes up r600g to work properly on r600.
This fixes ~100 piglit tests here (in GLSL1.30 mode) on rv610. v3: add index_mode as per the docs. This still fails any dst relative tests for some reason I can't quite see yet, but it passes a lot more tests than without. Signed-off-by: Dave Airlie <airl...@redhat.com> --- src/gallium/drivers/r600/r600_asm.c | 48 ++++++++++++++++++++++++++++--- src/gallium/drivers/r600/r600_asm.h | 8 ++++- src/gallium/drivers/r600/r600_shader.c | 6 +++- src/gallium/drivers/r600/r600_sq.h | 7 ++++ 4 files changed, 62 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 8234744..aad286b 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -94,6 +94,7 @@ static inline unsigned int r600_bytecode_get_num_operands(struct r600_bytecode * case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: @@ -249,10 +250,11 @@ static struct r600_bytecode_tex *r600_bytecode_tex(void) return tex; } -void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class) +void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, unsigned ar_handling) { LIST_INITHEAD(&bc->cf); bc->chip_class = chip_class; + bc->ar_handling = ar_handling; } static int r600_bytecode_add_cf(struct r600_bytecode *bc) @@ -441,7 +443,8 @@ static int is_alu_mova_inst(struct r600_bytecode *bc, struct r600_bytecode_alu * return !alu->is_op3 && ( alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR || - alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT); case EVERGREEN: case CAYMAN: default: @@ -457,7 +460,8 @@ static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_a case R600: case R700: return is_alu_reduction_inst(bc, alu) || - is_alu_mova_inst(bc, alu); + (is_alu_mova_inst(bc, alu) && + (alu->inst != V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT)); case EVERGREEN: case CAYMAN: default: @@ -478,6 +482,7 @@ static int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode case R700: if (!alu->is_op3) return alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT || @@ -1236,12 +1241,43 @@ static int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc, struct r60 return 0; } + +/* load AR register from gpr (bc->ar_reg) with MOVA_INT */ +static int load_ar_r6xx(struct r600_bytecode *bc) +{ + struct r600_bytecode_alu alu; + int r; + + if (bc->ar_loaded) + return 0; + + /* hack to avoid making MOVA the last instruction in the clause */ + if ((bc->cf_last->ndw>>1) >= 110) + bc->force_add_cf = 1; + + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT; + alu.src[0].sel = bc->ar_reg; + alu.last = 1; + alu.index_mode = INDEX_MODE_LOOP; + r = r600_bytecode_add_alu(bc, &alu); + if (r) + return r; + + bc->cf_last->r6xx_uses_waterfall = 1; + bc->ar_loaded = 1; + return 0; +} + /* load AR register from gpr (bc->ar_reg) with MOVA_INT */ static int load_ar(struct r600_bytecode *bc) { struct r600_bytecode_alu alu; int r; + if (bc->ar_handling) + return load_ar_r6xx(bc); + if (bc->ar_loaded) return 0; @@ -1599,6 +1635,7 @@ static int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecod S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) | S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | + S_SQ_ALU_WORD0_INDEX_MODE(alu->index_mode) | S_SQ_ALU_WORD0_LAST(alu->last); if (alu->is_op3) { @@ -2286,7 +2323,8 @@ void r600_bytecode_dump(struct r600_bytecode *bc) fprintf(stderr, "SRC1(SEL:%d ", alu->src[1].sel); fprintf(stderr, "REL:%d ", alu->src[1].rel); fprintf(stderr, "CHAN:%d ", alu->src[1].chan); - fprintf(stderr, "NEG:%d) ", alu->src[1].neg); + fprintf(stderr, "NEG:%d ", alu->src[1].neg); + fprintf(stderr, "IM:%d) ", alu->index_mode); fprintf(stderr, "LAST:%d)\n", alu->last); id++; fprintf(stderr, "%04d %08X %c ", id, bc->bytecode[id], alu->last ? '*' : ' '); @@ -2565,7 +2603,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru } memset(&bc, 0, sizeof(bc)); - r600_bytecode_init(&bc, rctx->chip_class); + r600_bytecode_init(&bc, rctx->chip_class, 0); for (i = 0; i < ve->count; i++) { if (elements[i].instance_divisor > 1) { diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index d0ff75d..40abb25 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -54,6 +54,7 @@ struct r600_bytecode_alu { unsigned bank_swizzle; unsigned bank_swizzle_force; unsigned omod; + unsigned index_mode; }; struct r600_bytecode_tex { @@ -176,6 +177,10 @@ struct r600_cf_callstack { int max; }; +#define AR_HANDLE_NORMAL 0 +#define AR_HANDLE_RV6XX 1 /* except RV670 */ + + struct r600_bytecode { enum chip_class chip_class; int type; @@ -194,13 +199,14 @@ struct r600_bytecode { struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH]; unsigned ar_loaded; unsigned ar_reg; + unsigned ar_handling; }; /* eg_asm.c */ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf); /* r600_asm.c */ -void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class); +void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, unsigned ar_handling); void r600_bytecode_clear(struct r600_bytecode *bc); int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu); int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 59d41cf..1f19190 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -804,10 +804,14 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi unsigned output_done, noutput; unsigned opcode; int i, j, r = 0, pos0; + unsigned ar_handling = AR_HANDLE_NORMAL; + + if ((rctx->chip_class == R600) && (rctx->family != CHIP_RV670)) + ar_handling = AR_HANDLE_RV6XX; ctx.bc = &shader->bc; ctx.shader = shader; - r600_bytecode_init(ctx.bc, rctx->chip_class); + r600_bytecode_init(ctx.bc, rctx->chip_class, ar_handling); ctx.tokens = tokens; tgsi_scan_shader(tokens, &ctx.info); tgsi_parse_init(&ctx.parse, tokens); diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h index b9c4126..4b2a19a 100644 --- a/src/gallium/drivers/r600/r600_sq.h +++ b/src/gallium/drivers/r600/r600_sq.h @@ -471,4 +471,11 @@ #define SQ_ALU_SCL_122 0x00000001 #define SQ_ALU_SCL_212 0x00000002 #define SQ_ALU_SCL_221 0x00000003 + +#define INDEX_MODE_AR_X 0 +#define INDEX_MODE_AR_Y 1 +#define INDEX_MODE_AR_Z 2 +#define INDEX_MODE_AR_W 3 +#define INDEX_MODE_LOOP 4 + #endif -- 1.7.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev