From: Dave Airlie <airl...@redhat.com>

So it appears R600s (except rv670) handle AR differently, using a different
opcode. This patch fixes up r600g to work properly on r600.

This fixes ~100 piglit tests here (in GLSL1.30 mode) on rv610.

v3: add index_mode as per the docs.

This still fails any dst relative tests for some reason I can't quite see yet,
but it passes a lot more tests than without this patch.

Signed-off-by: Dave Airlie <airl...@redhat.com>
---
 src/gallium/drivers/r600/r600_asm.c    |   48 ++++++++++++++++++++++++++++---
 src/gallium/drivers/r600/r600_asm.h    |    8 ++++-
 src/gallium/drivers/r600/r600_shader.c |    6 +++-
 src/gallium/drivers/r600/r600_sq.h     |    7 ++++
 4 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c 
b/src/gallium/drivers/r600/r600_asm.c
index 8234744..aad286b 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -94,6 +94,7 @@ static inline unsigned int 
r600_bytecode_get_num_operands(struct r600_bytecode *
                case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
                case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA:
                case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT:
                case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
                case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
                case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
@@ -249,10 +250,11 @@ static struct r600_bytecode_tex *r600_bytecode_tex(void)
        return tex;
 }
 
-void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class)
+void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, 
unsigned ar_handling)
 {
        LIST_INITHEAD(&bc->cf);
        bc->chip_class = chip_class;
+       bc->ar_handling = ar_handling;
 }
 
 static int r600_bytecode_add_cf(struct r600_bytecode *bc)
@@ -441,7 +443,8 @@ static int is_alu_mova_inst(struct r600_bytecode *bc, 
struct r600_bytecode_alu *
                return !alu->is_op3 && (
                        alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA ||
                        alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR 
||
-                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT ||
+                       alu->inst == 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT);
        case EVERGREEN:
        case CAYMAN:
        default:
@@ -457,7 +460,8 @@ static int is_alu_vec_unit_inst(struct r600_bytecode *bc, 
struct r600_bytecode_a
        case R600:
        case R700:
                return is_alu_reduction_inst(bc, alu) ||
-                       is_alu_mova_inst(bc, alu);
+                       (is_alu_mova_inst(bc, alu) && 
+                        (alu->inst != 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT));
        case EVERGREEN:
        case CAYMAN:
        default:
@@ -478,6 +482,7 @@ static int is_alu_trans_unit_inst(struct r600_bytecode *bc, 
struct r600_bytecode
        case R700:
                if (!alu->is_op3)
                        return alu->inst == 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
+                               alu->inst == 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT ||
                                alu->inst == 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
                                alu->inst == 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT ||
                                alu->inst == 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT ||
@@ -1236,12 +1241,43 @@ static int r600_bytecode_alloc_kcache_lines(struct 
r600_bytecode *bc, struct r60
        return 0;
 }
 
+
+/* load AR register from gpr (bc->ar_reg) with MOVA_GPR_INT */
+static int load_ar_r6xx(struct r600_bytecode *bc)
+{
+       struct r600_bytecode_alu alu;
+       int r;
+
+       if (bc->ar_loaded)
+               return 0;
+
+       /* hack to avoid making MOVA the last instruction in the clause */
+       if ((bc->cf_last->ndw>>1) >= 110)
+               bc->force_add_cf = 1;
+
+       memset(&alu, 0, sizeof(alu));
+       alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT;
+       alu.src[0].sel = bc->ar_reg;
+       alu.last = 1;
+       alu.index_mode = INDEX_MODE_LOOP;
+       r = r600_bytecode_add_alu(bc, &alu);
+       if (r)
+               return r;
+
+       bc->cf_last->r6xx_uses_waterfall = 1;
+       bc->ar_loaded = 1;
+       return 0;
+}
+
 /* load AR register from gpr (bc->ar_reg) with MOVA_INT */
 static int load_ar(struct r600_bytecode *bc)
 {
        struct r600_bytecode_alu alu;
        int r;
 
+       if (bc->ar_handling)
+               return load_ar_r6xx(bc);
+
        if (bc->ar_loaded)
                return 0;
 
@@ -1599,6 +1635,7 @@ static int r600_bytecode_alu_build(struct r600_bytecode 
*bc, struct r600_bytecod
                                S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
                                S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
                                S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
+                               S_SQ_ALU_WORD0_INDEX_MODE(alu->index_mode) |
                                S_SQ_ALU_WORD0_LAST(alu->last);
 
        if (alu->is_op3) {
@@ -2286,7 +2323,8 @@ void r600_bytecode_dump(struct r600_bytecode *bc)
                        fprintf(stderr, "SRC1(SEL:%d ", alu->src[1].sel);
                        fprintf(stderr, "REL:%d ", alu->src[1].rel);
                        fprintf(stderr, "CHAN:%d ", alu->src[1].chan);
-                       fprintf(stderr, "NEG:%d) ", alu->src[1].neg);
+                       fprintf(stderr, "NEG:%d ", alu->src[1].neg);
+                       fprintf(stderr, "IM:%d) ", alu->index_mode);
                        fprintf(stderr, "LAST:%d)\n", alu->last);
                        id++;
                        fprintf(stderr, "%04d %08X %c ", id, bc->bytecode[id], 
alu->last ? '*' : ' ');
@@ -2565,7 +2603,7 @@ int r600_vertex_elements_build_fetch_shader(struct 
r600_pipe_context *rctx, stru
        }
 
        memset(&bc, 0, sizeof(bc));
-       r600_bytecode_init(&bc, rctx->chip_class);
+       r600_bytecode_init(&bc, rctx->chip_class, 0);
 
        for (i = 0; i < ve->count; i++) {
                if (elements[i].instance_divisor > 1) {
diff --git a/src/gallium/drivers/r600/r600_asm.h 
b/src/gallium/drivers/r600/r600_asm.h
index d0ff75d..40abb25 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -54,6 +54,7 @@ struct r600_bytecode_alu {
        unsigned                        bank_swizzle;
        unsigned                        bank_swizzle_force;
        unsigned                        omod;
+       unsigned                        index_mode;
 };
 
 struct r600_bytecode_tex {
@@ -176,6 +177,10 @@ struct r600_cf_callstack {
        int                             max;
 };
 
+#define AR_HANDLE_NORMAL 0
+#define AR_HANDLE_RV6XX 1 /* except RV670 */
+
+
 struct r600_bytecode {
        enum chip_class                 chip_class;
        int                             type;
@@ -194,13 +199,14 @@ struct r600_bytecode {
        struct r600_cf_callstack        callstack[SQ_MAX_CALL_DEPTH];
        unsigned        ar_loaded;
        unsigned        ar_reg;
+       unsigned        ar_handling;
 };
 
 /* eg_asm.c */
 int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf 
*cf);
 
 /* r600_asm.c */
-void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class);
+void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, 
unsigned ar_handling);
 void r600_bytecode_clear(struct r600_bytecode *bc);
 int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct 
r600_bytecode_alu *alu);
 int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct 
r600_bytecode_vtx *vtx);
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 59d41cf..1f19190 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -804,10 +804,14 @@ static int r600_shader_from_tgsi(struct r600_pipe_context 
* rctx, struct r600_pi
        unsigned output_done, noutput;
        unsigned opcode;
        int i, j, r = 0, pos0;
+       unsigned ar_handling = AR_HANDLE_NORMAL;
+
+       if ((rctx->chip_class == R600) && (rctx->family != CHIP_RV670))
+               ar_handling = AR_HANDLE_RV6XX;
 
        ctx.bc = &shader->bc;
        ctx.shader = shader;
-       r600_bytecode_init(ctx.bc, rctx->chip_class);
+       r600_bytecode_init(ctx.bc, rctx->chip_class, ar_handling);
        ctx.tokens = tokens;
        tgsi_scan_shader(tokens, &ctx.info);
        tgsi_parse_init(&ctx.parse, tokens);
diff --git a/src/gallium/drivers/r600/r600_sq.h 
b/src/gallium/drivers/r600/r600_sq.h
index b9c4126..4b2a19a 100644
--- a/src/gallium/drivers/r600/r600_sq.h
+++ b/src/gallium/drivers/r600/r600_sq.h
@@ -471,4 +471,11 @@
 #define SQ_ALU_SCL_122                           0x00000001
 #define SQ_ALU_SCL_212                           0x00000002
 #define SQ_ALU_SCL_221                           0x00000003
+
+#define   INDEX_MODE_AR_X 0
+#define   INDEX_MODE_AR_Y 1
+#define   INDEX_MODE_AR_Z 2
+#define   INDEX_MODE_AR_W 3
+#define   INDEX_MODE_LOOP 4
+
 #endif
-- 
1.7.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to