v3: handle hw-specific cases

Signed-off-by: Vadim Girlin <vadimgir...@gmail.com>
---
 
cc: Andy Furniss <andy...@ukfsn.org>
Hopefully this works better on the non-Evergreen chips.

 src/gallium/drivers/r600/r600_asm.c    |   4 +-
 src/gallium/drivers/r600/r600_asm.h    |  29 +++++--
 src/gallium/drivers/r600/r600_shader.c | 134 ++++++++++++++++++++++-----------
 3 files changed, 113 insertions(+), 54 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 3632aa5..b1dbfe1 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1524,8 +1524,8 @@ int r600_bytecode_build(struct r600_bytecode *bc)
        unsigned addr;
        int i, r;
 
-       if (bc->callstack[0].max > 0)
-               bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
+       bc->nstack = bc->stack.max_entries;
+
        if (bc->type == TGSI_PROCESSOR_VERTEX && !bc->nstack) {
                bc->nstack = 1;
        }
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 03cd238..ba9ad9f 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -173,16 +173,30 @@ struct r600_cf_stack_entry {
 };
 
 #define SQ_MAX_CALL_DEPTH 0x00000020
-struct r600_cf_callstack {
-       unsigned                        fc_sp_before_entry;
-       int                             sub_desc_index;
-       int                             current;
-       int                             max;
-};
 
 #define AR_HANDLE_NORMAL 0
 #define AR_HANDLE_RV6XX 1 /* except RV670 */
 
+/* FIXME: some chips have 8 subentries per stack entry; performance could
+ * probably be improved for them if we took that into account */
+#define CF_STACK_ENTRY_SIZE 4
+
+struct r600_stack_info {
+       /* current nesting level of non-WQM PUSH operations
+        * (PUSH, PUSH_ELSE, ALU_PUSH_BEFORE); counted as one subentry each */
+       int push;
+       /* current nesting level of WQM PUSH operations
+        * (PUSH, PUSH_ELSE, PUSH_WQM); CF_STACK_ENTRY_SIZE subentries each */
+       int push_wqm;
+       /* current loop nesting level; CF_STACK_ENTRY_SIZE subentries each */
+       int loop;
+
+       /* current total stack level (in subentries) */
+       int depth;
+
+       /* largest number of whole stack entries needed so far (used as nstack) */
+       int max_entries;
+};
 
 struct r600_bytecode {
        enum chip_class                 chip_class;
@@ -199,8 +213,7 @@ struct r600_bytecode {
        uint32_t                        *bytecode;
        uint32_t                        fc_sp;
        struct r600_cf_stack_entry      fc_stack[32];
-       unsigned                        call_sp;
-       struct r600_cf_callstack        callstack[SQ_MAX_CALL_DEPTH];
+       struct r600_stack_info          stack;
        unsigned        ar_loaded;
        unsigned        ar_reg;
        unsigned        ar_chan;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 8642463..cc4a8ed 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -234,7 +234,7 @@ struct r600_shader_tgsi_instruction {
 
 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], 
eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
-static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned 
reason, unsigned check_max_only);
+static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned 
reason);
 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
 static int tgsi_else(struct r600_shader_ctx *ctx);
 static int tgsi_endif(struct r600_shader_ctx *ctx);
@@ -412,7 +412,7 @@ static void llvm_if(struct r600_shader_ctx *ctx)
 {
        r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
        fc_pushlevel(ctx, FC_IF);
-       callstack_check_depth(ctx, FC_PUSH_VPM, 0);
+       callstack_push(ctx, FC_PUSH_VPM);
 }
 
 static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx)
@@ -5522,63 +5522,110 @@ static int pops(struct r600_shader_ctx *ctx, int pops)
        return 0;
 }
 
-static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, 
unsigned reason)
+static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
+                                              unsigned reason)
+{
+       struct r600_stack_info *stack = &ctx->bc->stack;
+       unsigned elements, entries; /* elements = subentries, entries = whole stack entries */
+
+       elements = (stack->loop + stack->push_wqm ) * CF_STACK_ENTRY_SIZE; /* a full entry each */
+       elements += stack->push; /* one element per non-WQM push */
+
+       switch (ctx->bc->chip_class) {
+       case R600:
+       case R700:
+               /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on
+                * the stack must be reserved to hold the current active/continue
+                * masks */
+               if (reason == FC_PUSH_VPM) {
+                       elements += 2;
+               }
+               break;
+
+       case CAYMAN:
+               /* r9xx: any stack operation on an empty stack consumes 2 additional
+                * elements */
+               elements += 2;
+
+               /* fallthrough */
+               /* FIXME: do the two elements added above cover the cases for the
+                * r8xx+ below? */
+
+       case EVERGREEN:
+               /* r8xx+: 2 extra elements are not always required, but one extra
+                * element must be added for each of the following cases:
+                * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest
+                *    stack usage.
+                *    Currently we don't use ALU_ELSE_AFTER.
+                * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM
+                *    PUSH instruction is executed.
+                *
+                *    NOTE: it seems we also need to reserve an additional element when
+                *    the non-zero stack depth mod ENTRY_SIZE is equal to 0 */
+               if (reason == FC_PUSH_VPM &&
+                               (stack->loop || stack->push_wqm ||
+                                (stack->depth &&
+                                        (stack->depth & (CF_STACK_ENTRY_SIZE - 1)) == 0))) {
+                       elements += 1;
+               }
+               break;
+
+       default:
+               assert(0); /* chip class not handled by this switch */
+               break;
+       }
+
+       entries = (elements + (CF_STACK_ENTRY_SIZE - 1)) / CF_STACK_ENTRY_SIZE; /* round up */
+
+       if (entries > stack->max_entries) /* only ever raise the recorded maximum */
+               stack->max_entries = entries;
+}
+
+static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason)
 {
        switch(reason) {
        case FC_PUSH_VPM:
-               ctx->bc->callstack[ctx->bc->call_sp].current--;
+               --ctx->bc->stack.push;
+               --ctx->bc->stack.depth; /* a non-WQM push accounts for one subentry */
+               assert(ctx->bc->stack.push >= 0); /* trips on a pop without a matching push */
                break;
        case FC_PUSH_WQM:
+               --ctx->bc->stack.push_wqm;
+               ctx->bc->stack.depth -= CF_STACK_ENTRY_SIZE; /* a WQM push accounts for a whole entry */
+               assert(ctx->bc->stack.push_wqm >= 0);
+               break;
        case FC_LOOP:
-               ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
+               --ctx->bc->stack.loop;
+               ctx->bc->stack.depth -= CF_STACK_ENTRY_SIZE; /* a loop accounts for a whole entry */
+               assert(ctx->bc->stack.loop >= 0);
                break;
-       case FC_REP:
-               /* TOODO : for 16 vp asic should -= 2; */
-               ctx->bc->callstack[ctx->bc->call_sp].current --;
+       default:
+               assert(0); /* any other reason is rejected */
                break;
        }
+
+       assert(ctx->bc->stack.depth >= 0); /* total depth must never go negative */
 }
 
-static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned 
reason, unsigned check_max_only)
+static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
 {
-       if (check_max_only) {
-               int diff;
-               switch (reason) {
-               case FC_PUSH_VPM:
-                       diff = 1;
-                       break;
-               case FC_PUSH_WQM:
-                       diff = 4;
-                       break;
-               default:
-                       assert(0);
-                       diff = 0;
-               }
-               if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
-                   ctx->bc->callstack[ctx->bc->call_sp].max) {
-                       ctx->bc->callstack[ctx->bc->call_sp].max =
-                               ctx->bc->callstack[ctx->bc->call_sp].current + diff;
-               }
-               return;
-       }
        switch (reason) {
        case FC_PUSH_VPM:
-               ctx->bc->callstack[ctx->bc->call_sp].current++;
+               ++ctx->bc->stack.push;
+               ++ctx->bc->stack.depth; /* a non-WQM push accounts for one subentry */
                break;
        case FC_PUSH_WQM:
+               ++ctx->bc->stack.push_wqm;
+               ctx->bc->stack.depth += CF_STACK_ENTRY_SIZE; /* a WQM push accounts for a whole entry */
+               break; /* no fallthrough: callstack_pop undoes WQM and LOOP separately */
        case FC_LOOP:
-               ctx->bc->callstack[ctx->bc->call_sp].current += 4;
-               break;
-       case FC_REP:
-               ctx->bc->callstack[ctx->bc->call_sp].current++;
+               ++ctx->bc->stack.loop;
+               ctx->bc->stack.depth += CF_STACK_ENTRY_SIZE; /* a loop accounts for a whole entry */
                break;
+       default:
+               assert(0); /* any other reason is rejected */
        }
 
-       if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
-           ctx->bc->callstack[ctx->bc->call_sp].max) {
-               ctx->bc->callstack[ctx->bc->call_sp].max =
-                       ctx->bc->callstack[ctx->bc->call_sp].current;
-       }
+       callstack_update_max_depth(ctx, reason);
 }
 
 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
@@ -5665,7 +5712,7 @@ static int tgsi_if(struct r600_shader_ctx *ctx)
 
        fc_pushlevel(ctx, FC_IF);
 
-       callstack_check_depth(ctx, FC_PUSH_VPM, 0);
+       callstack_push(ctx, FC_PUSH_VPM);
        return 0;
 }
 
@@ -5695,7 +5742,7 @@ static int tgsi_endif(struct r600_shader_ctx *ctx)
        }
        fc_poplevel(ctx);
 
-       callstack_decrease_current(ctx, FC_PUSH_VPM);
+       callstack_pop(ctx, FC_PUSH_VPM);
        return 0;
 }
 
@@ -5708,7 +5755,7 @@ static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
        fc_pushlevel(ctx, FC_LOOP);
 
        /* check stack depth */
-       callstack_check_depth(ctx, FC_LOOP, 0);
+       callstack_push(ctx, FC_LOOP);
        return 0;
 }
 
@@ -5737,7 +5784,7 @@ static int tgsi_endloop(struct r600_shader_ctx *ctx)
        }
        /* XXX add LOOPRET support */
        fc_poplevel(ctx);
-       callstack_decrease_current(ctx, FC_LOOP);
+       callstack_pop(ctx, FC_LOOP);
        return 0;
 }
 
@@ -5760,7 +5807,6 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
 
        fc_set_mid(ctx, fscp);
 
-       callstack_check_depth(ctx, FC_PUSH_VPM, 1);
        return 0;
 }
 
-- 
1.8.1.2

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to