v3: handle hw-specific cases

Signed-off-by: Vadim Girlin <vadimgir...@gmail.com>
---

cc: Andy Furniss <andy...@ukfsn.org>

Hopefully this works better on the non-Evergreen chips.

src/gallium/drivers/r600/r600_asm.c    |   4 +-
src/gallium/drivers/r600/r600_asm.h    |  29 +++++--
src/gallium/drivers/r600/r600_shader.c | 140 ++++++++++++++++++++++----------
3 files changed, 119 insertions(+), 54 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 3632aa5..b1dbfe1 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1524,8 +1524,8 @@ int r600_bytecode_build(struct r600_bytecode *bc)
 	unsigned addr;
 	int i, r;
 
-	if (bc->callstack[0].max > 0)
-		bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
+	bc->nstack = bc->stack.max_entries;
+
 	if (bc->type == TGSI_PROCESSOR_VERTEX && !bc->nstack) {
 		bc->nstack = 1;
 	}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 03cd238..ba9ad9f 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -173,16 +173,30 @@ struct r600_cf_stack_entry {
 };
 
 #define SQ_MAX_CALL_DEPTH 0x00000020
-struct r600_cf_callstack {
-	unsigned			fc_sp_before_entry;
-	int				sub_desc_index;
-	int				current;
-	int				max;
-};
 
 #define AR_HANDLE_NORMAL 0
 #define AR_HANDLE_RV6XX 1 /* except RV670 */
 
+/* FIXME: some chips have 8 subentries per stack entry, probably the
+ * performance may be improved for them if we'll take it into account */
+#define CF_STACK_ENTRY_SIZE 4
+
+struct r600_stack_info {
+	/* current level of non-WQM PUSH operations
+	 * (PUSH, PUSH_ELSE, ALU_PUSH_BEFORE) */
+	int push;
+	/* current level of WQM PUSH operations
+	 * (PUSH, PUSH_ELSE, PUSH_WQM) */
+	int push_wqm;
+	/* current loop level */
+	int loop;
+
+	/* current total stack level (in subentries) */
+	int depth;
+
+	/* required depth */
+	int max_entries;
+};
 
 struct r600_bytecode {
 	enum chip_class			chip_class;
@@ -199,8 +213,7 @@ struct r600_bytecode {
 	uint32_t			*bytecode;
 	uint32_t			fc_sp;
 	struct r600_cf_stack_entry	fc_stack[32];
-	unsigned			call_sp;
-	struct r600_cf_callstack	callstack[SQ_MAX_CALL_DEPTH];
+	struct r600_stack_info		stack;
 	unsigned			ar_loaded;
 	unsigned			ar_reg;
 	unsigned			ar_chan;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 8642463..cc4a8ed 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -234,7 +234,7 @@ struct r600_shader_tgsi_instruction {
 
 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
-static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only);
+static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
 static int tgsi_else(struct r600_shader_ctx *ctx);
 static int tgsi_endif(struct r600_shader_ctx *ctx);
@@ -412,7 +412,7 @@ static void llvm_if(struct r600_shader_ctx *ctx)
 {
 	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
 	fc_pushlevel(ctx, FC_IF);
-	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
+	callstack_push(ctx, FC_PUSH_VPM);
 }
 
 static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx)
@@ -5522,63 +5522,116 @@ static int pops(struct r600_shader_ctx *ctx, int pops)
 	return 0;
 }
 
-static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
+/* Recompute the number of stack entries required at the current nesting
+ * level and raise stack.max_entries if the requirement has grown. */
+static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
+                                              unsigned reason)
+{
+	struct r600_stack_info *stack = &ctx->bc->stack;
+	unsigned elements, entries;
+
+	elements = (stack->loop + stack->push_wqm ) * CF_STACK_ENTRY_SIZE;
+	elements += stack->push;
+
+	switch (ctx->bc->chip_class) {
+	case R600:
+	case R700:
+		/* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on
+		 * the stack must be reserved to hold the current active/continue
+		 * masks */
+		if (reason == FC_PUSH_VPM) {
+			elements += 2;
+		}
+		break;
+
+	case CAYMAN:
+		/* r9xx: any stack operation on empty stack consumes 2 additional
+		 * elements */
+		elements += 2;
+
+		/* fallthrough */
+		/* FIXME: do the two elements added above cover the cases for the
+		 * r8xx+ below? */
+
+	case EVERGREEN:
+		/* r8xx+: 2 extra elements are not always required, but one extra
+		 * element must be added for each of the following cases:
+		 * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest
+		 *    stack usage.
+		 *    Currently we don't use ALU_ELSE_AFTER.
+		 * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM
+		 *    PUSH instruction executed.
+		 *
+		 * NOTE: it seems we also need to reserve additional element when
+		 * the non-zero stack depth mod ENTRY_SIZE is equal to 0 */
+		if (reason == FC_PUSH_VPM &&
+		    (stack->loop || stack->push_wqm ||
+		     (stack->depth &&
+		      (stack->depth & (CF_STACK_ENTRY_SIZE - 1)) == 0))) {
+			elements += 1;
+		}
+		break;
+
+	default:
+		assert(0);
+		break;
+	}
+
+	entries = (elements + (CF_STACK_ENTRY_SIZE - 1)) / CF_STACK_ENTRY_SIZE;
+
+	if (entries > stack->max_entries)
+		stack->max_entries = entries;
+}
+
+/* Undo the bookkeeping done by callstack_push() for the same reason. */
+static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason)
 {
 	switch(reason) {
 	case FC_PUSH_VPM:
-		ctx->bc->callstack[ctx->bc->call_sp].current--;
+		--ctx->bc->stack.push;
+		--ctx->bc->stack.depth;
+		assert(ctx->bc->stack.push >= 0);
 		break;
 	case FC_PUSH_WQM:
+		--ctx->bc->stack.push_wqm;
+		ctx->bc->stack.depth -= CF_STACK_ENTRY_SIZE;
+		assert(ctx->bc->stack.push_wqm >= 0);
+		break;
 	case FC_LOOP:
-		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
+		--ctx->bc->stack.loop;
+		ctx->bc->stack.depth -= CF_STACK_ENTRY_SIZE;
+		assert(ctx->bc->stack.loop >= 0);
 		break;
-	case FC_REP:
-		/* TOODO : for 16 vp asic should -= 2; */
-		ctx->bc->callstack[ctx->bc->call_sp].current --;
+	default:
+		assert(0);
 		break;
 	}
+
+	assert(ctx->bc->stack.depth >= 0);
 }
 
-static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
+/* Account for a new stack frame of the given kind (FC_PUSH_VPM,
+ * FC_PUSH_WQM or FC_LOOP) and update the required stack size. */
+static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
 {
-	if (check_max_only) {
-		int diff;
-		switch (reason) {
-		case FC_PUSH_VPM:
-			diff = 1;
-			break;
-		case FC_PUSH_WQM:
-			diff = 4;
-			break;
-		default:
-			assert(0);
-			diff = 0;
-		}
-		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
-		    ctx->bc->callstack[ctx->bc->call_sp].max) {
-			ctx->bc->callstack[ctx->bc->call_sp].max =
-				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
-		}
-		return;
-	}
 	switch (reason) {
 	case FC_PUSH_VPM:
-		ctx->bc->callstack[ctx->bc->call_sp].current++;
+		++ctx->bc->stack.push;
+		++ctx->bc->stack.depth;
 		break;
 	case FC_PUSH_WQM:
+		++ctx->bc->stack.push_wqm;
+		ctx->bc->stack.depth += CF_STACK_ENTRY_SIZE;
+		break;
 	case FC_LOOP:
-		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
-		break;
-	case FC_REP:
-		ctx->bc->callstack[ctx->bc->call_sp].current++;
+		++ctx->bc->stack.loop;
+		ctx->bc->stack.depth += CF_STACK_ENTRY_SIZE;
 		break;
+	default:
+		assert(0);
 	}
 
-	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
-	    ctx->bc->callstack[ctx->bc->call_sp].max) {
-		ctx->bc->callstack[ctx->bc->call_sp].max =
-			ctx->bc->callstack[ctx->bc->call_sp].current;
-	}
+	callstack_update_max_depth(ctx, reason);
 }
 
 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
@@ -5665,7 +5718,7 @@ static int tgsi_if(struct r600_shader_ctx *ctx)
 
 	fc_pushlevel(ctx, FC_IF);
 
-	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
+	callstack_push(ctx, FC_PUSH_VPM);
 	return 0;
 }
 
@@ -5695,7 +5748,7 @@ static int tgsi_endif(struct r600_shader_ctx *ctx)
 	}
 	fc_poplevel(ctx);
 
-	callstack_decrease_current(ctx, FC_PUSH_VPM);
+	callstack_pop(ctx, FC_PUSH_VPM);
 	return 0;
 }
 
@@ -5708,7 +5761,7 @@ static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
 	fc_pushlevel(ctx, FC_LOOP);
 
 	/* check stack depth */
-	callstack_check_depth(ctx, FC_LOOP, 0);
+	callstack_push(ctx, FC_LOOP);
 	return 0;
 }
 
@@ -5737,7 +5790,7 @@ static int tgsi_endloop(struct r600_shader_ctx *ctx)
 	}
 	/* XXX add LOOPRET support */
 	fc_poplevel(ctx);
-	callstack_decrease_current(ctx, FC_LOOP);
+	callstack_pop(ctx, FC_LOOP);
 	return 0;
 }
 
@@ -5760,7 +5813,6 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
 
 	fc_set_mid(ctx, fscp);
 
-	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
 	return 0;
 }
 
-- 
1.8.1.2

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev