Overcautious stack reservation caused significant loss of performance. v2: fix stack depth computation
Signed-off-by: Vadim Girlin <vadimgir...@gmail.com> --- src/gallium/drivers/r600/r600_asm.c | 4 +-- src/gallium/drivers/r600/r600_asm.h | 10 ++---- src/gallium/drivers/r600/r600_shader.c | 64 ++++++++++++---------------------- 3 files changed, 27 insertions(+), 51 deletions(-) diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 3632aa5..e891921 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1524,8 +1524,8 @@ int r600_bytecode_build(struct r600_bytecode *bc) unsigned addr; int i, r; - if (bc->callstack[0].max > 0) - bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2; + if (bc->max_stack_depth > 0) + bc->nstack = (bc->max_stack_depth >> 2) + 1; if (bc->type == TGSI_PROCESSOR_VERTEX && !bc->nstack) { bc->nstack = 1; } diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 03cd238..93f2dc9 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -173,12 +173,6 @@ struct r600_cf_stack_entry { }; #define SQ_MAX_CALL_DEPTH 0x00000020 -struct r600_cf_callstack { - unsigned fc_sp_before_entry; - int sub_desc_index; - int current; - int max; -}; #define AR_HANDLE_NORMAL 0 #define AR_HANDLE_RV6XX 1 /* except RV670 */ @@ -199,8 +193,8 @@ struct r600_bytecode { uint32_t *bytecode; uint32_t fc_sp; struct r600_cf_stack_entry fc_stack[32]; - unsigned call_sp; - struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH]; + int stack_depth; + int max_stack_depth; unsigned ar_loaded; unsigned ar_reg; unsigned ar_chan; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 59a7f92..77c8c77 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -234,7 +234,7 @@ struct r600_shader_tgsi_instruction { static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); -static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only); +static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason); static void fc_pushlevel(struct r600_shader_ctx *ctx, int type); static int tgsi_else(struct r600_shader_ctx *ctx); static int tgsi_endif(struct r600_shader_ctx *ctx); @@ -412,7 +412,7 @@ static void llvm_if(struct r600_shader_ctx *ctx) { r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP); fc_pushlevel(ctx, FC_IF); - callstack_check_depth(ctx, FC_PUSH_VPM, 0); + callstack_push(ctx, FC_PUSH_VPM); } static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx) @@ -5521,62 +5521,45 @@ static int pops(struct r600_shader_ctx *ctx, int pops) return 0; } -static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) +static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason) { switch(reason) { case FC_PUSH_VPM: - ctx->bc->callstack[ctx->bc->call_sp].current--; + ctx->bc->stack_depth--; break; case FC_PUSH_WQM: case FC_LOOP: - ctx->bc->callstack[ctx->bc->call_sp].current -= 4; + ctx->bc->stack_depth -= 4; break; case FC_REP: /* TOODO : for 16 vp asic should -= 2; */ - ctx->bc->callstack[ctx->bc->call_sp].current --; + ctx->bc->stack_depth--; break; } + + assert(ctx->bc->stack_depth >= 0); } -static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) +static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason) { - if (check_max_only) { - int diff; - switch (reason) { - case FC_PUSH_VPM: - diff = 1; - break; - case FC_PUSH_WQM: - diff = 4; - break; - default: - assert(0); - diff = 0; - } - if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > - ctx->bc->callstack[ctx->bc->call_sp].max) { - ctx->bc->callstack[ctx->bc->call_sp].max = - ctx->bc->callstack[ctx->bc->call_sp].current + diff; - } - return; - } + int diff; switch (reason) { case FC_PUSH_VPM: - ctx->bc->callstack[ctx->bc->call_sp].current++; + diff = 1; break; case FC_PUSH_WQM: case FC_LOOP: - ctx->bc->callstack[ctx->bc->call_sp].current += 4; - break; - case FC_REP: - ctx->bc->callstack[ctx->bc->call_sp].current++; + diff = 4; break; + default: + assert(0); + diff = 0; } - if ((ctx->bc->callstack[ctx->bc->call_sp].current) > - ctx->bc->callstack[ctx->bc->call_sp].max) { - ctx->bc->callstack[ctx->bc->call_sp].max = - ctx->bc->callstack[ctx->bc->call_sp].current; + ctx->bc->stack_depth += diff; + + if (ctx->bc->stack_depth > ctx->bc->max_stack_depth) { + ctx->bc->max_stack_depth = ctx->bc->stack_depth; } } @@ -5664,7 +5647,7 @@ static int tgsi_if(struct r600_shader_ctx *ctx) fc_pushlevel(ctx, FC_IF); - callstack_check_depth(ctx, FC_PUSH_VPM, 0); + callstack_push(ctx, FC_PUSH_VPM); return 0; } @@ -5694,7 +5677,7 @@ static int tgsi_endif(struct r600_shader_ctx *ctx) } fc_poplevel(ctx); - callstack_decrease_current(ctx, FC_PUSH_VPM); + callstack_pop(ctx, FC_PUSH_VPM); return 0; } @@ -5707,7 +5690,7 @@ static int tgsi_bgnloop(struct r600_shader_ctx *ctx) fc_pushlevel(ctx, FC_LOOP); /* check stack depth */ - callstack_check_depth(ctx, FC_LOOP, 0); + callstack_push(ctx, FC_LOOP); return 0; } @@ -5736,7 +5719,7 @@ static int tgsi_endloop(struct r600_shader_ctx *ctx) } /* XXX add LOOPRET support */ fc_poplevel(ctx); - callstack_decrease_current(ctx, FC_LOOP); + callstack_pop(ctx, FC_LOOP); return 0; } @@ -5759,7 +5742,6 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) fc_set_mid(ctx, fscp); - callstack_check_depth(ctx, FC_PUSH_VPM, 1); return 0; } -- 1.8.1.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev