From: Marek Olšák <marek.ol...@amd.com>

Do KILL at the end of shaders so as not to break WQM (whole quad mode).
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100070 --- src/gallium/drivers/radeon/r600_pipe_common.c | 5 +++++ src/gallium/drivers/radeon/r600_pipe_common.h | 1 + src/gallium/drivers/radeonsi/si_shader.c | 9 +++++++++ src/gallium/drivers/radeonsi/si_shader_internal.h | 1 + src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 21 +++++++++++++++++++++ 5 files changed, 37 insertions(+) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 04f7fc1..fd67d9a 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -1375,20 +1375,25 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, r600_init_screen_texture_functions(rscreen); r600_init_screen_query_functions(rscreen); rscreen->family = rscreen->info.family; rscreen->chip_class = rscreen->info.chip_class; rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0); rscreen->has_rbplus = false; rscreen->rbplus_allowed = false; + /* Set the flag in debug_flags, so that the shader cache takes it + * into account. 
*/ + if (flags & PIPE_SCREEN_ENABLE_CORRECT_TGSI_DERIVATIVES_AFTER_KILL) + rscreen->debug_flags |= DBG_FS_CORRECT_DERIVS_AFTER_KILL; + r600_disk_cache_create(rscreen); slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64); rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1)); if (rscreen->force_aniso >= 0) { printf("radeon: Forcing anisotropy filter to %ix\n", /* round down to a power of two */ 1 << util_logbase2(rscreen->force_aniso)); } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index e67982a..b22a3a7 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -78,20 +78,21 @@ #define DBG_PS (1 << 8) #define DBG_CS (1 << 9) #define DBG_TCS (1 << 10) #define DBG_TES (1 << 11) #define DBG_NO_IR (1 << 12) #define DBG_NO_TGSI (1 << 13) #define DBG_NO_ASM (1 << 14) #define DBG_PREOPT_IR (1 << 15) #define DBG_CHECK_IR (1 << 16) #define DBG_NO_OPT_VARIANT (1 << 17) +#define DBG_FS_CORRECT_DERIVS_AFTER_KILL (1 << 18) /* gaps */ #define DBG_TEST_DMA (1 << 20) /* Bits 21-31 are reserved for the r600g driver. 
*/ /* features */ #define DBG_NO_ASYNC_DMA (1llu << 32) #define DBG_NO_HYPERZ (1llu << 33) #define DBG_NO_DISCARD_RANGE (1llu << 34) #define DBG_NO_2D_TILING (1llu << 35) #define DBG_NO_TILING (1llu << 36) #define DBG_SWITCH_ON_EOP (1llu << 37) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c9fe250..108386e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3242,20 +3242,23 @@ static void si_llvm_return_fs_outputs(struct lp_build_tgsi_context *bld_base) struct si_shader_context *ctx = si_shader_context(bld_base); struct si_shader *shader = ctx->shader; struct tgsi_shader_info *info = &shader->selector->info; LLVMBuilderRef builder = ctx->gallivm.builder; unsigned i, j, first_vgpr, vgpr; LLVMValueRef color[8][4] = {}; LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; LLVMValueRef ret; + if (ctx->postponed_kill) + ac_build_kill(&ctx->ac, LLVMBuildLoad(builder, ctx->postponed_kill, "")); + /* Read the output values. */ for (i = 0; i < info->num_outputs; i++) { unsigned semantic_name = info->output_semantic_name[i]; unsigned semantic_index = info->output_semantic_index[i]; switch (semantic_name) { case TGSI_SEMANTIC_COLOR: assert(semantic_index < 8); for (j = 0; j < 4; j++) { LLVMValueRef ptr = ctx->outputs[i][j]; @@ -5511,20 +5514,26 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx, if (ctx->type == PIPE_SHADER_GEOMETRY) { int i; for (i = 0; i < 4; i++) { ctx->gs_next_vertex[i] = lp_build_alloca(&ctx->gallivm, ctx->i32, ""); } } + if (ctx->type == PIPE_SHADER_FRAGMENT && sel->info.uses_kill && + ctx->screen->b.debug_flags & DBG_FS_CORRECT_DERIVS_AFTER_KILL) { + /* This is initialized to 0.0 = not kill. 
*/ + ctx->postponed_kill = lp_build_alloca(&ctx->gallivm, ctx->f32, ""); + } + if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) { fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n"); return false; } si_llvm_build_ret(ctx, ctx->return_value); return true; } /** diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 5ccde71..3556e69 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -210,20 +210,21 @@ struct si_shader_context { unsigned range_md_kind; unsigned fpmath_md_kind; LLVMValueRef fpmath_md_2p5_ulp; /* Preloaded descriptors. */ LLVMValueRef esgs_ring; LLVMValueRef gsvs_ring[4]; LLVMValueRef lds; LLVMValueRef gs_next_vertex[4]; + LLVMValueRef postponed_kill; LLVMValueRef return_value; LLVMTypeRef voidt; LLVMTypeRef i1; LLVMTypeRef i8; LLVMTypeRef i32; LLVMTypeRef i64; LLVMTypeRef i128; LLVMTypeRef f32; LLVMTypeRef v2i32; diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c index 9fa56c7..12f8de4 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c @@ -53,20 +53,41 @@ static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base, emit_data->args[0] = LLVMBuildSelect(builder, conds[0], lp_build_const_float(gallivm, -1.0f), bld_base->base.zero, ""); } static void kil_emit(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); + LLVMBuilderRef builder = ctx->gallivm.builder; + + if (ctx->postponed_kill) { + if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) { + LLVMValueRef val; + + /* Take the minimum kill value. This is the same as OR + * between 2 kill values. If the value is negative, + * the pixel will be killed. 
+ */ + val = LLVMBuildLoad(builder, ctx->postponed_kill, ""); + val = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MIN, + val, emit_data->args[0]); + LLVMBuildStore(builder, val, ctx->postponed_kill); + } else { + LLVMBuildStore(builder, + LLVMConstReal(ctx->f32, -1), + ctx->postponed_kill); + } + return; + } if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) ac_build_kill(&ctx->ac, emit_data->args[0]); else ac_build_kill(&ctx->ac, NULL); } static void emit_icmp(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev