From: Nicolai Hähnle <nicolai.haeh...@amd.com> We do not enable this by default for additive blending, since it slightly breaks OpenGL invariance guarantees due to non-determinism.
Still, there may be some applications can benefit from white-listing via the radeonsi_commutative_blend_add drirc setting without any real visible artifacts. --- src/gallium/drivers/radeonsi/driinfo_radeonsi.h | 1 + src/gallium/drivers/radeonsi/si_pipe.c | 2 + src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_state.c | 67 +++++++++++++++++++++++-- src/gallium/drivers/radeonsi/si_state.h | 1 + src/util/xmlpool/t_options.h | 5 ++ 6 files changed, 73 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/radeonsi/driinfo_radeonsi.h b/src/gallium/drivers/radeonsi/driinfo_radeonsi.h index 8be85289a0c..989e5175cc0 100644 --- a/src/gallium/drivers/radeonsi/driinfo_radeonsi.h +++ b/src/gallium/drivers/radeonsi/driinfo_radeonsi.h @@ -1,5 +1,6 @@ // DriConf options specific to radeonsi DRI_CONF_SECTION_PERFORMANCE DRI_CONF_RADEONSI_ENABLE_SISCHED("false") DRI_CONF_RADEONSI_ASSUME_NO_Z_FIGHTS("false") + DRI_CONF_RADEONSI_COMMUTATIVE_BLEND_ADD("false") DRI_CONF_SECTION_END diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index b4972be739c..c44ea3be740 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1043,20 +1043,22 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, (sscreen->b.chip_class == SI && sscreen->b.info.pfp_fw_version >= 79 && sscreen->b.info.me_fw_version >= 142); sscreen->has_ds_bpermute = sscreen->b.chip_class >= VI; sscreen->has_out_of_order_rast = sscreen->b.chip_class >= VI && sscreen->b.info.max_se >= 2 && !(sscreen->b.debug_flags & DBG_NO_OUT_OF_ORDER); sscreen->assume_no_z_fights = driQueryOptionb(config->options, "radeonsi_assume_no_z_fights"); + sscreen->commutative_blend_add = + driQueryOptionb(config->options, "radeonsi_commutative_blend_add"); sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 && sscreen->b.family <= CHIP_POLARIS12) || sscreen->b.family == CHIP_VEGA10 || sscreen->b.family == CHIP_RAVEN; sscreen->dpbb_allowed = sscreen->b.chip_class >= GFX9 && !(sscreen->b.debug_flags & DBG_NO_DPBB); sscreen->dfsm_allowed = sscreen->dpbb_allowed && !(sscreen->b.debug_flags & DBG_NO_DFSM); /* While it would be nice not to have this flag, we are constrained diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index d200c9f571f..27e2bc81172 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -90,20 +90,21 @@ struct u_suballocator; struct si_screen { struct r600_common_screen b; unsigned gs_table_depth; unsigned tess_offchip_block_dw_size; bool has_clear_state; bool has_distributed_tess; bool has_draw_indirect_multi; bool has_ds_bpermute; bool has_out_of_order_rast; bool assume_no_z_fights; + bool commutative_blend_add; bool has_msaa_sample_loc_bug; bool dpbb_allowed; bool dfsm_allowed; bool llvm_has_working_vgpr_indexing; /* Whether shaders are monolithic (1-part) or separate (3-part). */ bool use_monolithic_shaders; bool record_llvm_ir; mtx_t shader_parts_mutex; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index a8af5752771..6a063e7f7a6 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -370,20 +370,62 @@ static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha) case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; default: return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; } } +static void si_blend_check_commutativity(struct si_screen *sscreen, + struct si_state_blend *blend, + enum pipe_blend_func func, + enum pipe_blendfactor src, + enum pipe_blendfactor dst, + unsigned chanmask) +{ + /* Src factor is allowed when it does not depend on Dst */ + static const uint32_t src_allowed = + (1u << PIPE_BLENDFACTOR_ONE) | + (1u << PIPE_BLENDFACTOR_SRC_COLOR) | + (1u << PIPE_BLENDFACTOR_SRC_ALPHA) | + (1u << PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) | + (1u << PIPE_BLENDFACTOR_CONST_COLOR) | + (1u << PIPE_BLENDFACTOR_CONST_ALPHA) | + (1u << PIPE_BLENDFACTOR_SRC1_COLOR) | + (1u << PIPE_BLENDFACTOR_SRC1_ALPHA) | + (1u << PIPE_BLENDFACTOR_ZERO) | + (1u << PIPE_BLENDFACTOR_INV_SRC_COLOR) | + (1u << PIPE_BLENDFACTOR_INV_SRC_ALPHA) | + (1u << PIPE_BLENDFACTOR_INV_CONST_COLOR) | + (1u << PIPE_BLENDFACTOR_INV_CONST_ALPHA) | + (1u << PIPE_BLENDFACTOR_INV_SRC1_COLOR) | + (1u << PIPE_BLENDFACTOR_INV_SRC1_ALPHA); + + if (dst == PIPE_BLENDFACTOR_ONE && + (src_allowed & (1u << src))) { + /* Addition is commutative, but floating point addition isn't + * associative: subtle changes can be introduced via different + * rounding. + * + * Out-of-order is also non-deterministic, which means that + * this breaks OpenGL invariance requirements. So only enable + * out-of-order additive blending if explicitly allowed by a + * setting. + */ + if (func == PIPE_BLEND_MAX || func == PIPE_BLEND_MIN || + (func == PIPE_BLEND_ADD && sscreen->commutative_blend_add)) + blend->commutative_4bit |= chanmask; + } +} + /** * Get rid of DST in the blend factors by commuting the operands: * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) */ static void si_blend_remove_dst(unsigned *func, unsigned *src_factor, unsigned *dst_factor, unsigned expected_dst, unsigned replacement_src) { if (*src_factor == expected_dst && *dst_factor == PIPE_BLENDFACTOR_ZERO) { @@ -486,20 +528,25 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, /* cb_render_state will disable unused ones */ blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i); if (state->rt[j].colormask) blend->cb_target_enabled_4bit |= 0xf << (4 * i); if (!state->rt[j].colormask || !state->rt[j].blend_enable) { si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); continue; } + si_blend_check_commutativity(sctx->screen, blend, + eqRGB, srcRGB, dstRGB, 0x7 << (4 * i)); + si_blend_check_commutativity(sctx->screen, blend, + eqA, srcA, dstA, 0x8 << (4 * i)); + /* Blending optimizations for RB+. * These transformations don't change the behavior. * * First, get rid of DST in the blend factors: * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) */ si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, PIPE_BLENDFACTOR_DST_COLOR, PIPE_BLENDFACTOR_SRC_COLOR); si_blend_remove_dst(&eqA, &srcA, &dstA, @@ -629,20 +676,21 @@ static void si_bind_blend_state(struct pipe_context *ctx, void *state) (!old_blend || old_blend->alpha_to_coverage != blend->alpha_to_coverage || old_blend->blend_enable_4bit != blend->blend_enable_4bit || old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit)) si_mark_atom_dirty(sctx, &sctx->dpbb_state); if (sctx->screen->has_out_of_order_rast && (!old_blend || (old_blend->blend_enable_4bit != blend->blend_enable_4bit || old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit || + old_blend->commutative_4bit != blend->commutative_4bit || old_blend->logicop_enable != blend->logicop_enable))) si_mark_atom_dirty(sctx, &sctx->msaa_config); } static void si_delete_blend_state(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state); } @@ -3193,26 +3241,37 @@ static bool si_out_of_order_rasterization(struct si_context *sctx) return false; if (sctx->b.num_perfect_occlusion_queries != 0 && !dsa_order_invariant.pass_set) return false; } if (!colormask) return true; - bool blend_enabled = (colormask & blend->blend_enable_4bit) != 0; + unsigned blendmask = colormask & blend->blend_enable_4bit; - if (blend_enabled) - return false; /* TODO */ + if (blendmask) { + /* Only commutative blending. */ + if (blendmask & ~blend->commutative_4bit) + return false; + + if (!dsa_order_invariant.pass_set) + return false; + } + + if (colormask & ~blendmask) { + if (!dsa_order_invariant.pass_last) + return false; + } - return dsa_order_invariant.pass_last; + return true; } static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; unsigned num_tile_pipes = sctx->screen->b.info.num_tile_pipes; /* 33% faster rendering to linear color buffers */ bool dst_is_linear = sctx->framebuffer.any_dst_linear; bool out_of_order_rast = si_out_of_order_rasterization(sctx); unsigned sc_mode_cntl_1 = diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 56e597a5813..5aa50c58932 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -48,20 +48,21 @@ struct si_shader_selector; struct si_state_blend { struct si_pm4_state pm4; uint32_t cb_target_mask; /* Set 0xf or 0x0 (4 bits) per render target if the following is * true. ANDed with spi_shader_col_format. */ unsigned cb_target_enabled_4bit; unsigned blend_enable_4bit; unsigned need_src_alpha_4bit; + unsigned commutative_4bit; bool alpha_to_coverage:1; bool alpha_to_one:1; bool dual_src_blend:1; bool logicop_enable:1; }; struct si_state_rasterizer { struct si_pm4_state pm4; /* poly offset states for 16-bit, 24-bit, and 32-bit zbuffers */ struct si_pm4_state *pm4_poly_offset; diff --git a/src/util/xmlpool/t_options.h b/src/util/xmlpool/t_options.h index c92215183a5..214c7c359ee 100644 --- a/src/util/xmlpool/t_options.h +++ b/src/util/xmlpool/t_options.h @@ -436,10 +436,15 @@ DRI_CONF_OPT_END #define DRI_CONF_RADEONSI_ENABLE_SISCHED(def) \ DRI_CONF_OPT_BEGIN_B(radeonsi_enable_sisched, def) \ DRI_CONF_DESC(en,gettext("Use the LLVM sisched option for shader compiles")) \ DRI_CONF_OPT_END #define DRI_CONF_RADEONSI_ASSUME_NO_Z_FIGHTS(def) \ DRI_CONF_OPT_BEGIN_B(radeonsi_assume_no_z_fights, def) \ DRI_CONF_DESC(en,gettext("Assume no Z fights (enables aggressive out-of-order rasterization to improve performance; may cause rendering errors)")) \ DRI_CONF_OPT_END + +#define DRI_CONF_RADEONSI_COMMUTATIVE_BLEND_ADD(def) \ +DRI_CONF_OPT_BEGIN_B(radeonsi_commutative_blend_add, def) \ + DRI_CONF_DESC(en,gettext("Commutative additive blending optimizations (may cause rendering errors)")) \ +DRI_CONF_OPT_END -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev