For the series: Reviewed-by: Marek Olšák <marek.ol...@amd.com>
Marek On Sat, Sep 9, 2017 at 12:43 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: > From: Nicolai Hähnle <nicolai.haeh...@amd.com> > > We do not enable this by default for additive blending, since it slightly > breaks OpenGL invariance guarantees due to non-determinism. > > Still, there may be some applications that can benefit from white-listing > via the radeonsi_commutative_blend_add drirc setting without any real > visible artifacts. > --- > src/gallium/drivers/radeonsi/driinfo_radeonsi.h | 1 + > src/gallium/drivers/radeonsi/si_pipe.c | 2 + > src/gallium/drivers/radeonsi/si_pipe.h | 1 + > src/gallium/drivers/radeonsi/si_state.c | 67 > +++++++++++++++++++++++-- > src/gallium/drivers/radeonsi/si_state.h | 1 + > src/util/xmlpool/t_options.h | 5 ++ > 6 files changed, 73 insertions(+), 4 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/driinfo_radeonsi.h > b/src/gallium/drivers/radeonsi/driinfo_radeonsi.h > index 8be85289a0c..989e5175cc0 100644 > --- a/src/gallium/drivers/radeonsi/driinfo_radeonsi.h > +++ b/src/gallium/drivers/radeonsi/driinfo_radeonsi.h > @@ -1,5 +1,6 @@ > // DriConf options specific to radeonsi > DRI_CONF_SECTION_PERFORMANCE > DRI_CONF_RADEONSI_ENABLE_SISCHED("false") > DRI_CONF_RADEONSI_ASSUME_NO_Z_FIGHTS("false") > + DRI_CONF_RADEONSI_COMMUTATIVE_BLEND_ADD("false") > DRI_CONF_SECTION_END > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c > b/src/gallium/drivers/radeonsi/si_pipe.c > index b4972be739c..c44ea3be740 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.c > +++ b/src/gallium/drivers/radeonsi/si_pipe.c > @@ -1043,20 +1043,22 @@ struct pipe_screen *radeonsi_screen_create(struct > radeon_winsys *ws, > (sscreen->b.chip_class == SI && > sscreen->b.info.pfp_fw_version >= 79 && > sscreen->b.info.me_fw_version >= 142); > > sscreen->has_ds_bpermute = sscreen->b.chip_class >= VI; > sscreen->has_out_of_order_rast = sscreen->b.chip_class >= VI && > sscreen->b.info.max_se >= 2 && > !(sscreen->b.debug_flags & > DBG_NO_OUT_OF_ORDER); > 
sscreen->assume_no_z_fights = > driQueryOptionb(config->options, > "radeonsi_assume_no_z_fights"); > + sscreen->commutative_blend_add = > + driQueryOptionb(config->options, > "radeonsi_commutative_blend_add"); > sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= > CHIP_POLARIS10 && > sscreen->b.family <= > CHIP_POLARIS12) || > sscreen->b.family == CHIP_VEGA10 || > sscreen->b.family == CHIP_RAVEN; > sscreen->dpbb_allowed = sscreen->b.chip_class >= GFX9 && > !(sscreen->b.debug_flags & DBG_NO_DPBB); > sscreen->dfsm_allowed = sscreen->dpbb_allowed && > !(sscreen->b.debug_flags & DBG_NO_DFSM); > > /* While it would be nice not to have this flag, we are constrained > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h > b/src/gallium/drivers/radeonsi/si_pipe.h > index d200c9f571f..27e2bc81172 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.h > +++ b/src/gallium/drivers/radeonsi/si_pipe.h > @@ -90,20 +90,21 @@ struct u_suballocator; > struct si_screen { > struct r600_common_screen b; > unsigned gs_table_depth; > unsigned tess_offchip_block_dw_size; > bool has_clear_state; > bool has_distributed_tess; > bool has_draw_indirect_multi; > bool has_ds_bpermute; > bool has_out_of_order_rast; > bool assume_no_z_fights; > + bool commutative_blend_add; > bool has_msaa_sample_loc_bug; > bool dpbb_allowed; > bool dfsm_allowed; > bool llvm_has_working_vgpr_indexing; > > /* Whether shaders are monolithic (1-part) or separate (3-part). 
*/ > bool use_monolithic_shaders; > bool record_llvm_ir; > > mtx_t shader_parts_mutex; > diff --git a/src/gallium/drivers/radeonsi/si_state.c > b/src/gallium/drivers/radeonsi/si_state.c > index a8af5752771..6a063e7f7a6 100644 > --- a/src/gallium/drivers/radeonsi/si_state.c > +++ b/src/gallium/drivers/radeonsi/si_state.c > @@ -370,20 +370,62 @@ static uint32_t si_translate_blend_opt_factor(int > blend_fact, bool is_alpha) > case PIPE_BLENDFACTOR_INV_SRC_ALPHA: > return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1; > case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: > return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE > : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; > default: > return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; > } > } > > +static void si_blend_check_commutativity(struct si_screen *sscreen, > + struct si_state_blend *blend, > + enum pipe_blend_func func, > + enum pipe_blendfactor src, > + enum pipe_blendfactor dst, > + unsigned chanmask) > +{ > + /* Src factor is allowed when it does not depend on Dst */ > + static const uint32_t src_allowed = > + (1u << PIPE_BLENDFACTOR_ONE) | > + (1u << PIPE_BLENDFACTOR_SRC_COLOR) | > + (1u << PIPE_BLENDFACTOR_SRC_ALPHA) | > + (1u << PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) | > + (1u << PIPE_BLENDFACTOR_CONST_COLOR) | > + (1u << PIPE_BLENDFACTOR_CONST_ALPHA) | > + (1u << PIPE_BLENDFACTOR_SRC1_COLOR) | > + (1u << PIPE_BLENDFACTOR_SRC1_ALPHA) | > + (1u << PIPE_BLENDFACTOR_ZERO) | > + (1u << PIPE_BLENDFACTOR_INV_SRC_COLOR) | > + (1u << PIPE_BLENDFACTOR_INV_SRC_ALPHA) | > + (1u << PIPE_BLENDFACTOR_INV_CONST_COLOR) | > + (1u << PIPE_BLENDFACTOR_INV_CONST_ALPHA) | > + (1u << PIPE_BLENDFACTOR_INV_SRC1_COLOR) | > + (1u << PIPE_BLENDFACTOR_INV_SRC1_ALPHA); > + > + if (dst == PIPE_BLENDFACTOR_ONE && > + (src_allowed & (1u << src))) { > + /* Addition is commutative, but floating point addition isn't > + * associative: subtle changes can be introduced via different > + * rounding. 
> + * > + * Out-of-order is also non-deterministic, which means that > + * this breaks OpenGL invariance requirements. So only enable > + * out-of-order additive blending if explicitly allowed by a > + * setting. > + */ > + if (func == PIPE_BLEND_MAX || func == PIPE_BLEND_MIN || > + (func == PIPE_BLEND_ADD && > sscreen->commutative_blend_add)) > + blend->commutative_4bit |= chanmask; > + } > +} > + > /** > * Get rid of DST in the blend factors by commuting the operands: > * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) > */ > static void si_blend_remove_dst(unsigned *func, unsigned *src_factor, > unsigned *dst_factor, unsigned expected_dst, > unsigned replacement_src) > { > if (*src_factor == expected_dst && > *dst_factor == PIPE_BLENDFACTOR_ZERO) { > @@ -486,20 +528,25 @@ static void *si_create_blend_state_mode(struct > pipe_context *ctx, > /* cb_render_state will disable unused ones */ > blend->cb_target_mask |= (unsigned)state->rt[j].colormask << > (4 * i); > if (state->rt[j].colormask) > blend->cb_target_enabled_4bit |= 0xf << (4 * i); > > if (!state->rt[j].colormask || !state->rt[j].blend_enable) { > si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * > 4, blend_cntl); > continue; > } > > + si_blend_check_commutativity(sctx->screen, blend, > + eqRGB, srcRGB, dstRGB, 0x7 << (4 > * i)); > + si_blend_check_commutativity(sctx->screen, blend, > + eqA, srcA, dstA, 0x8 << (4 * i)); > + > /* Blending optimizations for RB+. > * These transformations don't change the behavior. 
> * > * First, get rid of DST in the blend factors: > * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) > */ > si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, > PIPE_BLENDFACTOR_DST_COLOR, > PIPE_BLENDFACTOR_SRC_COLOR); > si_blend_remove_dst(&eqA, &srcA, &dstA, > @@ -629,20 +676,21 @@ static void si_bind_blend_state(struct pipe_context > *ctx, void *state) > (!old_blend || > old_blend->alpha_to_coverage != blend->alpha_to_coverage || > old_blend->blend_enable_4bit != blend->blend_enable_4bit || > old_blend->cb_target_enabled_4bit != > blend->cb_target_enabled_4bit)) > si_mark_atom_dirty(sctx, &sctx->dpbb_state); > > if (sctx->screen->has_out_of_order_rast && > (!old_blend || > (old_blend->blend_enable_4bit != blend->blend_enable_4bit || > old_blend->cb_target_enabled_4bit != > blend->cb_target_enabled_4bit || > + old_blend->commutative_4bit != blend->commutative_4bit || > old_blend->logicop_enable != blend->logicop_enable))) > si_mark_atom_dirty(sctx, &sctx->msaa_config); > } > > static void si_delete_blend_state(struct pipe_context *ctx, void *state) > { > struct si_context *sctx = (struct si_context *)ctx; > si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state); > } > > @@ -3193,26 +3241,37 @@ static bool si_out_of_order_rasterization(struct > si_context *sctx) > return false; > > if (sctx->b.num_perfect_occlusion_queries != 0 && > !dsa_order_invariant.pass_set) > return false; > } > > if (!colormask) > return true; > > - bool blend_enabled = (colormask & blend->blend_enable_4bit) != 0; > + unsigned blendmask = colormask & blend->blend_enable_4bit; > > - if (blend_enabled) > - return false; /* TODO */ > + if (blendmask) { > + /* Only commutative blending. 
*/ > + if (blendmask & ~blend->commutative_4bit) > + return false; > + > + if (!dsa_order_invariant.pass_set) > + return false; > + } > + > + if (colormask & ~blendmask) { > + if (!dsa_order_invariant.pass_last) > + return false; > + } > > - return dsa_order_invariant.pass_last; > + return true; > } > > static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom > *atom) > { > struct radeon_winsys_cs *cs = sctx->b.gfx.cs; > unsigned num_tile_pipes = sctx->screen->b.info.num_tile_pipes; > /* 33% faster rendering to linear color buffers */ > bool dst_is_linear = sctx->framebuffer.any_dst_linear; > bool out_of_order_rast = si_out_of_order_rasterization(sctx); > unsigned sc_mode_cntl_1 = > diff --git a/src/gallium/drivers/radeonsi/si_state.h > b/src/gallium/drivers/radeonsi/si_state.h > index 56e597a5813..5aa50c58932 100644 > --- a/src/gallium/drivers/radeonsi/si_state.h > +++ b/src/gallium/drivers/radeonsi/si_state.h > @@ -48,20 +48,21 @@ struct si_shader_selector; > > struct si_state_blend { > struct si_pm4_state pm4; > uint32_t cb_target_mask; > /* Set 0xf or 0x0 (4 bits) per render target if the following is > * true. ANDed with spi_shader_col_format. 
> */ > unsigned cb_target_enabled_4bit; > unsigned blend_enable_4bit; > unsigned need_src_alpha_4bit; > + unsigned commutative_4bit; > bool alpha_to_coverage:1; > bool alpha_to_one:1; > bool dual_src_blend:1; > bool logicop_enable:1; > }; > > struct si_state_rasterizer { > struct si_pm4_state pm4; > /* poly offset states for 16-bit, 24-bit, and 32-bit zbuffers */ > struct si_pm4_state *pm4_poly_offset; > diff --git a/src/util/xmlpool/t_options.h b/src/util/xmlpool/t_options.h > index c92215183a5..214c7c359ee 100644 > --- a/src/util/xmlpool/t_options.h > +++ b/src/util/xmlpool/t_options.h > @@ -436,10 +436,15 @@ DRI_CONF_OPT_END > > #define DRI_CONF_RADEONSI_ENABLE_SISCHED(def) \ > DRI_CONF_OPT_BEGIN_B(radeonsi_enable_sisched, def) \ > DRI_CONF_DESC(en,gettext("Use the LLVM sisched option for shader > compiles")) \ > DRI_CONF_OPT_END > > #define DRI_CONF_RADEONSI_ASSUME_NO_Z_FIGHTS(def) \ > DRI_CONF_OPT_BEGIN_B(radeonsi_assume_no_z_fights, def) \ > DRI_CONF_DESC(en,gettext("Assume no Z fights (enables aggressive > out-of-order rasterization to improve performance; may cause rendering > errors)")) \ > DRI_CONF_OPT_END > + > +#define DRI_CONF_RADEONSI_COMMUTATIVE_BLEND_ADD(def) \ > +DRI_CONF_OPT_BEGIN_B(radeonsi_commutative_blend_add, def) \ > + DRI_CONF_DESC(en,gettext("Commutative additive blending > optimizations (may cause rendering errors)")) \ > +DRI_CONF_OPT_END > -- > 2.11.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev