From: Marek Olšák <marek.ol...@amd.com> si_emit_streamout_end is called directly; it is not a state, so it has to apply the scissor bug workaround itself whenever it writes a context register.
Cc: 19.0 <mesa-sta...@lists.freedesktop.org> --- src/gallium/drivers/radeonsi/si_pipe.c | 2 ++ src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_state_draw.c | 2 +- src/gallium/drivers/radeonsi/si_state_streamout.c | 10 ++++++++++ 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 5caeb57..43c4914 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1069,20 +1069,22 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->has_out_of_order_rast = sscreen->info.chip_class >= VI && sscreen->info.max_se >= 2 && !(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER)); sscreen->assume_no_z_fights = driQueryOptionb(config->options, "radeonsi_assume_no_z_fights"); sscreen->commutative_blend_add = driQueryOptionb(config->options, "radeonsi_commutative_blend_add"); sscreen->clear_db_cache_before_clear = driQueryOptionb(config->options, "radeonsi_clear_db_cache_before_clear"); + sscreen->has_gfx9_scissor_bug = sscreen->info.family == CHIP_VEGA10 || + sscreen->info.family == CHIP_RAVEN; sscreen->has_msaa_sample_loc_bug = (sscreen->info.family >= CHIP_POLARIS10 && sscreen->info.family <= CHIP_POLARIS12) || sscreen->info.family == CHIP_VEGA10 || sscreen->info.family == CHIP_RAVEN; sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 || sscreen->info.family == CHIP_RAVEN; sscreen->has_dcc_constant_encode = sscreen->info.family == CHIP_RAVEN2; /* Only enable primitive binning on APUs by default. 
*/ sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN || diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 301d386..ee53192 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -463,20 +463,21 @@ struct si_screen { unsigned eqaa_force_coverage_samples; unsigned eqaa_force_z_samples; unsigned eqaa_force_color_samples; bool has_clear_state; bool has_distributed_tess; bool has_draw_indirect_multi; bool has_out_of_order_rast; bool assume_no_z_fights; bool commutative_blend_add; bool clear_db_cache_before_clear; + bool has_gfx9_scissor_bug; bool has_msaa_sample_loc_bug; bool has_ls_vgpr_init_bug; bool has_dcc_constant_encode; bool dpbb_allowed; bool dfsm_allowed; bool llvm_has_working_vgpr_indexing; /* Whether shaders are monolithic (1-part) or separate (3-part). */ bool use_monolithic_shaders; bool record_llvm_ir; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 2a514f1..e2fba41 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -1220,21 +1220,21 @@ static void si_get_draw_start_count(struct si_context *sctx, } static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_info *info, unsigned skip_atom_mask) { unsigned num_patches = 0; /* Vega10/Raven scissor bug workaround. When any context register is * written (i.e. the GPU rolls the context), PA_SC_VPORT_SCISSOR * registers must be written too. 
*/ - bool handle_scissor_bug = (sctx->family == CHIP_VEGA10 || sctx->family == CHIP_RAVEN) && + bool handle_scissor_bug = sctx->screen->has_gfx9_scissor_bug && !si_is_atom_dirty(sctx, &sctx->atoms.s.scissors); bool context_roll = false; /* set correctly for GFX9 only */ context_roll |= si_emit_rasterizer_prim_state(sctx); if (sctx->tes_shader.cso) context_roll |= si_emit_derived_tess_state(sctx, info, &num_patches); if (handle_scissor_bug && (info->count_from_stream_output || sctx->dirty_atoms & si_atoms_that_always_roll_context() || diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c index 2bf6862..de0b051 100644 --- a/src/gallium/drivers/radeonsi/si_state_streamout.c +++ b/src/gallium/drivers/radeonsi/si_state_streamout.c @@ -269,20 +269,21 @@ static void si_emit_streamout_begin(struct si_context *sctx) sctx->streamout.begin_emitted = true; } void si_emit_streamout_end(struct si_context *sctx) { struct radeon_cmdbuf *cs = sctx->gfx_cs; struct si_streamout_target **t = sctx->streamout.targets; unsigned i; uint64_t va; + bool context_reg_changed = false; si_flush_vgt_streamout(sctx); for (i = 0; i < sctx->streamout.num_targets; i++) { if (!t[i]) continue; va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset; radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | @@ -296,25 +297,34 @@ void si_emit_streamout_end(struct si_context *sctx) radeon_add_to_buffer_list(sctx, sctx->gfx_cs, t[i]->buf_filled_size, RADEON_USAGE_WRITE, RADEON_PRIO_SO_FILLED_SIZE); /* Zero the buffer size. The counters (primitives generated, * primitives emitted) may be enabled even if there is not * buffer bound. This ensures that the primitives-emitted query * won't increment. 
*/ radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0); + context_reg_changed = true; t[i]->buf_filled_size_valid = true; } sctx->streamout.begin_emitted = false; + + /* If we caused a context roll (= changed context registers), + * we need to apply the scissor bug workaround. + */ + if (sctx->screen->has_gfx9_scissor_bug && context_reg_changed) { + sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; + si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors); + } } /* STREAMOUT CONFIG DERIVED STATE * * Streamout must be enabled for the PRIMITIVES_GENERATED query to work. * The buffer mask is an independent state, so no writes occur if there * are no buffers bound. */ static void si_emit_streamout_enable(struct si_context *sctx) -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev