From: Marek Olšák <marek.ol...@amd.com>

si_emit_streamout_end is called directly; it's not a state.

Cc: 19.0 <mesa-sta...@lists.freedesktop.org>
---
 src/gallium/drivers/radeonsi/si_pipe.c            |  2 ++
 src/gallium/drivers/radeonsi/si_pipe.h            |  1 +
 src/gallium/drivers/radeonsi/si_state_draw.c      |  2 +-
 src/gallium/drivers/radeonsi/si_state_streamout.c | 10 ++++++++++
 4 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 5caeb57..43c4914 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1069,20 +1069,22 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 
        sscreen->has_out_of_order_rast = sscreen->info.chip_class >= VI &&
                                         sscreen->info.max_se >= 2 &&
                                         !(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER));
        sscreen->assume_no_z_fights =
                driQueryOptionb(config->options, "radeonsi_assume_no_z_fights");
        sscreen->commutative_blend_add =
                driQueryOptionb(config->options, "radeonsi_commutative_blend_add");
        sscreen->clear_db_cache_before_clear =
                driQueryOptionb(config->options, "radeonsi_clear_db_cache_before_clear");
+       sscreen->has_gfx9_scissor_bug = sscreen->info.family == CHIP_VEGA10 ||
+                                       sscreen->info.family == CHIP_RAVEN;
        sscreen->has_msaa_sample_loc_bug = (sscreen->info.family >= CHIP_POLARIS10 &&
                                            sscreen->info.family <= CHIP_POLARIS12) ||
                                           sscreen->info.family == CHIP_VEGA10 ||
                                           sscreen->info.family == CHIP_RAVEN;
        sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 ||
                                        sscreen->info.family == CHIP_RAVEN;
        sscreen->has_dcc_constant_encode = sscreen->info.family == CHIP_RAVEN2;
 
        /* Only enable primitive binning on APUs by default. */
        sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN ||
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 301d386..ee53192 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -463,20 +463,21 @@ struct si_screen {
        unsigned                        eqaa_force_coverage_samples;
        unsigned                        eqaa_force_z_samples;
        unsigned                        eqaa_force_color_samples;
        bool                            has_clear_state;
        bool                            has_distributed_tess;
        bool                            has_draw_indirect_multi;
        bool                            has_out_of_order_rast;
        bool                            assume_no_z_fights;
        bool                            commutative_blend_add;
        bool                            clear_db_cache_before_clear;
+       bool                            has_gfx9_scissor_bug;
        bool                            has_msaa_sample_loc_bug;
        bool                            has_ls_vgpr_init_bug;
        bool                            has_dcc_constant_encode;
        bool                            dpbb_allowed;
        bool                            dfsm_allowed;
        bool                            llvm_has_working_vgpr_indexing;
 
        /* Whether shaders are monolithic (1-part) or separate (3-part). */
        bool                            use_monolithic_shaders;
        bool                            record_llvm_ir;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 2a514f1..e2fba41 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1220,21 +1220,21 @@ static void si_get_draw_start_count(struct si_context *sctx,
 }
 
 static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_info *info,
                               unsigned skip_atom_mask)
 {
        unsigned num_patches = 0;
        /* Vega10/Raven scissor bug workaround. When any context register is
         * written (i.e. the GPU rolls the context), PA_SC_VPORT_SCISSOR
         * registers must be written too.
         */
-       bool handle_scissor_bug = (sctx->family == CHIP_VEGA10 || sctx->family == CHIP_RAVEN) &&
+       bool handle_scissor_bug = sctx->screen->has_gfx9_scissor_bug &&
                                  !si_is_atom_dirty(sctx, &sctx->atoms.s.scissors);
        bool context_roll = false; /* set correctly for GFX9 only */
 
        context_roll |= si_emit_rasterizer_prim_state(sctx);
        if (sctx->tes_shader.cso)
                context_roll |= si_emit_derived_tess_state(sctx, info, &num_patches);
 
        if (handle_scissor_bug &&
            (info->count_from_stream_output ||
             sctx->dirty_atoms & si_atoms_that_always_roll_context() ||
diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c
index 2bf6862..de0b051 100644
--- a/src/gallium/drivers/radeonsi/si_state_streamout.c
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -269,20 +269,21 @@ static void si_emit_streamout_begin(struct si_context *sctx)
 
        sctx->streamout.begin_emitted = true;
 }
 
 void si_emit_streamout_end(struct si_context *sctx)
 {
        struct radeon_cmdbuf *cs = sctx->gfx_cs;
        struct si_streamout_target **t = sctx->streamout.targets;
        unsigned i;
        uint64_t va;
+       bool context_reg_changed = false;
 
        si_flush_vgt_streamout(sctx);
 
        for (i = 0; i < sctx->streamout.num_targets; i++) {
                if (!t[i])
                        continue;
 
                va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset;
                radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
                radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
@@ -296,25 +297,34 @@ void si_emit_streamout_end(struct si_context *sctx)
                radeon_add_to_buffer_list(sctx,  sctx->gfx_cs,
                                          t[i]->buf_filled_size,
                                          RADEON_USAGE_WRITE,
                                          RADEON_PRIO_SO_FILLED_SIZE);
 
                /* Zero the buffer size. The counters (primitives generated,
                 * primitives emitted) may be enabled even if there is not
                 * buffer bound. This ensures that the primitives-emitted query
                 * won't increment. */
                radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
+               context_reg_changed = true;
 
                t[i]->buf_filled_size_valid = true;
        }
 
        sctx->streamout.begin_emitted = false;
+
+       /* If we caused a context roll (= changed context registers),
+        * we need to apply the scissor bug workaround.
+        */
+       if (sctx->screen->has_gfx9_scissor_bug && context_reg_changed) {
+               sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+               si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors);
+       }
 }
 
 /* STREAMOUT CONFIG DERIVED STATE
  *
  * Streamout must be enabled for the PRIMITIVES_GENERATED query to work.
  * The buffer mask is an independent state, so no writes occur if there
  * are no buffers bound.
  */
 
 static void si_emit_streamout_enable(struct si_context *sctx)
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to