From: Marek Olšák <marek.ol...@amd.com>

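Track dirty PM4 states with a bitmask (dirty_states). Also move the emission
loop from si_pm4_emit_dirty into si_draw_vbo, because the mask should be 0 in
most cases.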
---
 src/gallium/drivers/radeonsi/si_pipe.h       |  1 +
 src/gallium/drivers/radeonsi/si_pm4.c        | 16 +---------------
 src/gallium/drivers/radeonsi/si_pm4.h        |  1 -
 src/gallium/drivers/radeonsi/si_state.h      |  3 +++
 src/gallium/drivers/radeonsi/si_state_draw.c | 17 +++++++++++++++--
 5 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index b6474e6..da6aca1 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -221,20 +221,21 @@ struct si_context {
 
        struct si_shader_ctx_state      fixed_func_tcs_shader;
        LLVMTargetMachineRef            tm; /* only non-threaded compilation */
        bool                            gfx_flush_in_progress;
        bool                            compute_is_busy;
 
        /* Atoms (direct states). */
        union si_state_atoms            atoms;
        unsigned                        dirty_atoms; /* mask */
        /* PM4 states (precomputed immutable states) */
+       unsigned                        dirty_states;
        union si_state                  queued;
        union si_state                  emitted;
 
        /* Atom declarations. */
        struct r600_atom                prefetch_L2;
        struct si_framebuffer           framebuffer;
        struct si_sample_locs           msaa_sample_locs;
        struct r600_atom                db_render_state;
        struct r600_atom                msaa_config;
        struct si_sample_mask           sample_mask;
diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
index 97b6799..2680439 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -22,22 +22,20 @@
  *
  * Authors:
  *      Christian König <christian.koe...@amd.com>
  */
 
 #include "radeon/r600_cs.h"
 #include "util/u_memory.h"
 #include "si_pipe.h"
 #include "sid.h"
 
-#define NUMBER_OF_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *))
-
 void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode)
 {
        state->last_opcode = opcode;
        state->last_pm4 = state->ndw++;
 }
 
 void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw)
 {
        state->pm4[state->ndw++] = dw;
 }
@@ -150,36 +148,24 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
                                          RADEON_USAGE_READ,
                                           RADEON_PRIO_IB2);
 
                radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
                radeon_emit(cs, ib->gpu_address);
                radeon_emit(cs, (ib->gpu_address >> 32) & 0xffff);
                radeon_emit(cs, (ib->b.b.width0 >> 2) & 0xfffff);
        }
 }
 
-void si_pm4_emit_dirty(struct si_context *sctx)
-{
-       for (int i = 0; i < NUMBER_OF_STATES; ++i) {
-               struct si_pm4_state *state = sctx->queued.array[i];
-
-               if (!state || sctx->emitted.array[i] == state)
-                       continue;
-
-               si_pm4_emit(sctx, state);
-               sctx->emitted.array[i] = state;
-       }
-}
-
 void si_pm4_reset_emitted(struct si_context *sctx)
 {
        memset(&sctx->emitted, 0, sizeof(sctx->emitted));
+       sctx->dirty_states |= u_bit_consecutive(0, SI_NUM_STATES);
 }
 
 void si_pm4_upload_indirect_buffer(struct si_context *sctx,
                                   struct si_pm4_state *state)
 {
        struct pipe_screen *screen = sctx->b.b.screen;
        unsigned aligned_ndw = align(state->ndw, 8);
 
        /* only supported on CIK and later */
        if (sctx->b.chip_class < CIK)
diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
index 9b02a80..106abe1 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -71,14 +71,13 @@ void si_pm4_add_bo(struct si_pm4_state *state,
 void si_pm4_upload_indirect_buffer(struct si_context *sctx,
                                   struct si_pm4_state *state);
 
 void si_pm4_clear_state(struct si_pm4_state *state);
 void si_pm4_free_state_simple(struct si_pm4_state *state);
 void si_pm4_free_state(struct si_context *sctx,
                       struct si_pm4_state *state,
                       unsigned idx);
 
 void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state);
-void si_pm4_emit_dirty(struct si_context *sctx);
 void si_pm4_reset_emitted(struct si_context *sctx);
 
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 915a8eb..bdcfb5b 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -122,20 +122,22 @@ union si_state {
                struct si_pm4_state             *hs;
                struct si_pm4_state             *es;
                struct si_pm4_state             *gs;
                struct si_pm4_state             *vgt_shader_config;
                struct si_pm4_state             *vs;
                struct si_pm4_state             *ps;
        } named;
        struct si_pm4_state     *array[0];
 };
 
+#define SI_NUM_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *))
+
 union si_state_atoms {
        struct {
                /* The order matters. */
                struct r600_atom *prefetch_L2;
                struct r600_atom *render_cond;
                struct r600_atom *streamout_begin;
                struct r600_atom *streamout_enable; /* must be after streamout_begin */
                struct r600_atom *framebuffer;
                struct r600_atom *msaa_sample_locs;
                struct r600_atom *db_render_state;
@@ -260,20 +262,21 @@ struct si_buffer_resources {
 
 #define si_pm4_block_idx(member) \
        (offsetof(union si_state, named.member) / sizeof(struct si_pm4_state *))
 
 #define si_pm4_state_changed(sctx, member) \
        ((sctx)->queued.named.member != (sctx)->emitted.named.member)
 
 #define si_pm4_bind_state(sctx, member, value) \
        do { \
                (sctx)->queued.named.member = (value); \
+               (sctx)->dirty_states |= 1 << si_pm4_block_idx(member); \
        } while(0)
 
 #define si_pm4_delete_state(sctx, member, value) \
        do { \
                if ((sctx)->queued.named.member == (value)) { \
                        (sctx)->queued.named.member = NULL; \
                } \
                si_pm4_free_state(sctx, (struct si_pm4_state *)(value), \
                                  si_pm4_block_idx(member)); \
        } while(0)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index c80d4d6..cce5f30 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1109,30 +1109,43 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
         * this must be called after si_need_cs_space, because we must let
         * need_cs_space flush before we add buffers to the buffer list.
         */
        if (!si_upload_vertex_buffer_descriptors(sctx))
                return;
 
        /* Flush caches before the first state atom, which does L2 prefetches. */
        if (sctx->b.flags)
                si_emit_cache_flush(sctx);
 
-       /* Emit states. */
+       /* Emit state atoms. */
        mask = sctx->dirty_atoms;
        while (mask) {
                struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)];
 
                atom->emit(&sctx->b, atom);
        }
        sctx->dirty_atoms = 0;
 
-       si_pm4_emit_dirty(sctx);
+       /* Emit states. */
+       mask = sctx->dirty_states;
+       while (mask) {
+               unsigned i = u_bit_scan(&mask);
+               struct si_pm4_state *state = sctx->queued.array[i];
+
+               if (!state || sctx->emitted.array[i] == state)
+                       continue;
+
+               si_pm4_emit(sctx, state);
+               sctx->emitted.array[i] = state;
+       }
+       sctx->dirty_states = 0;
+
        si_emit_scratch_reloc(sctx);
        si_emit_rasterizer_prim_state(sctx);
        si_emit_draw_registers(sctx, info);
 
        si_ce_pre_draw_synchronization(sctx);
 
        si_emit_draw_packets(sctx, info, &ib);
 
        si_ce_post_draw_synchronization(sctx);
 
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to