From: Marek Olšák <marek.ol...@amd.com> Also move the emission of dirty PM4 states from si_pm4_emit_dirty into si_draw_vbo, because the dirty_states mask should be 0 in most cases. --- src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_pm4.c | 16 +--------------- src/gallium/drivers/radeonsi/si_pm4.h | 1 - src/gallium/drivers/radeonsi/si_state.h | 3 +++ src/gallium/drivers/radeonsi/si_state_draw.c | 17 +++++++++++++++-- 5 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index b6474e6..da6aca1 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -221,20 +221,21 @@ struct si_context { struct si_shader_ctx_state fixed_func_tcs_shader; LLVMTargetMachineRef tm; /* only non-threaded compilation */ bool gfx_flush_in_progress; bool compute_is_busy; /* Atoms (direct states). */ union si_state_atoms atoms; unsigned dirty_atoms; /* mask */ /* PM4 states (precomputed immutable states) */ + unsigned dirty_states; union si_state queued; union si_state emitted; /* Atom declarations. */ struct r600_atom prefetch_L2; struct si_framebuffer framebuffer; struct si_sample_locs msaa_sample_locs; struct r600_atom db_render_state; struct r600_atom msaa_config; struct si_sample_mask sample_mask; diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index 97b6799..2680439 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -22,22 +22,20 @@ * * Authors: * Christian König <christian.koe...@amd.com> */ #include "radeon/r600_cs.h" #include "util/u_memory.h" #include "si_pipe.h" #include "sid.h" -#define NUMBER_OF_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *)) - void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode) { state->last_opcode = opcode; state->last_pm4 = state->ndw++; } void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw) { state->pm4[state->ndw++] = dw; } @@ -150,36 +148,24 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state) RADEON_USAGE_READ, RADEON_PRIO_IB2); radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0)); radeon_emit(cs, ib->gpu_address); radeon_emit(cs, (ib->gpu_address >> 32) & 0xffff); radeon_emit(cs, (ib->b.b.width0 >> 2) & 0xfffff); } } -void si_pm4_emit_dirty(struct si_context *sctx) -{ - for (int i = 0; i < NUMBER_OF_STATES; ++i) { - struct si_pm4_state 
*state = sctx->queued.array[i]; - - if (!state || sctx->emitted.array[i] == state) - continue; - - si_pm4_emit(sctx, state); - sctx->emitted.array[i] = state; - } -} - void si_pm4_reset_emitted(struct si_context *sctx) { memset(&sctx->emitted, 0, sizeof(sctx->emitted)); + sctx->dirty_states |= u_bit_consecutive(0, SI_NUM_STATES); } void si_pm4_upload_indirect_buffer(struct si_context *sctx, struct si_pm4_state *state) { struct pipe_screen *screen = sctx->b.b.screen; unsigned aligned_ndw = align(state->ndw, 8); /* only supported on CIK and later */ if (sctx->b.chip_class < CIK) diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h index 9b02a80..106abe1 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.h +++ b/src/gallium/drivers/radeonsi/si_pm4.h @@ -71,14 +71,13 @@ void si_pm4_add_bo(struct si_pm4_state *state, void si_pm4_upload_indirect_buffer(struct si_context *sctx, struct si_pm4_state *state); void si_pm4_clear_state(struct si_pm4_state *state); void si_pm4_free_state_simple(struct si_pm4_state *state); void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsigned idx); void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state); -void si_pm4_emit_dirty(struct si_context *sctx); void si_pm4_reset_emitted(struct si_context *sctx); #endif diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 915a8eb..bdcfb5b 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -122,20 +122,22 @@ union si_state { struct si_pm4_state *hs; struct si_pm4_state *es; struct si_pm4_state *gs; struct si_pm4_state *vgt_shader_config; struct si_pm4_state *vs; struct si_pm4_state *ps; } named; struct si_pm4_state *array[0]; }; +#define SI_NUM_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *)) + union si_state_atoms { struct { /* The order matters. 
*/ struct r600_atom *prefetch_L2; struct r600_atom *render_cond; struct r600_atom *streamout_begin; struct r600_atom *streamout_enable; /* must be after streamout_begin */ struct r600_atom *framebuffer; struct r600_atom *msaa_sample_locs; struct r600_atom *db_render_state; @@ -260,20 +262,21 @@ struct si_buffer_resources { #define si_pm4_block_idx(member) \ (offsetof(union si_state, named.member) / sizeof(struct si_pm4_state *)) #define si_pm4_state_changed(sctx, member) \ ((sctx)->queued.named.member != (sctx)->emitted.named.member) #define si_pm4_bind_state(sctx, member, value) \ do { \ (sctx)->queued.named.member = (value); \ + (sctx)->dirty_states |= 1 << si_pm4_block_idx(member); \ } while(0) #define si_pm4_delete_state(sctx, member, value) \ do { \ if ((sctx)->queued.named.member == (value)) { \ (sctx)->queued.named.member = NULL; \ } \ si_pm4_free_state(sctx, (struct si_pm4_state *)(value), \ si_pm4_block_idx(member)); \ } while(0) diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index c80d4d6..cce5f30 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -1109,30 +1109,43 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) * this must be called after si_need_cs_space, because we must let * need_cs_space flush before we add buffers to the buffer list. */ if (!si_upload_vertex_buffer_descriptors(sctx)) return; /* Flush caches before the first state atom, which does L2 prefetches. */ if (sctx->b.flags) si_emit_cache_flush(sctx); - /* Emit states. */ + /* Emit state atoms. */ mask = sctx->dirty_atoms; while (mask) { struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)]; atom->emit(&sctx->b, atom); } sctx->dirty_atoms = 0; - si_pm4_emit_dirty(sctx); + /* Emit states. 
*/ + mask = sctx->dirty_states; + while (mask) { + unsigned i = u_bit_scan(&mask); + struct si_pm4_state *state = sctx->queued.array[i]; + + if (!state || sctx->emitted.array[i] == state) + continue; + + si_pm4_emit(sctx, state); + sctx->emitted.array[i] = state; + } + sctx->dirty_states = 0; + si_emit_scratch_reloc(sctx); si_emit_rasterizer_prim_state(sctx); si_emit_draw_registers(sctx, info); si_ce_pre_draw_synchronization(sctx); si_emit_draw_packets(sctx, info, &ib); si_ce_post_draw_synchronization(sctx); -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev