From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_state_draw.c | 97 ++++++++++++++++------------
 1 file changed, 57 insertions(+), 40 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 3f933fe..ae48115 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1155,28 +1155,80 @@ void si_ce_pre_draw_synchronization(struct si_context *sctx)
 
 void si_ce_post_draw_synchronization(struct si_context *sctx)
 {
 	if (sctx->ce_need_synchronization) {
 		radeon_emit(sctx->b.gfx.cs, PKT3(PKT3_INCREMENT_DE_COUNTER, 0, 0));
 		radeon_emit(sctx->b.gfx.cs, 0); /* unused */
 		sctx->ce_need_synchronization = false;
 	}
 }
 
+static bool si_cache_flush_and_prefetch(struct si_context *sctx)
+{
+	/* Flush caches before prefetches. */
+	if (sctx->b.flags)
+		si_emit_cache_flush(sctx);
+
+	/* Dumping from CE to L2 should be done after cache flushes, but
+	 * this is only guaranteed when CE is behind or in-sync with DE.
+	 */
+	if (!si_upload_graphics_shader_descriptors(sctx))
+		return false;
+
+	if (sctx->prefetch_L2)
+		cik_emit_prefetch_L2(sctx);
+
+	return true;
+}
+
+static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_info *info)
+{
+	/* Emit state atoms. */
+	unsigned mask = sctx->dirty_atoms;
+	while (mask) {
+		struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)];
+
+		atom->emit(&sctx->b, atom);
+	}
+	sctx->dirty_atoms = 0;
+
+	/* Emit states. */
+	mask = sctx->dirty_states;
+	while (mask) {
+		unsigned i = u_bit_scan(&mask);
+		struct si_pm4_state *state = sctx->queued.array[i];
+
+		if (!state || sctx->emitted.array[i] == state)
+			continue;
+
+		si_pm4_emit(sctx, state);
+		sctx->emitted.array[i] = state;
+	}
+	sctx->dirty_states = 0;
+
+	/* Emit draw states. */
+	unsigned num_patches = 0;
+
+	si_emit_rasterizer_prim_state(sctx);
+	if (sctx->tes_shader.cso)
+		si_emit_derived_tess_state(sctx, info, &num_patches);
+	si_emit_vs_state(sctx, info);
+	si_emit_draw_registers(sctx, info, num_patches);
+}
+
 void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 	struct pipe_resource *indexbuf = info->index.resource;
-	unsigned mask, dirty_tex_counter;
+	unsigned dirty_tex_counter;
 	enum pipe_prim_type rast_prim;
-	unsigned num_patches = 0;
 	unsigned index_size = info->index_size;
 	unsigned index_offset = info->indirect ? info->start * index_size : 0;
 
 	if (likely(!info->indirect)) {
 		/* SI-CI treat instance_count==0 as instance_count==1. There is
 		 * no workaround for indirect draws, but we can at least skip
 		 * direct draws.
 		 */
 		if (unlikely(!info->instance_count))
 			return;
@@ -1244,23 +1296,20 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
 		if (gs_tri_strip_adj_fix != sctx->gs_tri_strip_adj_fix) {
 			sctx->gs_tri_strip_adj_fix = gs_tri_strip_adj_fix;
 			sctx->do_update_shaders = true;
 		}
 	}
 
 	if (sctx->do_update_shaders && !si_update_shaders(sctx))
 		return;
 
-	if (!si_upload_graphics_shader_descriptors(sctx))
-		return;
-
 	if (index_size) {
 		/* Translate or upload, if needed. */
 		/* 8-bit indices are supported on VI. */
 		if (sctx->b.chip_class <= CIK && index_size == 1) {
 			unsigned start, count, start_offset, size, offset;
 			void *ptr;
 
 			si_get_draw_start_count(sctx, info, &start, &count);
 			start_offset = start * 2;
 			size = count * 2;
@@ -1335,55 +1384,23 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	 */
 	if (!si_upload_vertex_buffer_descriptors(sctx))
 		return;
 
 	/* GFX9 scissor bug workaround. There is also a more efficient but
 	 * more involved alternative workaround. */
 	if (sctx->b.chip_class == GFX9 &&
 	    si_is_atom_dirty(sctx, &sctx->b.scissors.atom))
 		sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
 
-	/* Flush caches before the first state atom, which does L2 prefetches. */
-	if (sctx->b.flags)
-		si_emit_cache_flush(sctx);
-
-	if (sctx->prefetch_L2)
-		cik_emit_prefetch_L2(sctx);
-
-	/* Emit state atoms. */
-	mask = sctx->dirty_atoms;
-	while (mask) {
-		struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)];
-
-		atom->emit(&sctx->b, atom);
-	}
-	sctx->dirty_atoms = 0;
-
-	/* Emit states. */
-	mask = sctx->dirty_states;
-	while (mask) {
-		unsigned i = u_bit_scan(&mask);
-		struct si_pm4_state *state = sctx->queued.array[i];
-
-		if (!state || sctx->emitted.array[i] == state)
-			continue;
-
-		si_pm4_emit(sctx, state);
-		sctx->emitted.array[i] = state;
-	}
-	sctx->dirty_states = 0;
-
-	si_emit_rasterizer_prim_state(sctx);
-	if (sctx->tes_shader.cso)
-		si_emit_derived_tess_state(sctx, info, &num_patches);
-	si_emit_vs_state(sctx, info);
-	si_emit_draw_registers(sctx, info, num_patches);
+	if (!si_cache_flush_and_prefetch(sctx))
+		return;
+	si_emit_all_states(sctx, info);
 
 	si_ce_pre_draw_synchronization(sctx);
 	si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
 	si_ce_post_draw_synchronization(sctx);
 
 	if (sctx->trace_buf)
 		si_trace_emit(sctx);
 
 	/* Workaround for a VGT hang when streamout is enabled.
 	 * It must be done after drawing. */
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev