From: Marek Olšák <marek.ol...@amd.com> This and the previous commit decrease IB sizes and the number of si_update_shaders invocations as follows:
IB size si_update_shader calls Borderlands 2 -10% -27% Deus Ex: MD -5% -11% Talos Principle -8% -30% --- src/gallium/drivers/radeonsi/si_state.c | 68 ++++++++++++++++++++++--- src/gallium/drivers/radeonsi/si_state.h | 1 + src/gallium/drivers/radeonsi/si_state_shaders.c | 24 +++++++-- 3 files changed, 80 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 5e5f564..323ec5e 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -596,23 +596,41 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, static void *si_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) { return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); } static void si_bind_blend_state(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; - si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state); - si_mark_atom_dirty(sctx, &sctx->cb_render_state); - sctx->do_update_shaders = true; + struct si_state_blend *old_blend = sctx->queued.named.blend; + struct si_state_blend *blend = (struct si_state_blend *)state; + + if (!state) + return; + + if (!old_blend || + old_blend->cb_target_mask != blend->cb_target_mask || + old_blend->dual_src_blend != blend->dual_src_blend) + si_mark_atom_dirty(sctx, &sctx->cb_render_state); + + si_pm4_bind_state(sctx, blend, state); + + if (!old_blend || + old_blend->cb_target_mask != blend->cb_target_mask || + old_blend->alpha_to_coverage != blend->alpha_to_coverage || + old_blend->alpha_to_one != blend->alpha_to_one || + old_blend->dual_src_blend != blend->dual_src_blend || + old_blend->blend_enable_4bit != blend->blend_enable_4bit || + old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit) + sctx->do_update_shaders = true; } static void si_delete_blend_state(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state); } static void si_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *state) @@ -914,24 +932,41 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state) } sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR; sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color); r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz); si_pm4_bind_state(sctx, rasterizer, rs); si_update_poly_offset_state(sctx); - si_mark_atom_dirty(sctx, &sctx->clip_regs); + if (!old_rs || + old_rs->clip_plane_enable != rs->clip_plane_enable || + old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl) + si_mark_atom_dirty(sctx, &sctx->clip_regs); + sctx->ia_multi_vgt_param_key.u.line_stipple_enabled = rs->line_stipple_enable; - sctx->do_update_shaders = true; + + if (!old_rs || + old_rs->clip_plane_enable != rs->clip_plane_enable || + old_rs->rasterizer_discard != rs->rasterizer_discard || + old_rs->sprite_coord_enable != rs->sprite_coord_enable || + old_rs->flatshade != rs->flatshade || + old_rs->two_side != rs->two_side || + old_rs->multisample_enable != rs->multisample_enable || + old_rs->poly_stipple_enable != rs->poly_stipple_enable || + old_rs->poly_smooth != rs->poly_smooth || + old_rs->line_smooth != rs->line_smooth || + old_rs->clamp_fragment_color != rs->clamp_fragment_color || + old_rs->force_persample_interp != rs->force_persample_interp) + sctx->do_update_shaders = true; } static void si_delete_rs_state(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; if (sctx->queued.named.rasterizer == state) si_pm4_bind_state(sctx, poly_offset, NULL); si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state); } @@ -1055,33 +1090,36 @@ static void *si_create_dsa_state(struct pipe_context *ctx, si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min)); si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max)); } return dsa; } static void si_bind_dsa_state(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; + struct si_state_dsa *old_dsa = sctx->queued.named.dsa; struct si_state_dsa *dsa = state; if (!state) return; si_pm4_bind_state(sctx, dsa, dsa); if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part, sizeof(struct si_dsa_stencil_ref_part)) != 0) { sctx->stencil_ref.dsa_part = dsa->stencil_ref; si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom); } - sctx->do_update_shaders = true; + + if (!old_dsa || old_dsa->alpha_func != dsa->alpha_func) + sctx->do_update_shaders = true; } static void si_delete_dsa_state(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state); } static void *si_create_db_flush_dsa(struct si_context *sctx) { @@ -2437,20 +2475,21 @@ static void si_framebuffer_cache_flush(struct si_context *sctx) static void si_set_framebuffer_state(struct pipe_context *ctx, const struct pipe_framebuffer_state *state) { struct si_context *sctx = (struct si_context *)ctx; struct pipe_constant_buffer constbuf = {0}; struct r600_surface *surf = NULL; struct r600_texture *rtex; bool old_any_dst_linear = sctx->framebuffer.any_dst_linear; unsigned old_nr_samples = sctx->framebuffer.nr_samples; + unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit; bool unbound = false; int i; /* Fast path for linear <-> sRGB changes and no-op changes. */ if (state->nr_cbufs && state->nr_cbufs == sctx->framebuffer.state.nr_cbufs && state->zsbuf == sctx->framebuffer.state.zsbuf) { struct pipe_surface **cbufs = sctx->framebuffer.state.cbufs; /* See if the surfaces are equivalent. */ @@ -2593,23 +2632,25 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, surf = (struct r600_surface*)state->zsbuf; rtex = (struct r600_texture*)surf->base.texture; if (!surf->depth_initialized) { si_init_depth_surface(sctx, surf); } r600_context_add_resource_size(ctx, surf->base.texture); } si_update_poly_offset_state(sctx); - si_mark_atom_dirty(sctx, &sctx->cb_render_state); si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); + if (old_colorbuf_enabled_4bit != sctx->framebuffer.colorbuf_enabled_4bit) + si_mark_atom_dirty(sctx, &sctx->cb_render_state); + if (sctx->framebuffer.any_dst_linear != old_any_dst_linear) si_mark_atom_dirty(sctx, &sctx->msaa_config); if (sctx->framebuffer.nr_samples != old_nr_samples) { si_mark_atom_dirty(sctx, &sctx->msaa_config); si_mark_atom_dirty(sctx, &sctx->db_render_state); /* Set sample locations as fragment shader constants. */ switch (sctx->framebuffer.nr_samples) { case 1: @@ -3727,20 +3768,23 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, unsigned data_format, num_format; int first_non_void; unsigned vbo_index = elements[i].vertex_buffer_index; unsigned char swizzle[4]; if (vbo_index >= SI_NUM_VERTEX_BUFFERS) { FREE(v); return NULL; } + if (elements[i].instance_divisor) + v->uses_instance_divisors = true; + if (!used[vbo_index]) { v->first_vb_use_mask |= 1 << i; used[vbo_index] = true; } desc = util_format_description(elements[i].src_format); first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL; @@ -3840,25 +3884,33 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, S_008F0C_DATA_FORMAT(data_format); } memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count); return v; } static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; + struct si_vertex_element *old = sctx->vertex_elements; struct si_vertex_element *v = (struct si_vertex_element*)state; sctx->vertex_elements = v; sctx->vertex_buffers_dirty = true; - sctx->do_update_shaders = true; + + if (v && + (!old || + old->count != v->count || + old->uses_instance_divisors != v->uses_instance_divisors || + v->uses_instance_divisors || /* we don't check which divisors changed */ + memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * v->count))) + sctx->do_update_shaders = true; } static void si_delete_vertex_element(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; if (sctx->vertex_elements == state) sctx->vertex_elements = NULL; FREE(state); } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 275f830..4da51be 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -102,20 +102,21 @@ struct si_vertex_element { unsigned count; unsigned first_vb_use_mask; /* Vertex buffer descriptor list size aligned for optimal prefetch. */ unsigned desc_list_byte_size; uint8_t fix_fetch[SI_MAX_ATTRIBS]; uint32_t rsrc_word3[SI_MAX_ATTRIBS]; uint32_t format_size[SI_MAX_ATTRIBS]; struct pipe_vertex_element elements[SI_MAX_ATTRIBS]; + bool uses_instance_divisors; }; union si_state { struct { struct si_state_blend *blend; struct si_state_rasterizer *rasterizer; struct si_state_dsa *dsa; struct si_pm4_state *poly_offset; struct si_pm4_state *ls; struct si_pm4_state *hs; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index c21f855..677a6de 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2307,32 +2307,39 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state) r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx)); si_set_active_descriptors_for_shader(sctx, sel); si_update_streamout_state(sctx); si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant, si_get_vs(sctx)->cso, si_get_vs_state(sctx)); } static void si_bind_ps_shader(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; + struct si_shader_selector *old_sel = sctx->ps_shader.cso; struct si_shader_selector *sel = state; /* skip if supplied shader is one already in use */ - if (sctx->ps_shader.cso == sel) + if (old_sel == sel) return; sctx->ps_shader.cso = sel; sctx->ps_shader.current = sel ? sel->first_variant : NULL; sctx->do_update_shaders = true; - if (sel && sctx->ia_multi_vgt_param_key.u.uses_tess) - si_update_tess_uses_prim_id(sctx); - si_mark_atom_dirty(sctx, &sctx->cb_render_state); + + if (sel) { + if (sctx->ia_multi_vgt_param_key.u.uses_tess) + si_update_tess_uses_prim_id(sctx); + + if (!old_sel || + old_sel->info.colors_written != sel->info.colors_written) + si_mark_atom_dirty(sctx, &sctx->cb_render_state); + } si_set_active_descriptors_for_shader(sctx, sel); } static void si_delete_shader(struct si_context *sctx, struct si_shader *shader) { if (shader->is_optimized) { util_queue_drop_job(&sctx->screen->shader_compiler_queue_low_priority, &shader->optimized_ready); util_queue_fence_destroy(&shader->optimized_ready); } @@ -3081,20 +3088,23 @@ static void si_update_vgt_shader_config(struct si_context *sctx) si_pm4_bind_state(sctx, vgt_shader_config, *pm4); } bool si_update_shaders(struct si_context *sctx) { struct pipe_context *ctx = (struct pipe_context*)sctx; struct si_compiler_ctx_state compiler_state; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; struct si_shader *old_vs = si_get_vs_state(sctx); bool old_clip_disable = old_vs ? old_vs->key.opt.hw_vs.clip_disable : false; + struct si_shader *old_ps = sctx->ps_shader.current; + unsigned old_spi_shader_col_format = + old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0; int r; compiler_state.tm = sctx->tm; compiler_state.debug = sctx->b.debug; compiler_state.is_debug_context = sctx->is_debug; /* Update stages before GS. */ if (sctx->tes_shader.cso) { if (!sctx->tf_ring) { si_init_tess_factor_ring(sctx); @@ -3205,21 +3215,25 @@ bool si_update_shaders(struct si_context *sctx) S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS); if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) || sctx->sprite_coord_enable != rs->sprite_coord_enable || sctx->flatshade != rs->flatshade) { sctx->sprite_coord_enable = rs->sprite_coord_enable; sctx->flatshade = rs->flatshade; si_mark_atom_dirty(sctx, &sctx->spi_map); } - if (sctx->screen->b.rbplus_allowed && si_pm4_state_changed(sctx, ps)) + if (sctx->screen->b.rbplus_allowed && + si_pm4_state_changed(sctx, ps) && + (!old_ps || + old_spi_shader_col_format != + sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format)) si_mark_atom_dirty(sctx, &sctx->cb_render_state); if (sctx->ps_db_shader_control != db_shader_control) { sctx->ps_db_shader_control = db_shader_control; si_mark_atom_dirty(sctx, &sctx->db_render_state); } if (sctx->smoothing_enabled != sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing) { sctx->smoothing_enabled = sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing; si_mark_atom_dirty(sctx, &sctx->msaa_config); -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev