From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_pipe.h           |  2 +-
 src/gallium/drivers/radeonsi/si_state.c          |  8 +++----
 src/gallium/drivers/radeonsi/si_state_binning.c  |  4 ++--
 src/gallium/drivers/radeonsi/si_state_draw.c     | 30 +++++++++++-------------
 src/gallium/drivers/radeonsi/si_state_shaders.c  | 10 ++++----
 src/gallium/drivers/radeonsi/si_state_viewport.c |  2 +-
 6 files changed, 27 insertions(+), 29 deletions(-)
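Not part of the patch, just review context: a minimal, self-contained sketch of
the pattern the series switches to (a bool that is cleared before the non-draw
packets and latched by every emit path, instead of a counter). Everything below
except context_roll / context_roll_counter is made up for illustration
(fake_ctx, fake_emit_state, wrote_context_reg); it is not radeonsi code.

	#include <stdbool.h>
	#include <stdio.h>

	/* Stand-in for struct si_context; only the flag matters here. */
	struct fake_ctx {
		bool context_roll;	/* replaces: unsigned context_roll_counter */
	};

	/* An emit helper no longer returns "did I roll the context?" or bumps
	 * a counter; it just latches the flag when it wrote a context register. */
	static void fake_emit_state(struct fake_ctx *ctx, bool wrote_context_reg)
	{
		if (wrote_context_reg)
			ctx->context_roll = true;
	}

	int main(void)
	{
		struct fake_ctx ctx;

		ctx.context_roll = false;	/* cleared before emission, as in si_draw_vbo */
		fake_emit_state(&ctx, true);	/* any state emission may set it */

		if (ctx.context_roll)		/* the scissor-bug path only needs a yes/no */
			puts("context rolled -> re-emit PA_SC_VPORT_SCISSOR");
		return 0;
	}

A plain bool is enough because the consumer in this patch, the gfx9 scissor-bug
workaround in si_draw_vbo(), only tests whether any context register was
written, never how many times.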
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index ee53192..c0211f5 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1063,21 +1063,21 @@ struct si_context {
 	unsigned num_vs_flushes;
 	unsigned num_ps_flushes;
 	unsigned num_cs_flushes;
 	unsigned num_cb_cache_flushes;
 	unsigned num_db_cache_flushes;
 	unsigned num_L2_invalidates;
 	unsigned num_L2_writebacks;
 	unsigned num_resident_handles;
 	uint64_t num_alloc_tex_transfer_bytes;
 	unsigned last_tex_ps_draw_ratio; /* for query */
-	unsigned context_roll_counter;
+	unsigned context_roll;
 
 	/* Queries. */
 	/* Maintain the list of active queries for pausing between IBs. */
 	int num_occlusion_queries;
 	int num_perfect_occlusion_queries;
 	struct list_head active_queries;
 	unsigned num_cs_dw_queries_suspend;
 
 	/* Render condition. */
 	struct pipe_query *render_cond;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 757c17f..bc7e777 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -249,21 +249,21 @@ static void si_emit_cb_render_state(struct si_context *sctx)
 			}
 		}
 
 		/* SX_PS_DOWNCONVERT, SX_BLEND_OPT_EPSILON, SX_BLEND_OPT_CONTROL */
 		radeon_opt_set_context_reg3(sctx, R_028754_SX_PS_DOWNCONVERT,
 					    SI_TRACKED_SX_PS_DOWNCONVERT,
 					    sx_ps_downconvert, sx_blend_opt_epsilon,
 					    sx_blend_opt_control);
 	}
 
 	if (initial_cdw != cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 /*
  * Blender functions
  */
 static uint32_t si_translate_blend_function(int blend_func)
 {
 	switch (blend_func) {
 	case PIPE_BLEND_ADD:
@@ -786,21 +786,21 @@ static void si_emit_clip_regs(struct si_context *sctx)
 		S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
 		S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
 		clipdist_mask | (culldist_mask << 8));
 
 	radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL,
 		SI_TRACKED_PA_CL_CLIP_CNTL,
 		rs->pa_cl_clip_cntl | ucp_mask |
 		S_028810_CLIP_DISABLE(window_space));
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 /*
  * inferred state between framebuffer and rasterizer
  */
 static void si_update_poly_offset_state(struct si_context *sctx)
 {
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 
 	if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) {
@@ -1448,21 +1448,21 @@ static void si_emit_db_render_state(struct si_context *sctx)
 		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
 
 	if (sctx->screen->has_rbplus && !sctx->screen->rbplus_allowed)
 		db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
 
 	radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL,
 				   SI_TRACKED_DB_SHADER_CONTROL, db_shader_control);
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 /*
  * format translation
  */
 static uint32_t si_translate_colorformat(enum pipe_format format)
 {
 	const struct util_format_description *desc = util_format_description(format);
 	if (!desc)
 		return V_028C70_COLOR_INVALID;
@@ -3537,21 +3537,21 @@ static void si_emit_msaa_config(struct si_context *sctx)
 				    SI_TRACKED_PA_SC_LINE_CNTL, sc_line_cntl,
 				    sc_aa_config);
 
 	/* R_028804_DB_EQAA */
 	radeon_opt_set_context_reg(sctx, R_028804_DB_EQAA, SI_TRACKED_DB_EQAA,
 				   db_eqaa);
 
 	/* R_028A4C_PA_SC_MODE_CNTL_1 */
 	radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1,
 				   SI_TRACKED_PA_SC_MODE_CNTL_1, sc_mode_cntl_1);
 
 	if (initial_cdw != cs->current.cdw) {
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 
 		/* GFX9: Flush DFSM when the AA mode changes. */
 		if (sctx->screen->dfsm_allowed) {
 			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 			radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
 		}
 	}
 }
 
 void si_update_ps_iter_samples(struct si_context *sctx)
diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c b/src/gallium/drivers/radeonsi/si_state_binning.c
index 3516e56..5c6c2e6 100644
--- a/src/gallium/drivers/radeonsi/si_state_binning.c
+++ b/src/gallium/drivers/radeonsi/si_state_binning.c
@@ -314,21 +314,21 @@ static void si_emit_dpbb_disable(struct si_context *sctx)
 
 	radeon_opt_set_context_reg(sctx, R_028C44_PA_SC_BINNER_CNTL_0,
 		SI_TRACKED_PA_SC_BINNER_CNTL_0,
 		S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
 		S_028C44_DISABLE_START_OF_PRIM(1));
 	radeon_opt_set_context_reg(sctx, R_028060_DB_DFSM_CONTROL,
 				   SI_TRACKED_DB_DFSM_CONTROL,
 				   S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) |
 				   S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 void si_emit_dpbb_state(struct si_context *sctx)
 {
 	struct si_screen *sscreen = sctx->screen;
 	struct si_state_blend *blend = sctx->queued.named.blend;
 	struct si_state_dsa *dsa = sctx->queued.named.dsa;
 	unsigned db_shader_control = sctx->ps_db_shader_control;
 
 	assert(sctx->chip_class >= GFX9);
@@ -436,12 +436,12 @@ void si_emit_dpbb_state(struct si_context *sctx)
 		S_028C44_CONTEXT_STATES_PER_BIN(context_states_per_bin) |
 		S_028C44_PERSISTENT_STATES_PER_BIN(persistent_states_per_bin) |
 		S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
 		S_028C44_FPOVS_PER_BATCH(fpovs_per_batch) |
 		S_028C44_OPTIMAL_BIN_SELECTION(1));
 	radeon_opt_set_context_reg(sctx, R_028060_DB_DFSM_CONTROL,
 				   SI_TRACKED_DB_DFSM_CONTROL,
 				   S_028060_PUNCHOUT_MODE(punchout_mode) |
 				   S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index b673c2f..7a51b7c 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -59,21 +59,21 @@ static unsigned si_conv_pipe_prim(unsigned mode)
 	return prim_conv[mode];
 }
 
 /**
  * This calculates the LDS size for tessellation shaders (VS, TCS, TES).
  * LS.LDS_SIZE is shared by all 3 shader stages.
  *
  * The information about LDS and other non-compile-time parameters is then
  * written to userdata SGPRs.
  */
-static bool si_emit_derived_tess_state(struct si_context *sctx,
+static void si_emit_derived_tess_state(struct si_context *sctx,
 				       const struct pipe_draw_info *info,
 				       unsigned *num_patches)
 {
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 	struct si_shader *ls_current;
 	struct si_shader_selector *ls;
 	/* The TES pointer will only be used for sctx->last_tcs.
 	 * It would be wrong to think that TCS = TES. */
 	struct si_shader_selector *tcs = sctx->tcs_shader.cso ?
 		sctx->tcs_shader.cso : sctx->tes_shader.cso;
@@ -298,23 +298,22 @@ static bool si_emit_derived_tess_state(struct si_context *sctx,
 
 	if (sctx->last_ls_hs_config != ls_hs_config) {
 		if (sctx->chip_class >= CIK) {
 			radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2,
 						   ls_hs_config);
 		} else {
 			radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG,
 					       ls_hs_config);
 		}
 		sctx->last_ls_hs_config = ls_hs_config;
-		return true; /* true if the context rolls */
+		sctx->context_roll = true;
 	}
-	return false;
 }
 
 static unsigned si_num_prims_for_vertices(const struct pipe_draw_info *info)
 {
 	switch (info->mode) {
 	case PIPE_PRIM_PATCHES:
 		return info->count / info->vertices_per_patch;
 	case PIPE_PRIM_POLYGON:
 		return info->count >= 3;
 	case SI_PRIM_RECTANGLE_LIST:
@@ -534,44 +533,44 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
 		    (info->instance_count > 1 &&
 		     (info->count_from_stream_output ||
 		      si_num_prims_for_vertices(info) <= 1))))
 			sctx->flags |= SI_CONTEXT_VGT_FLUSH;
 	}
 
 	return ia_multi_vgt_param;
 }
 
 /* rast_prim is the primitive type after GS. */
-static bool si_emit_rasterizer_prim_state(struct si_context *sctx)
+static void si_emit_rasterizer_prim_state(struct si_context *sctx)
 {
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 	enum pipe_prim_type rast_prim = sctx->current_rast_prim;
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 
 	/* Skip this if not rendering lines. */
 	if (!util_prim_is_lines(rast_prim))
-		return false;
+		return;
 
 	if (rast_prim == sctx->last_rast_prim &&
 	    rs->pa_sc_line_stipple == sctx->last_sc_line_stipple)
-		return false;
+		return;
 
 	/* For lines, reset the stipple pattern at each primitive. Otherwise,
 	 * reset the stipple pattern at each packet (line strips, line loops).
 	 */
 	radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
 		rs->pa_sc_line_stipple |
 		S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 : 2));
 
 	sctx->last_rast_prim = rast_prim;
 	sctx->last_sc_line_stipple = rs->pa_sc_line_stipple;
-	return true; /* true if the context rolls */
+	sctx->context_roll = true;
 }
 
 static void si_emit_vs_state(struct si_context *sctx,
 			     const struct pipe_draw_info *info)
 {
 	sctx->current_vs_state &= C_VS_STATE_INDEXED;
 	sctx->current_vs_state |= S_VS_STATE_INDEXED(!!info->index_size);
 
 	if (sctx->num_vs_blit_sgprs) {
 		/* Re-emit the state after we leave u_blitter. */
@@ -893,21 +892,21 @@ static void si_emit_surface_sync(struct si_context *sctx,
 		radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
 		radeon_emit(cs, cp_coher_cntl);	/* CP_COHER_CNTL */
 		radeon_emit(cs, 0xffffffff);	/* CP_COHER_SIZE */
 		radeon_emit(cs, 0);		/* CP_COHER_BASE */
 		radeon_emit(cs, 0x0000000A);	/* POLL_INTERVAL */
 	}
 
 	/* ACQUIRE_MEM has an implicit context roll if the current context
 	 * is busy. */
 	if (sctx->has_graphics)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 void si_emit_cache_flush(struct si_context *sctx)
 {
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 	uint32_t flags = sctx->flags;
 
 	if (!sctx->has_graphics) {
 		/* Only process compute flags.
 		 */
 		flags &= SI_CONTEXT_INV_ICACHE |
@@ -1222,24 +1221,23 @@ static void si_get_draw_start_count(struct si_context *sctx,
 		*start = info->start;
 		*count = info->count;
 	}
 }
 
 static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_info *info,
 			       unsigned skip_atom_mask)
 {
 	unsigned num_patches = 0;
 
-	sctx->context_roll_counter |= si_emit_rasterizer_prim_state(sctx);
+	si_emit_rasterizer_prim_state(sctx);
 	if (sctx->tes_shader.cso)
-		sctx->context_roll_counter |=
-			si_emit_derived_tess_state(sctx, info, &num_patches);
+		si_emit_derived_tess_state(sctx, info, &num_patches);
 
 	/* Emit state atoms. */
 	unsigned mask = sctx->dirty_atoms & ~skip_atom_mask;
 	while (mask)
 		sctx->atoms.array[u_bit_scan(&mask)].emit(sctx);
 
 	sctx->dirty_atoms &= skip_atom_mask;
 
 	/* Emit states. */
 	mask = sctx->dirty_states;
@@ -1453,29 +1451,29 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
 	if (!si_upload_vertex_buffer_descriptors(sctx))
 		goto return_cleanup;
 
 	/* Vega10/Raven scissor bug workaround. When any context register is
 	 * written (i.e. the GPU rolls the context), PA_SC_VPORT_SCISSOR
 	 * registers must be written too.
 	 */
 	bool handle_scissor_bug = sctx->screen->has_gfx9_scissor_bug &&
 				  !si_is_atom_dirty(sctx, &sctx->atoms.s.scissors);
 
-	/* If this is > 0 after all the non-draw packets, a context roll occured. */
-	sctx->context_roll_counter = 0;
+	/* If this is true after all the non-draw packets, a context roll occured. */
+	sctx->context_roll = false;
 
 	if (handle_scissor_bug &&
 	    (info->count_from_stream_output ||
 	     sctx->dirty_atoms & si_atoms_that_always_roll_context() ||
 	     sctx->dirty_states & si_states_that_always_roll_context() ||
 	     si_prim_restart_index_changed(sctx, info)))
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 
 	/* Use optimal packet order based on whether we need to sync the pipeline. */
 	if (unlikely(sctx->flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
 				    SI_CONTEXT_FLUSH_AND_INV_DB |
 				    SI_CONTEXT_PS_PARTIAL_FLUSH |
 				    SI_CONTEXT_CS_PARTIAL_FLUSH))) {
 		/* If we have to wait for idle, set all states first, so that all
 		 * SET packets are processed in parallel with previous draw calls.
 		 * Then draw and prefetch at the end. This ensures that the time
 		 * the CUs are idle is very short.
@@ -1490,21 +1488,21 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
 		/* Emit all states except possibly render condition. */
 		si_emit_all_states(sctx, info, masked_atoms);
 		si_emit_cache_flush(sctx);
 		/* <-- CUs are idle here. */
 
 		if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond))
 			sctx->atoms.s.render_cond.emit(sctx);
 		sctx->dirty_atoms = 0;
 
-		if (handle_scissor_bug && sctx->context_roll_counter) {
+		if (handle_scissor_bug && sctx->context_roll) {
 			sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 			sctx->atoms.s.scissors.emit(sctx);
 		}
 
 		si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
 		/* <-- CUs are busy here. */
 
 		/* Start prefetches after the draw has been started. Both will run
 		 * in parallel, but starting the draw first is more important.
 		 */
@@ -1519,21 +1517,21 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
 			/* Only prefetch the API VS and VBO descriptors.
 			 */
 			if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
 				cik_emit_prefetch_L2(sctx, true);
 
 		if (!si_upload_graphics_shader_descriptors(sctx))
 			return;
 
 		si_emit_all_states(sctx, info, 0);
 
-		if (handle_scissor_bug && sctx->context_roll_counter) {
+		if (handle_scissor_bug && sctx->context_roll) {
 			sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 			sctx->atoms.s.scissors.emit(sctx);
 		}
 
 		si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
 
 		/* Prefetch the remaining shaders after the draw has been
 		 * started. */
 		if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
 			cik_emit_prefetch_L2(sctx, false);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 5bdfd4f..d00bb17 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -569,21 +569,21 @@ static void si_emit_shader_es(struct si_context *sctx)
 		radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM,
 					   SI_TRACKED_VGT_TF_PARAM,
 					   shader->vgt_tf_param);
 
 	if (shader->vgt_vertex_reuse_block_cntl)
 		radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
 					   SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
 					   shader->vgt_vertex_reuse_block_cntl);
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
 {
 	struct si_pm4_state *pm4;
 	unsigned num_user_sgprs;
 	unsigned vgpr_comp_cnt;
 	uint64_t va;
 	unsigned oc_lds_en;
@@ -818,21 +818,21 @@ static void si_emit_shader_gs(struct si_context *sctx)
 		radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM,
 					   SI_TRACKED_VGT_TF_PARAM,
 					   shader->vgt_tf_param);
 
 		if (shader->vgt_vertex_reuse_block_cntl)
 			radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
 						   SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
 						   shader->vgt_vertex_reuse_block_cntl);
 	}
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
 {
 	struct si_shader_selector *sel = shader->selector;
 	const ubyte *num_components = sel->info.num_stream_output_components;
 	unsigned gs_num_invocations = sel->gs_num_invocations;
 	struct si_pm4_state *pm4;
 	uint64_t va;
 	unsigned max_stream = sel->max_gs_stream;
@@ -995,21 +995,21 @@ static void si_emit_shader_vs(struct si_context *sctx)
 		radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM,
 					   SI_TRACKED_VGT_TF_PARAM,
 					   shader->vgt_tf_param);
 
 	if (shader->vgt_vertex_reuse_block_cntl)
 		radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
 					   SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
 					   shader->vgt_vertex_reuse_block_cntl);
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 /**
  * Compute the state for \p shader, which will run as a vertex shader on the
  * hardware.
  *
  * If \p gs is non-NULL, it points to the geometry shader for which this shader
  * is the copy shader.
  */
 static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
@@ -1187,21 +1187,21 @@ static void si_emit_shader_ps(struct si_context *sctx)
 	radeon_opt_set_context_reg2(sctx, R_028710_SPI_SHADER_Z_FORMAT,
 				    SI_TRACKED_SPI_SHADER_Z_FORMAT,
 				    shader->ctx_reg.ps.spi_shader_z_format,
 				    shader->ctx_reg.ps.spi_shader_col_format);
 
 	radeon_opt_set_context_reg(sctx, R_02823C_CB_SHADER_MASK,
 				   SI_TRACKED_CB_SHADER_MASK,
 				   shader->ctx_reg.ps.cb_shader_mask);
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 static void si_shader_ps(struct si_shader *shader)
 {
 	struct tgsi_shader_info *info = &shader->selector->info;
 	struct si_pm4_state *pm4;
 	unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask;
 	unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
 	uint64_t va;
 	unsigned input_ena = shader->config.spi_ps_input_ena;
@@ -2863,21 +2863,21 @@ static void si_emit_spi_map(struct si_context *sctx)
 
 	/* R_028644_SPI_PS_INPUT_CNTL_0 */
 	/* Dota 2: Only ~16% of SPI map updates set different values. */
 	/* Talos: Only ~9% of SPI map updates set different values. */
 	unsigned initial_cdw = sctx->gfx_cs->current.cdw;
 	radeon_opt_set_context_regn(sctx, R_028644_SPI_PS_INPUT_CNTL_0,
 				    spi_ps_input_cntl,
 				    sctx->tracked_regs.spi_ps_input_cntl, num_interp);
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 /**
  * Writing CONFIG or UCONFIG VGT registers requires VGT_FLUSH before that.
  */
 static void si_init_config_add_vgt_flush(struct si_context *sctx)
 {
 	if (sctx->init_config_has_vgt_flush)
 		return;
diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c
index a9a1be7..1ec6921 100644
--- a/src/gallium/drivers/radeonsi/si_state_viewport.c
+++ b/src/gallium/drivers/radeonsi/si_state_viewport.c
@@ -276,21 +276,21 @@ static void si_emit_guardband(struct si_context *ctx)
 	radeon_opt_set_context_reg(ctx, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET,
 				   SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
 				   S_028234_HW_SCREEN_OFFSET_X(hw_screen_offset_x >> 4) |
 				   S_028234_HW_SCREEN_OFFSET_Y(hw_screen_offset_y >> 4));
 	radeon_opt_set_context_reg(ctx, R_028BE4_PA_SU_VTX_CNTL,
 				   SI_TRACKED_PA_SU_VTX_CNTL,
 				   S_028BE4_PIX_CENTER(rs->half_pixel_center) |
 				   S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH +
 						       vp_as_scissor.quant_mode));
 
 	if (initial_cdw != ctx->gfx_cs->current.cdw)
-		ctx->context_roll_counter++;
+		ctx->context_roll = true;
 }
 
 static void si_emit_scissors(struct si_context *ctx)
 {
 	struct radeon_cmdbuf *cs = ctx->gfx_cs;
 	struct pipe_scissor_state *states = ctx->scissors.states;
 	unsigned mask = ctx->scissors.dirty_mask;
 	bool scissor_enabled = ctx->queued.named.rasterizer->scissor_enable;
 
 	/* The simple case: Only 1 viewport is active. */
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev