From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_state.c | 4 +- src/gallium/drivers/radeonsi/si_state_draw.c | 13 +-- .../drivers/radeonsi/si_state_shaders.c | 106 +++++++++--------- .../drivers/radeonsi/si_state_viewport.c | 13 +-- 4 files changed, 61 insertions(+), 75 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 3d19af28507..8489d0f9778 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1404,21 +1404,21 @@ static void si_emit_db_render_state(struct si_context *sctx) db_shader_control = sctx->ps_db_shader_control; /* Bug workaround for smoothing (overrasterization) on SI. */ if (sctx->chip_class == SI && sctx->smoothing_enabled) { db_shader_control &= C_02880C_Z_ORDER; db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); } /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */ - if (!rs || !rs->multisample_enable) + if (!rs->multisample_enable) db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; if (sctx->screen->has_rbplus && !sctx->screen->rbplus_allowed) db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, db_shader_control); } @@ -3237,21 +3237,21 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx) S_028830_SMALL_PRIM_FILTER_ENABLE(1) | /* line bug */ S_028830_LINE_FILTER_DISABLE(sctx->family <= CHIP_POLARIS12); /* The alternative of setting sample locations to 0 would * require a DB flush to avoid Z errors, see * https://bugs.freedesktop.org/show_bug.cgi?id=96908 */ if (has_msaa_sample_loc_bug && sctx->framebuffer.nr_samples > 1 && - rs && !rs->multisample_enable) + !rs->multisample_enable) small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE; radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, small_prim_filter_cntl); } } static bool si_out_of_order_rasterization(struct si_context *sctx) { struct si_state_blend *blend = sctx->queued.named.blend; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 5370587d747..942cb3c7994 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -1222,29 +1222,24 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) */ if (unlikely(!info->instance_count)) return; /* Handle count == 0. */ if (unlikely(!info->count && (index_size || !info->count_from_stream_output))) return; } - if (unlikely(!sctx->vs_shader.cso)) { - assert(0); - return; - } - if (unlikely(!sctx->ps_shader.cso && (!rs || !rs->rasterizer_discard))) { - assert(0); - return; - } - if (unlikely(!!sctx->tes_shader.cso != (info->mode == PIPE_PRIM_PATCHES))) { + if (unlikely(!sctx->vs_shader.cso || + !rs || + (!sctx->ps_shader.cso && !rs->rasterizer_discard) || + (!!sctx->tes_shader.cso != (info->mode == PIPE_PRIM_PATCHES)))) { assert(0); return; } /* Recompute and re-emit the texture resource states if needed. */ dirty_tex_counter = p_atomic_read(&sctx->screen->dirty_tex_counter); if (unlikely(dirty_tex_counter != sctx->last_dirty_tex_counter)) { sctx->last_dirty_tex_counter = dirty_tex_counter; sctx->framebuffer.dirty_cbufs |= ((1 << sctx->framebuffer.state.nr_cbufs) - 1); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index aa270ebcb4d..e7610af2fa7 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1411,79 +1411,77 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, key->part.ps.epilog.color_is_int10 = sctx->framebuffer.color_is_int10; } /* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */ if (!key->part.ps.epilog.last_cbuf) { key->part.ps.epilog.spi_shader_col_format &= sel->colors_written_4bit; key->part.ps.epilog.color_is_int8 &= sel->info.colors_written; key->part.ps.epilog.color_is_int10 &= sel->info.colors_written; } - if (rs) { - bool is_poly = !util_prim_is_points_or_lines(sctx->current_rast_prim); - bool is_line = util_prim_is_lines(sctx->current_rast_prim); + bool is_poly = !util_prim_is_points_or_lines(sctx->current_rast_prim); + bool is_line = util_prim_is_lines(sctx->current_rast_prim); - key->part.ps.prolog.color_two_side = rs->two_side && sel->info.colors_read; - key->part.ps.prolog.flatshade_colors = rs->flatshade && sel->info.colors_read; + key->part.ps.prolog.color_two_side = rs->two_side && sel->info.colors_read; + key->part.ps.prolog.flatshade_colors = rs->flatshade && sel->info.colors_read; - if (sctx->queued.named.blend) { - key->part.ps.epilog.alpha_to_one = sctx->queued.named.blend->alpha_to_one && - rs->multisample_enable; - } + if (sctx->queued.named.blend) { + key->part.ps.epilog.alpha_to_one = sctx->queued.named.blend->alpha_to_one && + rs->multisample_enable; + } - key->part.ps.prolog.poly_stipple = rs->poly_stipple_enable && is_poly; - key->part.ps.epilog.poly_line_smoothing = ((is_poly && rs->poly_smooth) || - (is_line && rs->line_smooth)) && - sctx->framebuffer.nr_samples <= 1; - key->part.ps.epilog.clamp_color = rs->clamp_fragment_color; + key->part.ps.prolog.poly_stipple = rs->poly_stipple_enable && is_poly; + key->part.ps.epilog.poly_line_smoothing = ((is_poly && rs->poly_smooth) || + (is_line && rs->line_smooth)) && + sctx->framebuffer.nr_samples <= 1; + key->part.ps.epilog.clamp_color = rs->clamp_fragment_color; - if (sctx->ps_iter_samples > 1 && - sel->info.reads_samplemask) { - key->part.ps.prolog.samplemask_log_ps_iter = - util_logbase2(sctx->ps_iter_samples); - } + if (sctx->ps_iter_samples > 1 && + sel->info.reads_samplemask) { + key->part.ps.prolog.samplemask_log_ps_iter = + util_logbase2(sctx->ps_iter_samples); + } - if (rs->force_persample_interp && - rs->multisample_enable && - sctx->framebuffer.nr_samples > 1 && - sctx->ps_iter_samples > 1) { - key->part.ps.prolog.force_persp_sample_interp = - sel->info.uses_persp_center || - sel->info.uses_persp_centroid; - - key->part.ps.prolog.force_linear_sample_interp = - sel->info.uses_linear_center || - sel->info.uses_linear_centroid; - } else if (rs->multisample_enable && - sctx->framebuffer.nr_samples > 1) { - key->part.ps.prolog.bc_optimize_for_persp = - sel->info.uses_persp_center && - sel->info.uses_persp_centroid; - key->part.ps.prolog.bc_optimize_for_linear = - sel->info.uses_linear_center && - sel->info.uses_linear_centroid; - } else { - /* Make sure SPI doesn't compute more than 1 pair - * of (i,j), which is the optimization here. */ - key->part.ps.prolog.force_persp_center_interp = - sel->info.uses_persp_center + - sel->info.uses_persp_centroid + - sel->info.uses_persp_sample > 1; - - key->part.ps.prolog.force_linear_center_interp = - sel->info.uses_linear_center + - sel->info.uses_linear_centroid + - sel->info.uses_linear_sample > 1; - - if (sel->info.opcode_count[TGSI_OPCODE_INTERP_SAMPLE]) - key->mono.u.ps.interpolate_at_sample_force_center = 1; - } + if (rs->force_persample_interp && + rs->multisample_enable && + sctx->framebuffer.nr_samples > 1 && + sctx->ps_iter_samples > 1) { + key->part.ps.prolog.force_persp_sample_interp = + sel->info.uses_persp_center || + sel->info.uses_persp_centroid; + + key->part.ps.prolog.force_linear_sample_interp = + sel->info.uses_linear_center || + sel->info.uses_linear_centroid; + } else if (rs->multisample_enable && + sctx->framebuffer.nr_samples > 1) { + key->part.ps.prolog.bc_optimize_for_persp = + sel->info.uses_persp_center && + sel->info.uses_persp_centroid; + key->part.ps.prolog.bc_optimize_for_linear = + sel->info.uses_linear_center && + sel->info.uses_linear_centroid; + } else { + /* Make sure SPI doesn't compute more than 1 pair + * of (i,j), which is the optimization here. */ + key->part.ps.prolog.force_persp_center_interp = + sel->info.uses_persp_center + + sel->info.uses_persp_centroid + + sel->info.uses_persp_sample > 1; + + key->part.ps.prolog.force_linear_center_interp = + sel->info.uses_linear_center + + sel->info.uses_linear_centroid + + sel->info.uses_linear_sample > 1; + + if (sel->info.opcode_count[TGSI_OPCODE_INTERP_SAMPLE]) + key->mono.u.ps.interpolate_at_sample_force_center = 1; } key->part.ps.epilog.alpha_func = si_get_alpha_test_func(sctx); /* ps_uses_fbfetch is true only if the color buffer is bound. */ if (sctx->ps_uses_fbfetch) { struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0]; struct pipe_resource *tex = cb0->texture; /* 1D textures are allocated and used as 2D on GFX9. */ diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c index d16c3e7e41b..d0287d5ad75 100644 --- a/src/gallium/drivers/radeonsi/si_state_viewport.c +++ b/src/gallium/drivers/radeonsi/si_state_viewport.c @@ -186,22 +186,21 @@ static void si_emit_guardband(struct si_context *ctx) bottom = ( max_range - vp.translate[1]) / vp.scale[1]; assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1); guardband_x = MIN2(-left, right); guardband_y = MIN2(-top, bottom); discard_x = 1.0; discard_y = 1.0; - if (unlikely(util_prim_is_points_or_lines(ctx->current_rast_prim)) && - ctx->queued.named.rasterizer) { + if (unlikely(util_prim_is_points_or_lines(ctx->current_rast_prim))) { /* When rendering wide points or lines, we need to be more * conservative about when to discard them entirely. */ const struct si_state_rasterizer *rs = ctx->queued.named.rasterizer; float pixels; if (ctx->current_rast_prim == PIPE_PRIM_POINTS) pixels = rs->max_point_size; else pixels = rs->line_width; @@ -222,24 +221,21 @@ static void si_emit_guardband(struct si_context *ctx) radeon_emit(cs, fui(discard_y)); /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */ radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */ radeon_emit(cs, fui(discard_x)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */ } static void si_emit_scissors(struct si_context *ctx) { struct radeon_winsys_cs *cs = ctx->gfx_cs; struct pipe_scissor_state *states = ctx->scissors.states; unsigned mask = ctx->scissors.dirty_mask; - bool scissor_enabled = false; - - if (ctx->queued.named.rasterizer) - scissor_enabled = ctx->queued.named.rasterizer->scissor_enable; + bool scissor_enabled = ctx->queued.named.rasterizer->scissor_enable; /* The simple case: Only 1 viewport is active. */ if (!ctx->vs_writes_viewport_index) { struct si_signed_scissor *vp = &ctx->viewports.as_scissor[0]; if (!(mask & 1)) return; radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2); si_emit_one_scissor(ctx, cs, vp, scissor_enabled ? &states[0] : NULL); @@ -341,27 +337,24 @@ si_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz, return; } util_viewport_zmin_zmax(vp, halfz, zmin, zmax); } static void si_emit_depth_ranges(struct si_context *ctx) { struct radeon_winsys_cs *cs = ctx->gfx_cs; struct pipe_viewport_state *states = ctx->viewports.states; unsigned mask = ctx->viewports.depth_range_dirty_mask; - bool clip_halfz = false; + bool clip_halfz = ctx->queued.named.rasterizer->clip_halfz; bool window_space = ctx->vs_disables_clipping_viewport; float zmin, zmax; - if (ctx->queued.named.rasterizer) - clip_halfz = ctx->queued.named.rasterizer->clip_halfz; - /* The simple case: Only 1 viewport is active. */ if (!ctx->vs_writes_viewport_index) { if (!(mask & 1)) return; si_viewport_zmin_zmax(&states[0], clip_halfz, window_space, &zmin, &zmax); radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2); radeon_emit(cs, fui(zmin)); -- 2.17.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev