From: Marek Olšák <marek.ol...@amd.com> This will be more useful when we change the quant mode to increase subpixel precision and decrease the viewport range (which might not be possible if the viewport is not centered in the viewport range). --- src/gallium/drivers/radeonsi/si_gfx_cs.c | 1 + src/gallium/drivers/radeonsi/si_state.c | 11 +++- src/gallium/drivers/radeonsi/si_state.h | 2 + .../drivers/radeonsi/si_state_viewport.c | 62 +++++++++++++++---- 4 files changed, 62 insertions(+), 14 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index bdb576f7e5c..5a6f7bb35cb 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -341,20 +341,21 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_MODE_CNTL_1] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x00090000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x00000003; ctx->tracked_regs.reg_value[SI_TRACKED_DB_DFSM_CONTROL] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ] = 0x3f800000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ] = 0x3f800000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ] = 0x3f800000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ] = 0x3f800000; + ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET] = 0; ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_CLIPRECT_RULE] = 0xffff; /* Set all saved registers state to saved. */ ctx->tracked_regs.reg_saved = 0xffffffff; } else { /* Set all saved registers state to unknown. 
*/ ctx->tracked_regs.reg_saved = 0; } /* 0xffffffff is a impossible value to register SPI_PS_INPUT_CNTL_n */ diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index c2d3a6660ad..8940f78cb54 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2724,20 +2724,29 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, bool old_any_dst_linear = sctx->framebuffer.any_dst_linear; unsigned old_nr_samples = sctx->framebuffer.nr_samples; unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit; bool old_has_zsbuf = !!sctx->framebuffer.state.zsbuf; bool old_has_stencil = old_has_zsbuf && ((struct si_texture*)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil; bool unbound = false; int i; + /* Reject zero-sized framebuffers due to a hw bug on SI that occurs + * when PA_SU_HARDWARE_SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0. + * We could implement the full workaround here, but it's a useless case. 
+ */ + if ((!state->width || !state->height) && (state->nr_cbufs || state->zsbuf)) { + unreachable("the framebuffer shouldn't have zero area"); + return; + } + si_update_fb_dirtiness_after_rendering(sctx); for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { if (!sctx->framebuffer.state.cbufs[i]) continue; tex = (struct si_texture*)sctx->framebuffer.state.cbufs[i]->texture; if (tex->dcc_gather_statistics) vi_separate_dcc_stop_query(sctx, tex); } @@ -4900,22 +4909,20 @@ static void si_init_config(struct si_context *sctx) if (!has_clear_state) { si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, S_028230_ER_TRI(0xA) | S_028230_ER_POINT(0xA) | S_028230_ER_RECT(0xA) | /* Required by DX10_DIAMOND_TEST_ENA: */ S_028230_ER_LINE_LR(0x1A) | S_028230_ER_LINE_RL(0x26) | S_028230_ER_LINE_TB(0xA) | S_028230_ER_LINE_BT(0xA)); - /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */ - si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0); } if (sctx->chip_class >= GFX9) { si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0); si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0); diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index d9c3e70cf77..f22a1637a88 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -275,20 +275,22 @@ enum si_tracked_reg { SI_TRACKED_PA_CL_CLIP_CNTL, SI_TRACKED_PA_SC_BINNER_CNTL_0, SI_TRACKED_DB_DFSM_CONTROL, SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, /* 4 consecutive registers */ SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ, SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ, SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ, + SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET, + SI_TRACKED_PA_SC_CLIPRECT_RULE, 
SI_NUM_TRACKED_REGS, }; struct si_tracked_regs { uint32_t reg_saved; uint32_t reg_value[SI_NUM_TRACKED_REGS]; uint32_t spi_ps_input_cntl[32]; }; diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c index 8dc68b126eb..335d63b1814 100644 --- a/src/gallium/drivers/radeonsi/si_state_viewport.c +++ b/src/gallium/drivers/radeonsi/si_state_viewport.c @@ -119,61 +119,95 @@ static void si_emit_one_scissor(struct si_context *ctx, if (ctx->vs_disables_clipping_viewport) { final.minx = final.miny = 0; final.maxx = final.maxy = SI_MAX_SCISSOR; } else { si_clamp_scissor(ctx, &final, vp_scissor); } if (scissor) si_clip_scissor(&final, scissor); + /* Workaround for a hw bug on SI that occurs when PA_SU_HARDWARE_- + * SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0. + */ + if (ctx->chip_class == SI && (final.maxx == 0 || final.maxy == 0)) { + radeon_emit(cs, S_028250_TL_X(1) | + S_028250_TL_Y(1) | + S_028250_WINDOW_OFFSET_DISABLE(1)); + radeon_emit(cs, S_028254_BR_X(1) | + S_028254_BR_Y(1)); + return; + } + radeon_emit(cs, S_028250_TL_X(final.minx) | S_028250_TL_Y(final.miny) | S_028250_WINDOW_OFFSET_DISABLE(1)); radeon_emit(cs, S_028254_BR_X(final.maxx) | S_028254_BR_Y(final.maxy)); } /* the range is [-MAX, MAX] */ #define SI_MAX_VIEWPORT_RANGE 32768 static void si_emit_guardband(struct si_context *ctx) { - const struct si_signed_scissor *vp_as_scissor; - struct si_signed_scissor max_vp_scissor; + struct si_signed_scissor vp_as_scissor; struct pipe_viewport_state vp; float left, top, right, bottom, max_range, guardband_x, guardband_y; float discard_x, discard_y; if (ctx->vs_writes_viewport_index) { /* Shaders can draw to any viewport. Make a union of all * viewports. 
*/ - max_vp_scissor = ctx->viewports.as_scissor[0]; + vp_as_scissor = ctx->viewports.as_scissor[0]; for (unsigned i = 1; i < SI_MAX_VIEWPORTS; i++) { - si_scissor_make_union(&max_vp_scissor, + si_scissor_make_union(&vp_as_scissor, &ctx->viewports.as_scissor[i]); } - vp_as_scissor = &max_vp_scissor; } else { - vp_as_scissor = &ctx->viewports.as_scissor[0]; + vp_as_scissor = ctx->viewports.as_scissor[0]; } + /* Determine the optimal hardware screen offset to center the viewport + * within the viewport range in order to maximize the guardband size. + */ + int hw_screen_offset_x = (vp_as_scissor.maxx - vp_as_scissor.minx) / 2; + int hw_screen_offset_y = (vp_as_scissor.maxy - vp_as_scissor.miny) / 2; + + const unsigned hw_screen_offset_max = 8176; + /* SI-CI need to align the offset to an ubertile consisting of all SEs. */ + const unsigned hw_screen_offset_alignment = + ctx->chip_class >= VI ? 16 : MAX2(ctx->screen->se_tile_repeat, 16); + + hw_screen_offset_x = MIN2(hw_screen_offset_x, hw_screen_offset_max); + hw_screen_offset_y = MIN2(hw_screen_offset_y, hw_screen_offset_max); + + /* Round the screen offset down to hw_screen_offset_alignment (>= 16, assumed to be a power of two), which clears at least the low 4 bits. */ + hw_screen_offset_x &= ~(hw_screen_offset_alignment - 1); + hw_screen_offset_y &= ~(hw_screen_offset_alignment - 1); + + /* Apply the offset to center the viewport and maximize the guardband. */ + vp_as_scissor.minx -= hw_screen_offset_x; + vp_as_scissor.maxx -= hw_screen_offset_x; + vp_as_scissor.miny -= hw_screen_offset_y; + vp_as_scissor.maxy -= hw_screen_offset_y; + /* Reconstruct the viewport transformation from the scissor. 
*/ - vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0; - vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0; - vp.scale[0] = vp_as_scissor->maxx - vp.translate[0]; - vp.scale[1] = vp_as_scissor->maxy - vp.translate[1]; + vp.translate[0] = (vp_as_scissor.minx + vp_as_scissor.maxx) / 2.0; + vp.translate[1] = (vp_as_scissor.miny + vp_as_scissor.maxy) / 2.0; + vp.scale[0] = vp_as_scissor.maxx - vp.translate[0]; + vp.scale[1] = vp_as_scissor.maxy - vp.translate[1]; /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */ - if (vp_as_scissor->minx == vp_as_scissor->maxx) + if (vp_as_scissor.minx == vp_as_scissor.maxx) vp.scale[0] = 0.5; - if (vp_as_scissor->miny == vp_as_scissor->maxy) + if (vp_as_scissor.miny == vp_as_scissor.maxy) vp.scale[1] = 0.5; /* Find the biggest guard band that is inside the supported viewport * range. The guard band is specified as a horizontal and vertical * distance from (0,0) in clip space. * * This is done by applying the inverse viewport transformation * on the viewport limits to get those limits in clip space. * * Use a limit one pixel smaller to allow for some precision error. @@ -214,20 +248,24 @@ static void si_emit_guardband(struct si_context *ctx) } /* If any of the GB registers is updated, all of them must be updated. 
* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, R_028BEC_PA_CL_GB_VERT_DISC_ADJ * R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */ radeon_opt_set_context_reg4(ctx, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, fui(guardband_y), fui(discard_y), fui(guardband_x), fui(discard_x)); + radeon_opt_set_context_reg(ctx, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, + SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET, + S_028234_HW_SCREEN_OFFSET_X(hw_screen_offset_x >> 4) | + S_028234_HW_SCREEN_OFFSET_Y(hw_screen_offset_y >> 4)); } static void si_emit_scissors(struct si_context *ctx) { struct radeon_cmdbuf *cs = ctx->gfx_cs; struct pipe_scissor_state *states = ctx->scissors.states; unsigned mask = ctx->scissors.dirty_mask; bool scissor_enabled = ctx->queued.named.rasterizer->scissor_enable; /* The simple case: Only 1 viewport is active. */ -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev