From: Nicolai Hähnle <nicolai.haeh...@amd.com> This is a bit conservative, but a more precise solution requires access to the rasterizer state. This is something to tackle after the fork between r600 and radeonsi.
Cc: mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/radeon/r600_viewport.c | 21 +++++++++++++++++++-- src/gallium/drivers/radeonsi/si_state_draw.c | 7 +++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_viewport.c b/src/gallium/drivers/radeon/r600_viewport.c index cf6d5f28ac0..6e4fc9d751c 100644 --- a/src/gallium/drivers/radeon/r600_viewport.c +++ b/src/gallium/drivers/radeon/r600_viewport.c @@ -158,20 +158,21 @@ static void r600_emit_one_scissor(struct r600_common_context *rctx, /* the range is [-MAX, MAX] */ #define GET_MAX_VIEWPORT_RANGE(rctx) (rctx->chip_class >= EVERGREEN ? 32768 : 16384) static void r600_emit_guardband(struct r600_common_context *rctx, struct r600_signed_scissor *vp_as_scissor) { struct radeon_winsys_cs *cs = rctx->gfx.cs; struct pipe_viewport_state vp; float left, top, right, bottom, max_range, guardband_x, guardband_y; + float discard_x, discard_y; /* Reconstruct the viewport transformation from the scissor. */ vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0; vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0; vp.scale[0] = vp_as_scissor->maxx - vp.translate[0]; vp.scale[1] = vp_as_scissor->maxy - vp.translate[1]; /* Treat a 0x0 viewport as 1x1 to prevent division by zero. 
*/ if (vp_as_scissor->minx == vp_as_scissor->maxx) vp.scale[0] = 0.5; @@ -191,30 +192,46 @@ static void r600_emit_guardband(struct r600_common_context *rctx, left = (-max_range - vp.translate[0]) / vp.scale[0]; right = ( max_range - vp.translate[0]) / vp.scale[0]; top = (-max_range - vp.translate[1]) / vp.scale[1]; bottom = ( max_range - vp.translate[1]) / vp.scale[1]; assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1); guardband_x = MIN2(-left, right); guardband_y = MIN2(-top, bottom); + discard_x = 1.0; + discard_y = 1.0; + + if (rctx->current_rast_prim < PIPE_PRIM_TRIANGLES) { + /* When rendering wide points or lines, we need to be more + * conservative about when to discard them entirely. Since + * point size can be determined by the VS output, we basically + * disable discard completely here. + * + * TODO: This can hurt performance when rendering lines and + * points with fixed size, and could be improved. + */ + discard_x = guardband_x; + discard_y = guardband_y; + } + /* If any of the GB registers is updated, all of them must be updated. 
*/ if (rctx->chip_class >= CAYMAN) radeon_set_context_reg_seq(cs, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4); else radeon_set_context_reg_seq(cs, R600_R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 4); radeon_emit(cs, fui(guardband_y)); /* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */ - radeon_emit(cs, fui(1.0)); /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */ + radeon_emit(cs, fui(discard_y)); /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */ radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */ - radeon_emit(cs, fui(1.0)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */ + radeon_emit(cs, fui(discard_x)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */ } static void r600_emit_scissors(struct r600_common_context *rctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = rctx->gfx.cs; struct pipe_scissor_state *states = rctx->scissors.states; unsigned mask = rctx->scissors.dirty_mask; bool scissor_enabled = rctx->scissor_enabled; struct r600_signed_scissor max_vp_scissor; int i; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index e4f592c3845..fb91d936c96 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -1248,20 +1248,27 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) * draw_vbo recursively, and before si_update_shaders, which uses * current_rast_prim for this draw_vbo call. 
*/ if (sctx->gs_shader.cso) rast_prim = sctx->gs_shader.cso->gs_output_prim; else if (sctx->tes_shader.cso) rast_prim = sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; else rast_prim = info->mode; if (rast_prim != sctx->b.current_rast_prim) { + bool old_is_poly = sctx->b.current_rast_prim >= PIPE_PRIM_TRIANGLES; + bool new_is_poly = rast_prim >= PIPE_PRIM_TRIANGLES; + if (old_is_poly != new_is_poly) { + sctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1; + si_set_atom_dirty(sctx, &sctx->b.scissors.atom, true); + } + sctx->b.current_rast_prim = rast_prim; sctx->do_update_shaders = true; } if (sctx->tes_shader.cso && (sctx->b.family == CHIP_VEGA10 || sctx->b.family == CHIP_RAVEN)) { /* Determine whether the LS VGPR fix should be applied. * * It is only required when num input CPs > num output CPs, * which cannot happen with the fixed function TCS. We should -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev