On Tue, Jul 17, 2012 at 7:58 PM, <j.gli...@gmail.com> wrote: > From: Jerome Glisse <jgli...@redhat.com> > > htile is used for HiZ and HiS support and fast Z/S clears. > This commit just adds the htile setup and Fast Z clear. > We don't take full advantage of HiS with that patch. > > v2 really use fast clear, still random issue with some tiles > need to try more flush combination, fix depth/stencil > texture decompression > v3 fix random issue on r6xx/r7xx > v4 rebase on top of latest mesa, disable CB export when clearing > htile surface to avoid wasting bandwidth > v5 resummarize htile surface when uploading z value. Fix z/stencil > decompression, the custom blitter with custom dsa is no longer > needed. > v6 Reorganize render control/override update mechanism, fixing more > issues in the process. > v7 Add nop after depth surface base update to work around some htile > flushing issue. Force htile to 8x8 on r6xx/r7xx as other combination > have issue. Do not enable hyperz when flushing/uncompressing > depth buffer. > v8 Fix htile surface, preload and prefetch setup. Only set preload > and prefetch on htile surface clear like fglrx. Record depth > clear value per level. Support several level for the htile > surface. First depth clear can't be a fast clear. 
> v9 Fix comments, properly account new register in emit function, > disable fast zclear if clearing different layer of texture > array to different value > > Signed-off-by: Pierre-Eric Pelloux-Prayer <pell...@gmail.com> > Signed-off-by: Alex Deucher <alexander.deuc...@amd.com> > Signed-off-by: Jerome Glisse <jgli...@redhat.com> > --- > src/gallium/drivers/r600/evergreen_hw_context.c | 6 + > src/gallium/drivers/r600/evergreen_state.c | 102 ++++++++++++----- > src/gallium/drivers/r600/evergreend.h | 4 + > src/gallium/drivers/r600/r600_blit.c | 38 +++++++ > src/gallium/drivers/r600/r600_hw_context.c | 25 +++++ > src/gallium/drivers/r600/r600_pipe.c | 8 ++ > src/gallium/drivers/r600/r600_pipe.h | 13 ++- > src/gallium/drivers/r600/r600_resource.h | 7 ++ > src/gallium/drivers/r600/r600_state.c | 133 > ++++++++++++++++++++--- > src/gallium/drivers/r600/r600_texture.c | 103 ++++++++++++++++++ > src/gallium/drivers/r600/r600d.h | 6 + > 11 files changed, 399 insertions(+), 46 deletions(-) > > diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c > b/src/gallium/drivers/r600/evergreen_hw_context.c > index 081701f..546c884 100644 > --- a/src/gallium/drivers/r600/evergreen_hw_context.c > +++ b/src/gallium/drivers/r600/evergreen_hw_context.c > @@ -62,6 +62,9 @@ static const struct r600_reg evergreen_context_reg_list[] = > { > {GROUP_FORCE_NEW_BLOCK, 0, 0}, > {R_028058_DB_DEPTH_SIZE, 0, 0}, > {R_02805C_DB_DEPTH_SLICE, 0, 0}, > + {R_02802C_DB_DEPTH_CLEAR, 0, 0}, > + {R_028ABC_DB_HTILE_SURFACE, 0, 0}, > + {R_028AC8_DB_PRELOAD_CONTROL, 0, 0}, > {R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0}, > {R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0}, > {R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0}, > @@ -319,6 +322,9 @@ static const struct r600_reg cayman_context_reg_list[] = { > {GROUP_FORCE_NEW_BLOCK, 0, 0}, > {R_028058_DB_DEPTH_SIZE, 0, 0}, > {R_02805C_DB_DEPTH_SLICE, 0, 0}, > + {R_02802C_DB_DEPTH_CLEAR, 0, 0}, > + {R_028ABC_DB_HTILE_SURFACE, 0, 0}, > + {R_028AC8_DB_PRELOAD_CONTROL, 0, 0}, > 
{R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0}, > {R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0}, > {R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0}, > diff --git a/src/gallium/drivers/r600/evergreen_state.c > b/src/gallium/drivers/r600/evergreen_state.c > index a66387b..214d76b 100644 > --- a/src/gallium/drivers/r600/evergreen_state.c > +++ b/src/gallium/drivers/r600/evergreen_state.c > @@ -710,13 +710,15 @@ static void *evergreen_create_blend_state(struct > pipe_context *ctx, > } > blend->cb_target_mask = target_mask; > > - if (target_mask) > + if (target_mask) { > color_control |= S_028808_MODE(V_028808_CB_NORMAL); > - else > + } else { > color_control |= S_028808_MODE(V_028808_CB_DISABLE); > + } > > r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, > color_control); > + > /* only have dual source on MRT0 */ > blend->dual_src_blend = util_blend_state_is_dual(state, 0); > for (int i = 0; i < 8; i++) { > @@ -1668,6 +1670,26 @@ static void evergreen_db(struct r600_context *rctx, > struct r600_pipe_state *rsta > } > } > > + /* hyperz */ > + if (rtex->hyperz) { > + uint64_t htile_offset = > rtex->hyperz->surface.level[level].offset; > + > + rctx->db_misc_state.hyperz = true; > + rctx->db_misc_state.db_htile_surface_mask = 0xffffffff; > + r600_atom_dirty(rctx, &rctx->db_misc_state.atom); > + z_info |= S_028040_TILE_SURFACE_ENABLE(1); > + r600_pipe_state_add_reg_bo(rstate, > R_028014_DB_HTILE_DATA_BASE, > + htile_offset >> 8, > &rtex->hyperz->resource, > + RADEON_USAGE_READWRITE); > + /* FORCE_OFF means HiZ/HiS are determined by > DB_SHADER_CONTROL */ > + r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, > rtex->db_preload_control); > + } else { > + if (rctx->db_misc_state.hyperz) { > + rctx->db_misc_state.hyperz = FALSE; > + r600_atom_dirty(rctx, &rctx->db_misc_state.atom); > + } > + } > +
If the zbuffer were disabled by binding a NULL zbuffer, db_misc_state wouldn't be updated, because evergreen_db wouldn't be called. Is this intentional? If not, db_misc_state.hyperz should be set to false in set_framebuffer_state if state->zsbuf is NULL and then made dirty, so that the DB registers are properly updated for a NULL zbuffer. (same for r600_state.c) > r600_pipe_state_add_reg_bo(rstate, R_028040_DB_Z_INFO, z_info, > &rtex->resource, RADEON_USAGE_READWRITE); > r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE, > @@ -1747,13 +1769,39 @@ static void evergreen_emit_db_misc_state(struct > r600_context *rctx, struct r600_ > { > struct radeon_winsys_cs *cs = rctx->cs; > struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom; > + unsigned db_render_override = 0; > unsigned db_render_control = 0; > unsigned db_count_control = 0; > - unsigned db_render_override = > - S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) | > - S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | > - S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); > - > + unsigned cliprect_rule = 0xffff; > + unsigned db_htile_surface = 0; > + struct r600_resource_texture *rtex; > + float depth_clear = 0.0; > + > + if (a->hyperz && rctx->framebuffer.zsbuf) { > + rtex = (struct > r600_resource_texture*)rctx->framebuffer.zsbuf->texture; > + > + db_htile_surface = rtex->db_htile_surface; > + db_htile_surface &= rctx->db_misc_state.db_htile_surface_mask; > + /* further htile surface without preload */ > + rctx->db_misc_state.db_htile_surface_mask = > S_028ABC_HTILE_WIDTH(1) | > + > S_028ABC_HTILE_HEIGHT(1) | > + > S_028ABC_LINEAR(1) | > + > S_028ABC_FULL_CACHE(1); > + } > + > + db_render_override = > S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | > + > S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); > + if (a->hyperz && !a->flush_depthstencil_through_cb) { > + /* FORCE_OFF means HiZ/HiS are determined by > DB_SHADER_CONTROL */ > + db_render_override |= > 
S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_OFF); > + if (a->clear_depthstencil) { > + db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(1); > + /* need to disable cliprect for fast clear */ > + cliprect_rule = 0; > + } > + } else { > + db_render_override |= > S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE); > + } > if (a->occlusion_query_enabled) { > db_count_control |= S_028004_PERFECT_ZPASS_COUNTS(1); > db_render_override |= S_02800C_NOOP_CULL_DISABLE(1); > @@ -1764,10 +1812,21 @@ static void evergreen_emit_db_misc_state(struct > r600_context *rctx, struct r600_ > S_028000_COPY_CENTROID(1); > } > > + if (rctx->framebuffer.zsbuf) { > + unsigned level = rctx->framebuffer.zsbuf->u.tex.level; > + > + rtex = (struct > r600_resource_texture*)rctx->framebuffer.zsbuf->texture; > + depth_clear = rtex->depth_clear_value[level]; > + } > + r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(depth_clear)); > r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2); > - r600_write_value(cs, db_render_control); /* > R_028000_DB_RENDER_CONTROL */ > - r600_write_value(cs, db_count_control); /* R_028004_DB_COUNT_CONTROL > */ > + /* R_028000_DB_RENDER_CONTROL */ > + r600_write_value(cs, db_render_control); > + /* R_028004_DB_COUNT_CONTROL */ > + r600_write_value(cs, db_count_control); > r600_write_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, > db_render_override); > + r600_write_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, > cliprect_rule); > + r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, > db_htile_surface); > } > > static void evergreen_emit_vertex_buffers(struct r600_context *rctx, > @@ -1911,7 +1970,7 @@ void evergreen_init_state_functions(struct r600_context > *rctx) > { > r600_init_atom(&rctx->cb_misc_state.atom, > evergreen_emit_cb_misc_state, 0, 0); > r600_atom_dirty(rctx, &rctx->cb_misc_state.atom); > - r600_init_atom(&rctx->db_misc_state.atom, > evergreen_emit_db_misc_state, 7, 0); > + r600_init_atom(&rctx->db_misc_state.atom, > 
evergreen_emit_db_misc_state, 16, 0); > r600_atom_dirty(rctx, &rctx->db_misc_state.atom); > r600_init_atom(&rctx->vertex_buffer_state.atom, > evergreen_fs_emit_vertex_buffers, 0, 0); > r600_init_atom(&rctx->cs_vertex_buffer_state.atom, > evergreen_cs_emit_vertex_buffers, 0, 0); > @@ -2065,19 +2124,15 @@ static void cayman_init_atom_start_cs(struct > r600_context *rctx) > r600_store_value(cb, ~0); /* CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0 */ > r600_store_value(cb, ~0); /* CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1 */ > > - r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2); > - r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */ > - r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */ > + r600_store_context_reg_seq(cb, R_009830_DB_DEBUG, 3); > + r600_store_value(cb, 0); /* R_009830_DB_DEBUG */ > + r600_store_value(cb, 0); /* R_009834_DB_DEBUG2 */ > + r600_store_value(cb, 0); /* R_009838_DB_DEBUG3 */ > + r600_store_config_reg(cb, R_009854_DB_WATERMARKS, 0x00420204); > > r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0); > > - r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, > 3); > - r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */ > - r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */ > - r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */ > - > r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0); > - r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); > > r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2); > r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */ > @@ -2552,7 +2607,6 @@ void evergreen_init_atom_start_cs(struct r600_context > *rctx) > r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */ > > r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0); > - r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); > r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); > > r600_store_context_reg_seq(cb, 
R_0282D0_PA_SC_VPORT_ZMIN_0, 2); > @@ -2563,10 +2617,9 @@ void evergreen_init_atom_start_cs(struct r600_context > *rctx) > r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F); > r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0); > > - r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, > 3); > + r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, > 2); > r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */ > r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */ > - r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */ > > r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00); > > @@ -2666,7 +2719,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, > struct r600_pipe_shader > > rstate->nregs = 0; > > - db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); > + db_shader_control = 0; > for (i = 0; i < rshader->ninput; i++) { > /* evergreen NUM_INTERP only contains values interpolated > into the LDS, > POSITION goes via GPRs from the SC so isn't counted */ > @@ -2875,14 +2928,13 @@ void evergreen_update_dual_export_state(struct > r600_context * rctx) > { > unsigned dual_export = rctx->export_16bpc && rctx->nr_cbufs && > !rctx->ps_shader->current->ps_depth_export; > - > unsigned db_source_format = dual_export ? 
V_02880C_EXPORT_DB_TWO : > V_02880C_EXPORT_DB_FULL; > - > unsigned db_shader_control = > rctx->ps_shader->current->db_shader_control | > S_02880C_DUAL_EXPORT_ENABLE(dual_export) | > S_02880C_DB_SOURCE_FORMAT(db_source_format); > > + db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); > if (db_shader_control != rctx->db_shader_control) { > struct r600_pipe_state rstate; > > diff --git a/src/gallium/drivers/r600/evergreend.h > b/src/gallium/drivers/r600/evergreend.h > index 6c4873c..1ac5944 100644 > --- a/src/gallium/drivers/r600/evergreend.h > +++ b/src/gallium/drivers/r600/evergreend.h > @@ -1589,6 +1589,10 @@ > #define S_028008_SLICE_MAX(x) (((x) & 0x7FF) << 13) > #define G_028008_SLICE_MAX(x) (((x) >> 13) & 0x7FF) > #define C_028008_SLICE_MAX 0xFF001FFF > +#define R_009830_DB_DEBUG 0x00009830 > +#define R_009834_DB_DEBUG2 0x00009834 > +#define R_009838_DB_DEBUG3 0x00009838 > +#define R_009854_DB_WATERMARKS 0x00009854 > #define R_02800C_DB_RENDER_OVERRIDE 0x0002800C > #define V_02800C_FORCE_OFF 0 > #define V_02800C_FORCE_ENABLE 1 > diff --git a/src/gallium/drivers/r600/r600_blit.c > b/src/gallium/drivers/r600/r600_blit.c > index 98f8b84..9c01f6d 100644 > --- a/src/gallium/drivers/r600/r600_blit.c > +++ b/src/gallium/drivers/r600/r600_blit.c > @@ -24,6 +24,7 @@ > #include "util/u_surface.h" > #include "util/u_blitter.h" > #include "util/u_format.h" > +#include "r600d.h" > > enum r600_blitter_op /* bitmask */ > { > @@ -220,11 +221,48 @@ static void r600_clear(struct pipe_context *ctx, > unsigned buffers, > struct r600_context *rctx = (struct r600_context *)ctx; > struct pipe_framebuffer_state *fb = &rctx->framebuffer; > > + /* if hyperz enabled just clear hyperz */ > + if (fb->zsbuf && (buffers & PIPE_CLEAR_DEPTHSTENCIL)) { > + struct r600_resource_texture *rtex; > + unsigned level = fb->zsbuf->u.tex.level; > + unsigned flayer = fb->zsbuf->u.tex.first_layer; > + unsigned llayer = fb->zsbuf->u.tex.last_layer; > + > + rtex = (struct 
r600_resource_texture*)fb->zsbuf->texture; > + rtex->depth_clear_value[level] = depth; The depth value should be set only if (buffers & PIPE_CLEAR_DEPTH) is non-zero, which the first conditional in this block doesn't assure. You clearly don't work with the stencil clear value here, so maybe PIPE_CLEAR_DEPTHSTENCIL should be replaced with PIPE_CLEAR_DEPTH. > + > + /* enable fast clear only if clearing all layer in texture > array > + * to same value, we can't use hyperz fast clear if hyperz > have > + * different value. > + */ > + if (rtex->hyperz && rtex->surface.array_size > 1 && !flayer > && llayer == rtex->surface.array_size) { You do realize that this conditional disallows the fast clear for ordinary 1D and 2D textures, right? I am not sure what you meant to do, but the condition "rtex->surface.array_size > 1" is definitely wrong, because array_size is most often equal to 1. > + if (buffers & PIPE_CLEAR_DEPTH) { > + rctx->db_misc_state.hyperz = true; > + if (rtex->htile_initialized[level]) { > + > rctx->db_misc_state.clear_depthstencil = true; > + } else { > + rtex->htile_initialized[level] = true; > + > rctx->db_misc_state.db_htile_surface_mask = S_028D24_HTILE_WIDTH(1) | > + > S_028D24_HTILE_HEIGHT(1) | > + > S_028D24_LINEAR(1) | > + > S_028D24_FULL_CACHE(1); > + } > + r600_atom_dirty(rctx, > &rctx->db_misc_state.atom); > + } > + } > + } > + > r600_blitter_begin(ctx, R600_CLEAR); > util_blitter_clear(rctx->blitter, fb->width, fb->height, > fb->nr_cbufs, buffers, fb->nr_cbufs ? 
> fb->cbufs[0]->format : PIPE_FORMAT_NONE, > color, depth, stencil); > r600_blitter_end(ctx); > + > + /* disable fast clear */ > + if (rctx->db_misc_state.clear_depthstencil) { > + rctx->db_misc_state.clear_depthstencil = false; > + r600_atom_dirty(rctx, &rctx->db_misc_state.atom); > + } > } > > static void r600_clear_render_target(struct pipe_context *ctx, > diff --git a/src/gallium/drivers/r600/r600_hw_context.c > b/src/gallium/drivers/r600/r600_hw_context.c > index e80f39c..4f2c03a 100644 > --- a/src/gallium/drivers/r600/r600_hw_context.c > +++ b/src/gallium/drivers/r600/r600_hw_context.c > @@ -180,6 +180,27 @@ static void r600_init_block(struct r600_context *ctx, > (ctx->family < CHIP_RV770) && reg[i+j].flags & > REG_FLAG_RV6XX_SBU) { > block->pm4[block->pm4_ndwords++] = > PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0); > block->pm4[block->pm4_ndwords++] = reg[i+j].sbu_flags; > + if (reg[i+j].sbu_flags & SURFACE_BASE_UPDATE_DEPTH) { > + /* to work around flushing issue in htile > surface */ > + block->pm4[block->pm4_ndwords++] = > PKT3(PKT3_NOP, 16, 0); > + block->pm4[block->pm4_ndwords++] = 0xcafedead; > + block->pm4[block->pm4_ndwords++] = 0xcafedead; > + block->pm4[block->pm4_ndwords++] = 0xcafedead; > + block->pm4[block->pm4_ndwords++] = 0xcafedead; > + block->pm4[block->pm4_ndwords++] = 0xcafedead; > + block->pm4[block->pm4_ndwords++] = 0xcafedead; > + block->pm4[block->pm4_ndwords++] = 0xcafedead; > + block->pm4[block->pm4_ndwords++] = 0xcafedead; > + block->pm4[block->pm4_ndwords++] = 0xcafedead; > + block->pm4[block->pm4_ndwords++] = 0xcafedead; > + block->pm4[block->pm4_ndwords++] = 0xcafedead; > + block->pm4[block->pm4_ndwords++] = 0xcafedead; > + block->pm4[block->pm4_ndwords++] = 0xcafedead; > + block->pm4[block->pm4_ndwords++] = 0xcafedead; > + block->pm4[block->pm4_ndwords++] = 0xcafedead; > + block->pm4[block->pm4_ndwords++] = 0xcafedead; > + block->pm4[block->pm4_ndwords++] = 0xcafedead; > + } > } > } > /* check that we stay in limit */ > @@ -364,7 
+385,11 @@ static const struct r600_reg r600_context_reg_list[] = { > {GROUP_FORCE_NEW_BLOCK, 0, 0}, > {R_028010_DB_DEPTH_INFO, REG_FLAG_NEED_BO, 0}, > {R_028A6C_VGT_GS_OUT_PRIM_TYPE, 0, 0}, > + {R_02802C_DB_DEPTH_CLEAR, 0, 0}, > + {GROUP_FORCE_NEW_BLOCK, 0, 0}, > + {R_028014_DB_HTILE_DATA_BASE, REG_FLAG_NEED_BO, 0}, > {R_028D24_DB_HTILE_SURFACE, 0, 0}, > + {R_028D30_DB_PRELOAD_CONTROL, 0, 0}, > {R_028D34_DB_PREFETCH_LIMIT, 0, 0}, > {R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0}, > {R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0}, > diff --git a/src/gallium/drivers/r600/r600_pipe.c > b/src/gallium/drivers/r600/r600_pipe.c > index a143381..9f20560 100644 > --- a/src/gallium/drivers/r600/r600_pipe.c > +++ b/src/gallium/drivers/r600/r600_pipe.c > @@ -951,6 +951,14 @@ struct pipe_screen *r600_screen_create(struct > radeon_winsys *ws) > pipe_mutex_init(rscreen->fences.mutex); > > rscreen->use_surface_alloc = debug_get_bool_option("R600_SURF", TRUE); > + /* Hyperz leads to lockup on r6xx/r7xx and evergreen, due to this > instabilities > + * don't enable this by default until we can figure out how to do it > properly > + * > + * You can trigger lockup easily with : > + * piglit/bin/depthstencil-render-miplevels 1024 d=s=z24_s8 > + * run it in a loop, it will lockup often on first run > + */ > + rscreen->use_hyperz = debug_get_bool_option("R600_HYPERZ", FALSE); > > rscreen->global_pool = compute_memory_pool_new(rscreen); > > diff --git a/src/gallium/drivers/r600/r600_pipe.h > b/src/gallium/drivers/r600/r600_pipe.h > index 6449a4d..5ff4d51 100644 > --- a/src/gallium/drivers/r600/r600_pipe.h > +++ b/src/gallium/drivers/r600/r600_pipe.h > @@ -77,9 +77,13 @@ struct r600_surface_sync_cmd { > }; > > struct r600_db_misc_state { > - struct r600_atom atom; > - bool occlusion_query_enabled; > - bool flush_depthstencil_through_cb; > + struct r600_atom atom; > + unsigned db_htile_surface_mask; > + bool occlusion_query_enabled; > + bool flush_depthstencil_through_cb; > + bool clear_depthstencil; > + 
bool hyperz; > + bool resummarize; As pointed out in the previous review, resummarize is unused. Please, don't add UNUSED CODE. Also clear_depthstencil could be renamed to clear_depth, because we don't use the fast stencil clear yet. It will have to be implemented separately anyway. > }; > > struct r600_cb_misc_state { > @@ -143,6 +147,7 @@ struct r600_screen { > struct r600_pipe_fences fences; > > bool use_surface_alloc; > + bool use_hyperz; > > /*for compute global memory binding, we allocate stuff here, instead > of > * buffers. > @@ -181,7 +186,7 @@ struct r600_pipe_dsa { > unsigned alpha_ref; > ubyte valuemask[2]; > ubyte writemask[2]; > - unsigned sx_alpha_test_control; > + unsigned sx_alpha_test_control; > }; > > struct r600_vertex_element > diff --git a/src/gallium/drivers/r600/r600_resource.h > b/src/gallium/drivers/r600/r600_resource.h > index a7570c7..59ec025 100644 > --- a/src/gallium/drivers/r600/r600_resource.h > +++ b/src/gallium/drivers/r600/r600_resource.h > @@ -64,6 +64,13 @@ struct r600_resource_texture { > struct r600_resource_texture *flushed_depth_texture; > boolean is_flushing_texture; > struct radeon_surface surface; > + unsigned db_prefetch_limit; > + unsigned db_htile_surface; > + unsigned db_preload_control; > + struct r600_resource_texture *hyperz; > + float > depth_clear_value[PIPE_MAX_TEXTURE_LEVELS]; > + /* first depth clear initialize the htile buffer */ > + bool > htile_initialized[PIPE_MAX_TEXTURE_LEVELS]; > }; > > #define R600_TEX_IS_TILED(tex, level) ((tex)->array_mode[level] != > V_038000_ARRAY_LINEAR_GENERAL && (tex)->array_mode[level] != > V_038000_ARRAY_LINEAR_ALIGNED) > diff --git a/src/gallium/drivers/r600/r600_state.c > b/src/gallium/drivers/r600/r600_state.c > index 3d5835c..8925a23 100644 > --- a/src/gallium/drivers/r600/r600_state.c > +++ b/src/gallium/drivers/r600/r600_state.c > @@ -1581,6 +1581,7 @@ static void r600_db(struct r600_context *rctx, struct > r600_pipe_state *rstate, > struct r600_resource_texture *rtex; > 
struct r600_surface *surf; > unsigned level, pitch, slice, format, offset, array_mode; > + unsigned db_depth_info; > > if (state->zsbuf == NULL) > return; > @@ -1625,6 +1626,27 @@ static void r600_db(struct r600_context *rctx, struct > r600_pipe_state *rstate, > > format = r600_translate_dbformat(state->zsbuf->format); > assert(format != ~0); > + db_depth_info = S_028010_ARRAY_MODE(array_mode) | > S_028010_FORMAT(format); > + > + /* hyperz */ > + if (rtex->hyperz) { > + uint64_t htile_offset = > rtex->hyperz->surface.level[level].offset; > + > + rctx->db_misc_state.hyperz = true; > + rctx->db_misc_state.db_htile_surface_mask = 0xffffffff; > + r600_atom_dirty(rctx, &rctx->db_misc_state.atom); > + db_depth_info |= S_028010_TILE_SURFACE_ENABLE(1); > + r600_pipe_state_add_reg_bo(rstate, > R_028014_DB_HTILE_DATA_BASE, > + htile_offset >> 8, > &rtex->hyperz->resource, > + RADEON_USAGE_READWRITE); > + r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, > rtex->db_preload_control); > + r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT, > rtex->db_prefetch_limit); > + } else { > + if (rctx->db_misc_state.hyperz) { > + rctx->db_misc_state.hyperz = FALSE; > + r600_atom_dirty(rctx, &rctx->db_misc_state.atom); > + } > + } > > r600_pipe_state_add_reg_bo(rstate, R_02800C_DB_DEPTH_BASE, > offset >> 8, &rtex->resource, > RADEON_USAGE_READWRITE); > @@ -1638,8 +1660,8 @@ static void r600_db(struct r600_context *rctx, struct > r600_pipe_state *rstate, > > S_028004_SLICE_MAX(state->zsbuf->u.tex.last_layer)); > } > r600_pipe_state_add_reg_bo(rstate, R_028010_DB_DEPTH_INFO, > - S_028010_ARRAY_MODE(array_mode) | > S_028010_FORMAT(format), > - &rtex->resource, RADEON_USAGE_READWRITE); > + db_depth_info, > + &rtex->resource, RADEON_USAGE_READWRITE); > r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT, > (surf->aligned_height / 8) - 1); > } > @@ -1723,10 +1745,38 @@ static void r600_emit_db_misc_state(struct > r600_context *rctx, struct r600_atom > struct 
radeon_winsys_cs *cs = rctx->cs; > struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom; > unsigned db_render_control = 0; > - unsigned db_render_override = > - S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) | > - S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | > - S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); > + unsigned db_render_override = 0; > + unsigned cliprect_rule = 0xffff; > + unsigned db_htile_surface = 0; > + struct r600_resource_texture *rtex; > + float depth_clear = 0.0; > + > + if (a->hyperz && rctx->framebuffer.zsbuf) { > + rtex = (struct > r600_resource_texture*)rctx->framebuffer.zsbuf->texture; > + > + db_htile_surface = rtex->db_htile_surface; > + db_htile_surface &= rctx->db_misc_state.db_htile_surface_mask; > + /* further htile surface without preload */ > + rctx->db_misc_state.db_htile_surface_mask = > S_028D24_HTILE_WIDTH(1) | > + > S_028D24_HTILE_HEIGHT(1) | > + > S_028D24_LINEAR(1) | > + > S_028D24_FULL_CACHE(1); > + } > + > + db_render_override |= > S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | > + > S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); > + if (a->hyperz) { > + /* FORCE_OFF means HiZ/HiS are determined by > DB_SHADER_CONTROL */ > + db_render_override |= > S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_OFF); > + if (a->clear_depthstencil) { > + db_render_control |= S_028D0C_DEPTH_CLEAR_ENABLE(1); > + db_render_control |= > S_028D0C_ZPASS_INCREMENT_DISABLE(1); > + /* need to disable cliprect for fast clear */ > + cliprect_rule = 0; > + } > + } else { > + db_render_override |= > S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE); > + } > > if (a->occlusion_query_enabled) { > if (rctx->chip_class >= R700) { > @@ -1740,9 +1790,18 @@ static void r600_emit_db_misc_state(struct > r600_context *rctx, struct r600_atom > S_028D0C_COPY_CENTROID(1); > } > > + if (rctx->framebuffer.zsbuf) { > + unsigned level = rctx->framebuffer.zsbuf->u.tex.level; > + > + rtex = (struct > 
r600_resource_texture*)rctx->framebuffer.zsbuf->texture; > + depth_clear = rtex->depth_clear_value[level]; > + } > + r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(depth_clear)); > r600_write_context_reg_seq(cs, R_028D0C_DB_RENDER_CONTROL, 2); > r600_write_value(cs, db_render_control); /* > R_028D0C_DB_RENDER_CONTROL */ > r600_write_value(cs, db_render_override); /* > R_028D10_DB_RENDER_OVERRIDE */ > + r600_write_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, > cliprect_rule); > + r600_write_context_reg(cs, R_028D24_DB_HTILE_SURFACE, > db_htile_surface); > } > > static void r600_emit_vertex_buffers(struct r600_context *rctx, struct > r600_atom *atom) > @@ -1845,7 +1904,7 @@ void r600_init_state_functions(struct r600_context > *rctx) > { > r600_init_atom(&rctx->cb_misc_state.atom, r600_emit_cb_misc_state, 0, > 0); > r600_atom_dirty(rctx, &rctx->cb_misc_state.atom); > - r600_init_atom(&rctx->db_misc_state.atom, r600_emit_db_misc_state, 4, > 0); > + r600_init_atom(&rctx->db_misc_state.atom, r600_emit_db_misc_state, > 13, 0); > r600_atom_dirty(rctx, &rctx->db_misc_state.atom); > r600_init_atom(&rctx->vertex_buffer_state.atom, > r600_emit_vertex_buffers, 0, 0); > r600_init_atom(&rctx->vs_constbuf_state.atom, > r600_emit_vs_constant_buffers, 0, 0); > @@ -1954,7 +2013,7 @@ void r600_init_atom_start_cs(struct r600_context *rctx) > int num_es_stack_entries; > enum radeon_family family; > struct r600_command_buffer *cb = &rctx->start_cs_cmd; > - uint32_t tmp; > + uint32_t tmp, db_watermarks, db_debug; > unsigned i; > > r600_init_command_buffer(cb, 256, EMIT_EARLY); > @@ -1974,6 +2033,7 @@ void r600_init_atom_start_cs(struct r600_context *rctx) > vs_prio = 1; > gs_prio = 2; > es_prio = 3; > + > switch (family) { > case CHIP_R600: > num_ps_gprs = 192; > @@ -2140,15 +2200,55 @@ void r600_init_atom_start_cs(struct r600_context > *rctx) > > if (rctx->chip_class >= R700) { > r600_store_config_reg(cb, > R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000); > - 
r600_store_config_reg(cb, R_009830_DB_DEBUG, 0); > - r600_store_config_reg(cb, R_009838_DB_WATERMARKS, 0x00420204); > r600_store_context_reg(cb, R_0286C8_SPI_THREAD_GROUPING, 0); > } else { > r600_store_config_reg(cb, > R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0); > - r600_store_config_reg(cb, R_009830_DB_DEBUG, 0x82000000); > - r600_store_config_reg(cb, R_009838_DB_WATERMARKS, 0x01020204); > r600_store_context_reg(cb, R_0286C8_SPI_THREAD_GROUPING, 1); > } > + > + /* FIXME db_watermarks & db_debug need adjustment for MSAA */ > + switch (family) { > + case CHIP_R600: > + db_debug = 0x82200000; > + db_watermarks = 0x01020204; > + break; > + case CHIP_RV630: > + db_debug = 0x92000000; > + db_watermarks = 0x01020204; > + break; > + case CHIP_RV635: > + db_debug = 0x82000000; > + db_watermarks = 0x01020204; > + break; > + case CHIP_RV610: > + db_debug = 0x82000000; > + db_watermarks = 0x01020204; > + break; > + case CHIP_RV620: > + default: > + db_debug = 0x82000000; > + db_watermarks = 0x01020204; > + break; > + case CHIP_RS780: > + case CHIP_RS880: > + db_debug = 0x88000000; > + db_watermarks = 0x81020204; > + break; > + case CHIP_RV670: > + db_debug = 0x80000000; > + db_watermarks = 0x01020204; > + break; > + case CHIP_RV770: > + case CHIP_RV730: > + case CHIP_RV740: > + case CHIP_RV710: > + db_debug = 0x00000000; > + db_watermarks = 0x00420204; > + break; > + } > + r600_store_config_reg(cb, R_009830_DB_DEBUG, db_debug); > + r600_store_config_reg(cb, R_009838_DB_WATERMARKS, db_watermarks); > + > r600_store_context_reg_seq(cb, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 9); > r600_store_value(cb, 0); /* R_0288A8_SQ_ESGS_RING_ITEMSIZE */ > r600_store_value(cb, 0); /* R_0288AC_SQ_GSVS_RING_ITEMSIZE */ > @@ -2192,9 +2292,8 @@ void r600_init_atom_start_cs(struct r600_context *rctx) > > r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0); > > - r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2); > + r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 1); > 
r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */ > - r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */ > > r600_store_context_reg_seq(cb, R_0286DC_SPI_FOG_CNTL, 3); > r600_store_value(cb, 0); /* R_0286DC_SPI_FOG_CNTL */ > @@ -2234,7 +2333,6 @@ void r600_init_atom_start_cs(struct r600_context *rctx) > } > > r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0); > - r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); > > if (rctx->chip_class >= R700) { > r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, > 0xAAAAAAAA); > @@ -2317,7 +2415,7 @@ void r600_pipe_shader_ps(struct pipe_context *ctx, > struct r600_pipe_shader *shad > tmp); > } > > - db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); > + db_shader_control = 0; > for (i = 0; i < rshader->noutput; i++) { > if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) > z_export = 1; > @@ -2478,13 +2576,14 @@ void *r600_create_db_flush_dsa(struct r600_context > *rctx) > return rctx->context.create_depth_stencil_alpha_state(&rctx->context, > &dsa); > } > > -void r600_update_dual_export_state(struct r600_context * rctx) > +void r600_update_dual_export_state(struct r600_context *rctx) > { > unsigned dual_export = rctx->export_16bpc && rctx->nr_cbufs && > !rctx->ps_shader->current->ps_depth_export; > unsigned db_shader_control = > rctx->ps_shader->current->db_shader_control | > S_02880C_DUAL_EXPORT_ENABLE(dual_export); > > + db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); This cleanup for DB_SHADER_CONTROL could be in a separate patch. 
> if (db_shader_control != rctx->db_shader_control) { > struct r600_pipe_state rstate; > > diff --git a/src/gallium/drivers/r600/r600_texture.c > b/src/gallium/drivers/r600/r600_texture.c > index d16c252..0837458 100644 > --- a/src/gallium/drivers/r600/r600_texture.c > +++ b/src/gallium/drivers/r600/r600_texture.c > @@ -471,6 +471,10 @@ static void r600_texture_destroy(struct pipe_screen > *screen, > if (rtex->stencil) > pipe_resource_reference((struct pipe_resource > **)&rtex->stencil, NULL); > > + if (rtex->hyperz) { > + pipe_resource_reference((struct pipe_resource > **)&rtex->hyperz, NULL); > + } > + > pb_reference(&resource->buf, NULL); > FREE(rtex); > } > @@ -487,6 +491,59 @@ static const struct u_resource_vtbl r600_texture_vtbl = > NULL /* transfer_inline_write */ > }; > > +static void r600_htile_settings(struct r600_screen *rscreen, > + struct r600_resource_texture *zbuf, > + struct radeon_surface *hsurface) > +{ > + unsigned max_pixels_per_db; > + const unsigned k = 1024; > + unsigned npix_x, npix_y; > + > + npix_x = hsurface->npix_x >> 5; > + npix_y = hsurface->npix_y >> 5; > + npix_x = npix_x ? npix_x - 1 : 0; > + npix_y = npix_y ? npix_y - 1 : 0; > + max_pixels_per_db = (hsurface->npix_x * hsurface->npix_y * > rscreen->info.r600_num_backends); > + max_pixels_per_db /= (rscreen->info.r600_num_tile_pipes * 2); > + zbuf->db_prefetch_limit = (hsurface->npix_y / 8); > + zbuf->db_prefetch_limit = zbuf->db_prefetch_limit ? 
> zbuf->db_prefetch_limit - 1 : 0; > + zbuf->db_preload_control = S_028D30_START_X(0) | S_028D30_START_Y(0) | > + S_028D30_MAX_X(npix_x) | > + S_028D30_MAX_Y(npix_y); > + /* force htile to always 8x8 as there is bug with 4x4, 4x8 or 8x4 > configuration */ > + zbuf->db_htile_surface = S_028D24_HTILE_WIDTH(1) | > S_028D24_HTILE_HEIGHT(1); > + /* Preload should be set once when binding the surface, an not set > after > + * however given that we have issue with flushing just don't set it at > + * all, decrease lockup likelyhood afaict > + */ > +#if 0 > + zbuf->db_htile_surface |= S_028D24_PRELOAD(1); > +#endif > + > + if (max_pixels_per_db <= 64 * k) { > + zbuf->db_htile_surface |= S_028D24_LINEAR(1); > + } else if (max_pixels_per_db <= 512 * k) { > + zbuf->db_htile_surface |= S_028D24_LINEAR(1); > + zbuf->db_htile_surface |= S_028D24_FULL_CACHE(1); > + } else { > + zbuf->db_htile_surface |= S_028D24_FULL_CACHE(1); > + if (hsurface->npix_x <= 512) { > + zbuf->db_htile_surface |= S_028D24_PREFETCH_WIDTH(16); > + zbuf->db_htile_surface |= S_028D24_PREFETCH_HEIGHT(4); > + } else if (hsurface->npix_x <= 1024) { > + zbuf->db_htile_surface |= S_028D24_PREFETCH_WIDTH(16); > + zbuf->db_htile_surface |= S_028D24_PREFETCH_HEIGHT(2); > + } else { > + zbuf->db_htile_surface |= S_028D24_PREFETCH_WIDTH(16); > + zbuf->db_htile_surface |= S_028D24_PREFETCH_HEIGHT(0); > + } > + /* r6xx, r7xx have issue with preload window, don't use it */ > + if (rscreen->family >= CHIP_CEDAR) { > + zbuf->db_htile_surface |= > S_028D24_HTILE_USES_PRELOAD_WIN(1); > + } > + } > +} > + > static struct r600_resource_texture * > r600_texture_create_object(struct pipe_screen *screen, > const struct pipe_resource *base, > @@ -513,6 +570,7 @@ r600_texture_create_object(struct pipe_screen *screen, > resource->b.b.screen = screen; > rtex->pitch_override = pitch_in_bytes_override; > rtex->real_format = base->format; > + rtex->hyperz = NULL; > > /* We must split depth and stencil into two separate buffers on > 
Evergreen. */ > if ((base->bind & PIPE_BIND_DEPTH_STENCIL) && > @@ -574,6 +632,51 @@ r600_texture_create_object(struct pipe_screen *screen, > } > } > > + if (!(base->flags & R600_RESOURCE_FLAG_TRANSFER) && The allocation of the htile surface should probably be skipped for R600_RESOURCE_FLAG_FLUSHED_DEPTH too (the flag is new in Mesa master). Marek _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev