On Sun, Jul 29, 2012 at 1:50 PM, Marek Olšák <mar...@gmail.com> wrote: > On Tue, Jul 17, 2012 at 7:58 PM, <j.gli...@gmail.com> wrote: >> From: Jerome Glisse <jgli...@redhat.com> >> >> htile is used for HiZ and HiS support and fast Z/S clears. >> This commit just adds the htile setup and Fast Z clear. >> We don't take full advantage of HiS with that patch. >> >> v2 really use fast clear, still random issue with some tiles >> need to try more flush combination, fix depth/stencil >> texture decompression >> v3 fix random issue on r6xx/r7xx >> v4 rebase on top of latest mesa, disable CB export when clearing >> htile surface to avoid wasting bandwidth >> v5 resummarize htile surface when uploading z value. Fix z/stencil >> decompression, the custom blitter with custom dsa is no longer >> needed. >> v6 Reorganize render control/override update mechanism, fixing more >> issues in the process. >> v7 Add nop after depth surface base update to work around some htile >> flushing issue. For htile to 8x8 on r6xx/r7xx as other combination >> have issue. Do not enable hyperz when flushing/uncompressing >> depth buffer. >> v8 Fix htile surface, preload and prefetch setup. Only set preload >> and prefetch on htile surface clear like fglrx. Record depth >> clear value per level. Support several level for the htile >> surface. First depth clear can't be a fast clear.
>> v9 Fix comments, properly account new register in emit function, >> disable fast zclear if clearing different layer of texture >> array to different value >> >> Signed-off-by: Pierre-Eric Pelloux-Prayer <pell...@gmail.com> >> Signed-off-by: Alex Deucher <alexander.deuc...@amd.com> >> Signed-off-by: Jerome Glisse <jgli...@redhat.com> >> --- >> src/gallium/drivers/r600/evergreen_hw_context.c | 6 + >> src/gallium/drivers/r600/evergreen_state.c | 102 ++++++++++++----- >> src/gallium/drivers/r600/evergreend.h | 4 + >> src/gallium/drivers/r600/r600_blit.c | 38 +++++++ >> src/gallium/drivers/r600/r600_hw_context.c | 25 +++++ >> src/gallium/drivers/r600/r600_pipe.c | 8 ++ >> src/gallium/drivers/r600/r600_pipe.h | 13 ++- >> src/gallium/drivers/r600/r600_resource.h | 7 ++ >> src/gallium/drivers/r600/r600_state.c | 133 >> ++++++++++++++++++++--- >> src/gallium/drivers/r600/r600_texture.c | 103 ++++++++++++++++++ >> src/gallium/drivers/r600/r600d.h | 6 + >> 11 files changed, 399 insertions(+), 46 deletions(-) >> >> diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c >> b/src/gallium/drivers/r600/evergreen_hw_context.c >> index 081701f..546c884 100644 >> --- a/src/gallium/drivers/r600/evergreen_hw_context.c >> +++ b/src/gallium/drivers/r600/evergreen_hw_context.c >> @@ -62,6 +62,9 @@ static const struct r600_reg evergreen_context_reg_list[] >> = { >> {GROUP_FORCE_NEW_BLOCK, 0, 0}, >> {R_028058_DB_DEPTH_SIZE, 0, 0}, >> {R_02805C_DB_DEPTH_SLICE, 0, 0}, >> + {R_02802C_DB_DEPTH_CLEAR, 0, 0}, >> + {R_028ABC_DB_HTILE_SURFACE, 0, 0}, >> + {R_028AC8_DB_PRELOAD_CONTROL, 0, 0}, >> {R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0}, >> {R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0}, >> {R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0}, >> @@ -319,6 +322,9 @@ static const struct r600_reg cayman_context_reg_list[] = >> { >> {GROUP_FORCE_NEW_BLOCK, 0, 0}, >> {R_028058_DB_DEPTH_SIZE, 0, 0}, >> {R_02805C_DB_DEPTH_SLICE, 0, 0}, >> + {R_02802C_DB_DEPTH_CLEAR, 0, 0}, >> + {R_028ABC_DB_HTILE_SURFACE, 0, 
0}, >> + {R_028AC8_DB_PRELOAD_CONTROL, 0, 0}, >> {R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0}, >> {R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0}, >> {R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0}, >> diff --git a/src/gallium/drivers/r600/evergreen_state.c >> b/src/gallium/drivers/r600/evergreen_state.c >> index a66387b..214d76b 100644 >> --- a/src/gallium/drivers/r600/evergreen_state.c >> +++ b/src/gallium/drivers/r600/evergreen_state.c >> @@ -710,13 +710,15 @@ static void *evergreen_create_blend_state(struct >> pipe_context *ctx, >> } >> blend->cb_target_mask = target_mask; >> >> - if (target_mask) >> + if (target_mask) { >> color_control |= S_028808_MODE(V_028808_CB_NORMAL); >> - else >> + } else { >> color_control |= S_028808_MODE(V_028808_CB_DISABLE); >> + } >> >> r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, >> color_control); >> + >> /* only have dual source on MRT0 */ >> blend->dual_src_blend = util_blend_state_is_dual(state, 0); >> for (int i = 0; i < 8; i++) { >> @@ -1668,6 +1670,26 @@ static void evergreen_db(struct r600_context *rctx, >> struct r600_pipe_state *rsta >> } >> } >> >> + /* hyperz */ >> + if (rtex->hyperz) { >> + uint64_t htile_offset = >> rtex->hyperz->surface.level[level].offset; >> + >> + rctx->db_misc_state.hyperz = true; >> + rctx->db_misc_state.db_htile_surface_mask = 0xffffffff; >> + r600_atom_dirty(rctx, &rctx->db_misc_state.atom); >> + z_info |= S_028040_TILE_SURFACE_ENABLE(1); >> + r600_pipe_state_add_reg_bo(rstate, >> R_028014_DB_HTILE_DATA_BASE, >> + htile_offset >> 8, >> &rtex->hyperz->resource, >> + RADEON_USAGE_READWRITE); >> + /* FORCE_OFF means HiZ/HiS are determined by >> DB_SHADER_CONTROL */ >> + r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, >> rtex->db_preload_control); >> + } else { >> + if (rctx->db_misc_state.hyperz) { >> + rctx->db_misc_state.hyperz = FALSE; >> + r600_atom_dirty(rctx, &rctx->db_misc_state.atom); >> + } >> + } >> + >> r600_pipe_state_add_reg_bo(rstate, R_028040_DB_Z_INFO, z_info, >> 
&rtex->resource, RADEON_USAGE_READWRITE); >> r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE, >> @@ -1747,13 +1769,39 @@ static void evergreen_emit_db_misc_state(struct >> r600_context *rctx, struct r600_ >> { >> struct radeon_winsys_cs *cs = rctx->cs; >> struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom; >> + unsigned db_render_override = 0; >> unsigned db_render_control = 0; >> unsigned db_count_control = 0; >> - unsigned db_render_override = >> - S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) | >> - S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | >> - S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); >> - >> + unsigned cliprect_rule = 0xffff; >> + unsigned db_htile_surface = 0; >> + struct r600_resource_texture *rtex; >> + float depth_clear = 0.0; >> + >> + if (a->hyperz && rctx->framebuffer.zsbuf) { >> + rtex = (struct >> r600_resource_texture*)rctx->framebuffer.zsbuf->texture; >> + >> + db_htile_surface = rtex->db_htile_surface; >> + db_htile_surface &= >> rctx->db_misc_state.db_htile_surface_mask; >> + /* further htile surface without preload */ >> + rctx->db_misc_state.db_htile_surface_mask = >> S_028ABC_HTILE_WIDTH(1) | >> + >> S_028ABC_HTILE_HEIGHT(1) | >> + >> S_028ABC_LINEAR(1) | >> + >> S_028ABC_FULL_CACHE(1); >> + } >> + >> + db_render_override = >> S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | >> + >> S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); >> + if (a->hyperz && !a->flush_depthstencil_through_cb) { >> + /* FORCE_OFF means HiZ/HiS are determined by >> DB_SHADER_CONTROL */ >> + db_render_override |= >> S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_OFF); >> + if (a->clear_depthstencil) { >> + db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(1); >> + /* need to disable cliprect for fast clear */ >> + cliprect_rule = 0; >> + } >> + } else { >> + db_render_override |= >> S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE); >> + } >> if (a->occlusion_query_enabled) { >> db_count_control |= 
S_028004_PERFECT_ZPASS_COUNTS(1); >> db_render_override |= S_02800C_NOOP_CULL_DISABLE(1); >> @@ -1764,10 +1812,21 @@ static void evergreen_emit_db_misc_state(struct >> r600_context *rctx, struct r600_ >> S_028000_COPY_CENTROID(1); >> } >> >> + if (rctx->framebuffer.zsbuf) { >> + unsigned level = rctx->framebuffer.zsbuf->u.tex.level; >> + >> + rtex = (struct >> r600_resource_texture*)rctx->framebuffer.zsbuf->texture; >> + depth_clear = rtex->depth_clear_value[level]; >> + } >> + r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, >> fui(depth_clear)); >> r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2); >> - r600_write_value(cs, db_render_control); /* >> R_028000_DB_RENDER_CONTROL */ >> - r600_write_value(cs, db_count_control); /* R_028004_DB_COUNT_CONTROL >> */ >> + /* R_028000_DB_RENDER_CONTROL */ >> + r600_write_value(cs, db_render_control); >> + /* R_028004_DB_COUNT_CONTROL */ >> + r600_write_value(cs, db_count_control); >> r600_write_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, >> db_render_override); >> + r600_write_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, >> cliprect_rule); >> + r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, >> db_htile_surface); >> } >> >> static void evergreen_emit_vertex_buffers(struct r600_context *rctx, >> @@ -1911,7 +1970,7 @@ void evergreen_init_state_functions(struct >> r600_context *rctx) >> { >> r600_init_atom(&rctx->cb_misc_state.atom, >> evergreen_emit_cb_misc_state, 0, 0); >> r600_atom_dirty(rctx, &rctx->cb_misc_state.atom); >> - r600_init_atom(&rctx->db_misc_state.atom, >> evergreen_emit_db_misc_state, 7, 0); >> + r600_init_atom(&rctx->db_misc_state.atom, >> evergreen_emit_db_misc_state, 16, 0); >> r600_atom_dirty(rctx, &rctx->db_misc_state.atom); >> r600_init_atom(&rctx->vertex_buffer_state.atom, >> evergreen_fs_emit_vertex_buffers, 0, 0); >> r600_init_atom(&rctx->cs_vertex_buffer_state.atom, >> evergreen_cs_emit_vertex_buffers, 0, 0); >> @@ -2065,19 +2124,15 @@ static void 
cayman_init_atom_start_cs(struct >> r600_context *rctx) >> r600_store_value(cb, ~0); /* CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0 */ >> r600_store_value(cb, ~0); /* CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1 */ >> >> - r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2); >> - r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */ >> - r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */ >> + r600_store_context_reg_seq(cb, R_009830_DB_DEBUG, 3); >> + r600_store_value(cb, 0); /* R_009830_DB_DEBUG */ >> + r600_store_value(cb, 0); /* R_009834_DB_DEBUG2 */ >> + r600_store_value(cb, 0); /* R_009838_DB_DEBUG3 */ >> + r600_store_config_reg(cb, R_009854_DB_WATERMARKS, 0x00420204); >> >> r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0); >> >> - r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, >> 3); >> - r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */ >> - r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */ >> - r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */ >> - >> r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0); >> - r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); >> >> r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2); >> r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */ >> @@ -2552,7 +2607,6 @@ void evergreen_init_atom_start_cs(struct r600_context >> *rctx) >> r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */ >> >> r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0); >> - r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); >> r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); >> >> r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2); >> @@ -2563,10 +2617,9 @@ void evergreen_init_atom_start_cs(struct r600_context >> *rctx) >> r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F); >> r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0); >> >> - 
r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, >> 3); >> + r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, >> 2); >> r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */ >> r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */ >> - r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */ >> >> r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00); >> >> @@ -2666,7 +2719,7 @@ void evergreen_pipe_shader_ps(struct pipe_context >> *ctx, struct r600_pipe_shader >> >> rstate->nregs = 0; >> >> - db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); >> + db_shader_control = 0; >> for (i = 0; i < rshader->ninput; i++) { >> /* evergreen NUM_INTERP only contains values interpolated >> into the LDS, >> POSITION goes via GPRs from the SC so isn't counted */ >> @@ -2875,14 +2928,13 @@ void evergreen_update_dual_export_state(struct >> r600_context * rctx) >> { >> unsigned dual_export = rctx->export_16bpc && rctx->nr_cbufs && >> !rctx->ps_shader->current->ps_depth_export; >> - >> unsigned db_source_format = dual_export ? 
V_02880C_EXPORT_DB_TWO : >> V_02880C_EXPORT_DB_FULL; >> - >> unsigned db_shader_control = >> rctx->ps_shader->current->db_shader_control | >> S_02880C_DUAL_EXPORT_ENABLE(dual_export) | >> S_02880C_DB_SOURCE_FORMAT(db_source_format); >> >> + db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); >> if (db_shader_control != rctx->db_shader_control) { >> struct r600_pipe_state rstate; >> >> diff --git a/src/gallium/drivers/r600/evergreend.h >> b/src/gallium/drivers/r600/evergreend.h >> index 6c4873c..1ac5944 100644 >> --- a/src/gallium/drivers/r600/evergreend.h >> +++ b/src/gallium/drivers/r600/evergreend.h >> @@ -1589,6 +1589,10 @@ >> #define S_028008_SLICE_MAX(x) (((x) & 0x7FF) << 13) >> #define G_028008_SLICE_MAX(x) (((x) >> 13) & 0x7FF) >> #define C_028008_SLICE_MAX 0xFF001FFF >> +#define R_009830_DB_DEBUG 0x00009830 >> +#define R_009834_DB_DEBUG2 0x00009834 >> +#define R_009838_DB_DEBUG3 0x00009838 >> +#define R_009854_DB_WATERMARKS 0x00009854 >> #define R_02800C_DB_RENDER_OVERRIDE 0x0002800C >> #define V_02800C_FORCE_OFF 0 >> #define V_02800C_FORCE_ENABLE 1 >> diff --git a/src/gallium/drivers/r600/r600_blit.c >> b/src/gallium/drivers/r600/r600_blit.c >> index 98f8b84..9c01f6d 100644 >> --- a/src/gallium/drivers/r600/r600_blit.c >> +++ b/src/gallium/drivers/r600/r600_blit.c >> @@ -24,6 +24,7 @@ >> #include "util/u_surface.h" >> #include "util/u_blitter.h" >> #include "util/u_format.h" >> +#include "r600d.h" >> >> enum r600_blitter_op /* bitmask */ >> { >> @@ -220,11 +221,48 @@ static void r600_clear(struct pipe_context *ctx, >> unsigned buffers, >> struct r600_context *rctx = (struct r600_context *)ctx; >> struct pipe_framebuffer_state *fb = &rctx->framebuffer; >> >> + /* if hyperz enabled just clear hyperz */ >> + if (fb->zsbuf && (buffers & PIPE_CLEAR_DEPTHSTENCIL)) { >> + struct r600_resource_texture *rtex; >> + unsigned level = fb->zsbuf->u.tex.level; >> + unsigned flayer = fb->zsbuf->u.tex.first_layer; >> + unsigned llayer = 
fb->zsbuf->u.tex.last_layer; >> + >> + rtex = (struct r600_resource_texture*)fb->zsbuf->texture; >> + rtex->depth_clear_value[level] = depth; >> + >> + /* enable fast clear only if clearing all layer in texture >> array >> + * to same value, we can't use hyperz fast clear if hyperz >> have >> + * different value. >> + */ >> + if (rtex->hyperz && rtex->surface.array_size > 1 && !flayer >> && llayer == rtex->surface.array_size) { >> + if (buffers & PIPE_CLEAR_DEPTH) { >> + rctx->db_misc_state.hyperz = true; >> + if (rtex->htile_initialized[level]) { >> + >> rctx->db_misc_state.clear_depthstencil = true; >> + } else { >> + rtex->htile_initialized[level] = >> true; >> + >> rctx->db_misc_state.db_htile_surface_mask = S_028D24_HTILE_WIDTH(1) | >> + >> S_028D24_HTILE_HEIGHT(1) | >> + >> S_028D24_LINEAR(1) | >> + >> S_028D24_FULL_CACHE(1); >> + } >> + r600_atom_dirty(rctx, >> &rctx->db_misc_state.atom); >> + } >> + } >> + } >> + >> r600_blitter_begin(ctx, R600_CLEAR); >> util_blitter_clear(rctx->blitter, fb->width, fb->height, >> fb->nr_cbufs, buffers, fb->nr_cbufs ? 
>> fb->cbufs[0]->format : PIPE_FORMAT_NONE, >> color, depth, stencil); >> r600_blitter_end(ctx); >> + >> + /* disable fast clear */ >> + if (rctx->db_misc_state.clear_depthstencil) { >> + rctx->db_misc_state.clear_depthstencil = false; >> + r600_atom_dirty(rctx, &rctx->db_misc_state.atom); >> + } >> } >> >> static void r600_clear_render_target(struct pipe_context *ctx, >> diff --git a/src/gallium/drivers/r600/r600_hw_context.c >> b/src/gallium/drivers/r600/r600_hw_context.c >> index e80f39c..4f2c03a 100644 >> --- a/src/gallium/drivers/r600/r600_hw_context.c >> +++ b/src/gallium/drivers/r600/r600_hw_context.c >> @@ -180,6 +180,27 @@ static void r600_init_block(struct r600_context *ctx, >> (ctx->family < CHIP_RV770) && reg[i+j].flags & >> REG_FLAG_RV6XX_SBU) { >> block->pm4[block->pm4_ndwords++] = >> PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0); >> block->pm4[block->pm4_ndwords++] = >> reg[i+j].sbu_flags; >> + if (reg[i+j].sbu_flags & SURFACE_BASE_UPDATE_DEPTH) { >> + /* to work around flushing issue in htile >> surface */ >> + block->pm4[block->pm4_ndwords++] = >> PKT3(PKT3_NOP, 16, 0); >> + block->pm4[block->pm4_ndwords++] = >> 0xcafedead; >> + block->pm4[block->pm4_ndwords++] = >> 0xcafedead; >> + block->pm4[block->pm4_ndwords++] = >> 0xcafedead; >> + block->pm4[block->pm4_ndwords++] = >> 0xcafedead; >> + block->pm4[block->pm4_ndwords++] = >> 0xcafedead; >> + block->pm4[block->pm4_ndwords++] = >> 0xcafedead; >> + block->pm4[block->pm4_ndwords++] = >> 0xcafedead; >> + block->pm4[block->pm4_ndwords++] = >> 0xcafedead; >> + block->pm4[block->pm4_ndwords++] = >> 0xcafedead; >> + block->pm4[block->pm4_ndwords++] = >> 0xcafedead; >> + block->pm4[block->pm4_ndwords++] = >> 0xcafedead; >> + block->pm4[block->pm4_ndwords++] = >> 0xcafedead; >> + block->pm4[block->pm4_ndwords++] = >> 0xcafedead; >> + block->pm4[block->pm4_ndwords++] = >> 0xcafedead; >> + block->pm4[block->pm4_ndwords++] = >> 0xcafedead; >> + block->pm4[block->pm4_ndwords++] = >> 0xcafedead; >> + 
block->pm4[block->pm4_ndwords++] = >> 0xcafedead; >> + } >> } >> } >> /* check that we stay in limit */ >> @@ -364,7 +385,11 @@ static const struct r600_reg r600_context_reg_list[] = { >> {GROUP_FORCE_NEW_BLOCK, 0, 0}, >> {R_028010_DB_DEPTH_INFO, REG_FLAG_NEED_BO, 0}, >> {R_028A6C_VGT_GS_OUT_PRIM_TYPE, 0, 0}, >> + {R_02802C_DB_DEPTH_CLEAR, 0, 0}, >> + {GROUP_FORCE_NEW_BLOCK, 0, 0}, >> + {R_028014_DB_HTILE_DATA_BASE, REG_FLAG_NEED_BO, 0}, >> {R_028D24_DB_HTILE_SURFACE, 0, 0}, >> + {R_028D30_DB_PRELOAD_CONTROL, 0, 0}, >> {R_028D34_DB_PREFETCH_LIMIT, 0, 0}, >> {R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0}, >> {R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0}, >> diff --git a/src/gallium/drivers/r600/r600_pipe.c >> b/src/gallium/drivers/r600/r600_pipe.c >> index a143381..9f20560 100644 >> --- a/src/gallium/drivers/r600/r600_pipe.c >> +++ b/src/gallium/drivers/r600/r600_pipe.c >> @@ -951,6 +951,14 @@ struct pipe_screen *r600_screen_create(struct >> radeon_winsys *ws) >> pipe_mutex_init(rscreen->fences.mutex); >> >> rscreen->use_surface_alloc = debug_get_bool_option("R600_SURF", >> TRUE); >> + /* Hyperz leads to lockup on r6xx/r7xx and evergreen, due to this >> instabilities >> + * don't enable this by default until we can figure out how to do it >> properly >> + * >> + * You can trigger lockup easily with : >> + * piglit/bin/depthstencil-render-miplevels 1024 d=s=z24_s8 >> + * run it in a loop, it will lockup often on first run >> + */ >> + rscreen->use_hyperz = debug_get_bool_option("R600_HYPERZ", FALSE); >> >> rscreen->global_pool = compute_memory_pool_new(rscreen); >> >> diff --git a/src/gallium/drivers/r600/r600_pipe.h >> b/src/gallium/drivers/r600/r600_pipe.h >> index 6449a4d..5ff4d51 100644 >> --- a/src/gallium/drivers/r600/r600_pipe.h >> +++ b/src/gallium/drivers/r600/r600_pipe.h >> @@ -77,9 +77,13 @@ struct r600_surface_sync_cmd { >> }; >> >> struct r600_db_misc_state { >> - struct r600_atom atom; >> - bool occlusion_query_enabled; >> - bool flush_depthstencil_through_cb; >> 
+ struct r600_atom atom; >> + unsigned db_htile_surface_mask; >> + bool occlusion_query_enabled; >> + bool flush_depthstencil_through_cb; >> + bool clear_depthstencil; >> + bool hyperz; >> + bool resummarize; >> }; >> >> struct r600_cb_misc_state { >> @@ -143,6 +147,7 @@ struct r600_screen { >> struct r600_pipe_fences fences; >> >> bool use_surface_alloc; >> + bool use_hyperz; >> >> /*for compute global memory binding, we allocate stuff here, instead >> of >> * buffers. >> @@ -181,7 +186,7 @@ struct r600_pipe_dsa { >> unsigned alpha_ref; >> ubyte valuemask[2]; >> ubyte writemask[2]; >> - unsigned sx_alpha_test_control; >> + unsigned sx_alpha_test_control; >> }; >> >> struct r600_vertex_element >> diff --git a/src/gallium/drivers/r600/r600_resource.h >> b/src/gallium/drivers/r600/r600_resource.h >> index a7570c7..59ec025 100644 >> --- a/src/gallium/drivers/r600/r600_resource.h >> +++ b/src/gallium/drivers/r600/r600_resource.h >> @@ -64,6 +64,13 @@ struct r600_resource_texture { >> struct r600_resource_texture *flushed_depth_texture; >> boolean is_flushing_texture; >> struct radeon_surface surface; >> + unsigned db_prefetch_limit; >> + unsigned db_htile_surface; >> + unsigned db_preload_control; >> + struct r600_resource_texture *hyperz; >> + float >> depth_clear_value[PIPE_MAX_TEXTURE_LEVELS]; >> + /* first depth clear initialize the htile buffer */ >> + bool >> htile_initialized[PIPE_MAX_TEXTURE_LEVELS]; >> }; >> >> #define R600_TEX_IS_TILED(tex, level) ((tex)->array_mode[level] != >> V_038000_ARRAY_LINEAR_GENERAL && (tex)->array_mode[level] != >> V_038000_ARRAY_LINEAR_ALIGNED) >> diff --git a/src/gallium/drivers/r600/r600_state.c >> b/src/gallium/drivers/r600/r600_state.c >> index 3d5835c..8925a23 100644 >> --- a/src/gallium/drivers/r600/r600_state.c >> +++ b/src/gallium/drivers/r600/r600_state.c >> @@ -1581,6 +1581,7 @@ static void r600_db(struct r600_context *rctx, struct >> r600_pipe_state *rstate, >> struct r600_resource_texture *rtex; >> struct r600_surface 
*surf; >> unsigned level, pitch, slice, format, offset, array_mode; >> + unsigned db_depth_info; >> >> if (state->zsbuf == NULL) >> return; >> @@ -1625,6 +1626,27 @@ static void r600_db(struct r600_context *rctx, struct >> r600_pipe_state *rstate, >> >> format = r600_translate_dbformat(state->zsbuf->format); >> assert(format != ~0); >> + db_depth_info = S_028010_ARRAY_MODE(array_mode) | >> S_028010_FORMAT(format); >> + >> + /* hyperz */ >> + if (rtex->hyperz) { >> + uint64_t htile_offset = >> rtex->hyperz->surface.level[level].offset; >> + >> + rctx->db_misc_state.hyperz = true; >> + rctx->db_misc_state.db_htile_surface_mask = 0xffffffff; >> + r600_atom_dirty(rctx, &rctx->db_misc_state.atom); >> + db_depth_info |= S_028010_TILE_SURFACE_ENABLE(1); >> + r600_pipe_state_add_reg_bo(rstate, >> R_028014_DB_HTILE_DATA_BASE, >> + htile_offset >> 8, >> &rtex->hyperz->resource, >> + RADEON_USAGE_READWRITE); >> + r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, >> rtex->db_preload_control); >> + r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT, >> rtex->db_prefetch_limit); >> + } else { >> + if (rctx->db_misc_state.hyperz) { >> + rctx->db_misc_state.hyperz = FALSE; >> + r600_atom_dirty(rctx, &rctx->db_misc_state.atom); >> + } >> + } >> >> r600_pipe_state_add_reg_bo(rstate, R_02800C_DB_DEPTH_BASE, >> offset >> 8, &rtex->resource, >> RADEON_USAGE_READWRITE); >> @@ -1638,8 +1660,8 @@ static void r600_db(struct r600_context *rctx, struct >> r600_pipe_state *rstate, >> >> S_028004_SLICE_MAX(state->zsbuf->u.tex.last_layer)); >> } >> r600_pipe_state_add_reg_bo(rstate, R_028010_DB_DEPTH_INFO, >> - S_028010_ARRAY_MODE(array_mode) | >> S_028010_FORMAT(format), >> - &rtex->resource, RADEON_USAGE_READWRITE); >> + db_depth_info, >> + &rtex->resource, RADEON_USAGE_READWRITE); >> r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT, >> (surf->aligned_height / 8) - 1); > > I was doing something different and wanted to look how you set > DB_PREFETCH_LIMIT and then 
I noticed this. No matter how you set it > for HyperZ, it's always overwritten here. I think the second call to > r600_pipe_state_add_reg with R_028D34_DB_PREFETCH_LIMIT should be > removed, or at least made conditional (not sure if that's needed). > > One more thing. It looks as though DB_PREFETCH_LIMIT and DB_PRELOAD_CONTROL > should be different for each mipmap level. > > Marek
Yeah, another leftover from various attempts; anyway, it's not the issue, and yes, preload/prefetch should be per mip level too. Cheers, Jerome _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev