Apps typically begin rendering with a call to glClear(), so when brw_workaround_depthstencil_alignment() moves a miplevel to a temporary buffer, the contents of that miplevel are often about to be erased anyway; in that case the blit that copies them into the temporary buffer is unnecessary and can be skipped.
This patch adds the necessary plumbing to determine when brw_workaround_depthstencil_alignment() is being called as a consequence of glClear(), and avoids the unnecessary blit when it is safe to do so. Reviewed-by: Chad Versace <chad.vers...@linux.intel.com> Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> v2: Eliminate unnecessary call to _mesa_is_depthstencil_format(). Fix handling of depth buffer in depth/stencil format. --- src/mesa/drivers/dri/i965/brw_clear.c | 4 +++- src/mesa/drivers/dri/i965/brw_context.h | 3 ++- src/mesa/drivers/dri/i965/brw_draw.c | 2 +- src/mesa/drivers/dri/i965/brw_misc_state.c | 24 +++++++++++++++++++----- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +- src/mesa/drivers/dri/intel/intel_fbo.c | 10 ++++++++-- src/mesa/drivers/dri/intel/intel_fbo.h | 3 ++- 7 files changed, 36 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c index cde1a06..e740f65 100644 --- a/src/mesa/drivers/dri/i965/brw_clear.c +++ b/src/mesa/drivers/dri/i965/brw_clear.c @@ -223,6 +223,8 @@ brw_clear(struct gl_context *ctx, GLbitfield mask) { struct brw_context *brw = brw_context(ctx); struct intel_context *intel = &brw->intel; + struct gl_framebuffer *fb = ctx->DrawBuffer; + bool partial_clear = ctx->Scissor.Enabled && !noop_scissor(ctx, fb); if (!_mesa_check_conditional_render(ctx)) return; @@ -232,7 +234,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask) } intel_prepare_render(intel); - brw_workaround_depthstencil_alignment(brw); + brw_workaround_depthstencil_alignment(brw, partial_clear ? 
0 : mask); if (mask & BUFFER_BIT_DEPTH) { if (brw_fast_clear_depth(ctx)) { diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index d2e2ade..8069567 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1130,7 +1130,8 @@ void brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt, struct intel_mipmap_tree *stencil_mt, uint32_t *out_tile_mask_x, uint32_t *out_tile_mask_y); -void brw_workaround_depthstencil_alignment(struct brw_context *brw); +void brw_workaround_depthstencil_alignment(struct brw_context *brw, + GLbitfield clear_mask); /*====================================================================== * brw_queryobj.c diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index e408185..809bcc5 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -439,7 +439,7 @@ static bool brw_try_draw_prims( struct gl_context *ctx, /* This workaround has to happen outside of brw_upload_state() because it * may flush the batchbuffer for a blit, affecting the state flags. 
*/ - brw_workaround_depthstencil_alignment(brw); + brw_workaround_depthstencil_alignment(brw, 0); /* Resolves must occur after updating renderbuffers, updating context state, * and finalizing textures but before setting up any hardware state for diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 1024c42..c0d6243 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -41,6 +41,7 @@ #include "brw_defines.h" #include "main/fbobject.h" +#include "main/glformats.h" /* Constant single cliprect for framebuffer object or DRI2 drawing */ static void upload_drawing_rect(struct brw_context *brw) @@ -328,7 +329,8 @@ get_stencil_miptree(struct intel_renderbuffer *irb) } void -brw_workaround_depthstencil_alignment(struct brw_context *brw) +brw_workaround_depthstencil_alignment(struct brw_context *brw, + GLbitfield clear_mask) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; @@ -341,10 +343,22 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw) struct intel_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb); uint32_t tile_x = 0, tile_y = 0, stencil_tile_x = 0, stencil_tile_y = 0; uint32_t stencil_draw_x = 0, stencil_draw_y = 0; + bool invalidate_depth = clear_mask & GL_DEPTH_BUFFER_BIT; + bool invalidate_stencil = clear_mask & GL_STENCIL_BUFFER_BIT; if (depth_irb) depth_mt = depth_irb->mt; + if (depth_irb && invalidate_depth + && _mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL) { + /* Depth buffer is in depth/stencil format, so it's only safe to + * invalidate it if we're also clearing stencil, and both depth_irb and + * stencil_irb point to the same miptree. 
+ */ + invalidate_depth = invalidate_stencil && depth_irb && stencil_irb + && depth_irb->mt == stencil_irb->mt; + } + uint32_t tile_mask_x, tile_mask_y; brw_get_depthstencil_tile_masks(depth_mt, stencil_mt, &tile_mask_x, &tile_mask_y); @@ -373,8 +387,7 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw) perf_debug("HW workaround: blitting depth level %d to a temporary " "to fix alignment (depth tile offset %d,%d)\n", depth_irb->mt_level, tile_x, tile_y); - - intel_renderbuffer_move_to_temp(intel, depth_irb); + intel_renderbuffer_move_to_temp(intel, depth_irb, invalidate_depth); /* In the case of stencil_irb being the same packed depth/stencil * texture but not the same rb, make it point at our rebased mt, too. */ @@ -435,7 +448,7 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw) "to fix alignment (stencil tile offset %d,%d)\n", stencil_irb->mt_level, stencil_tile_x, stencil_tile_y); - intel_renderbuffer_move_to_temp(intel, stencil_irb); + intel_renderbuffer_move_to_temp(intel, stencil_irb, invalidate_stencil); stencil_mt = get_stencil_miptree(stencil_irb); intel_miptree_get_image_offset(stencil_mt, @@ -459,7 +472,8 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw) tile_x, tile_y, stencil_tile_x, stencil_tile_y); - intel_renderbuffer_move_to_temp(intel, depth_irb); + intel_renderbuffer_move_to_temp(intel, depth_irb, + invalidate_depth); tile_x = depth_irb->draw_x & tile_mask_x; tile_y = depth_irb->draw_y & tile_mask_y; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 7979487..2d722d2 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1224,7 +1224,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, * select the image. So, instead, we just make a new single-level * miptree and render into that. 
*/ - intel_renderbuffer_move_to_temp(intel, irb); + intel_renderbuffer_move_to_temp(intel, irb, false); mt = irb->mt; } } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 7186978..410c393 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -991,7 +991,8 @@ intel_renderbuffer_resolve_depth(struct intel_context *intel, void intel_renderbuffer_move_to_temp(struct intel_context *intel, - struct intel_renderbuffer *irb) + struct intel_renderbuffer *irb, + bool invalidate) { struct intel_texture_image *intel_image = intel_texture_image(irb->tex_image); @@ -1009,7 +1010,12 @@ intel_renderbuffer_move_to_temp(struct intel_context *intel, irb->mt->num_samples, false /* force_y_tiling */); - intel_miptree_copy_teximage(intel, intel_image, new_mt); + /* If the invalidate flag is set, we don't need to blit the data across + * because it is about to be overwritten. + */ + if (!invalidate) + intel_miptree_copy_teximage(intel, intel_image, new_mt); + intel_miptree_reference(&irb->mt, intel_image->mt); intel_renderbuffer_set_draw_offset(irb); intel_miptree_release(&new_mt); diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index ce744bf..9313c35 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -198,7 +198,8 @@ intel_renderbuffer_resolve_depth(struct intel_context *intel, struct intel_renderbuffer *irb); void intel_renderbuffer_move_to_temp(struct intel_context *intel, - struct intel_renderbuffer *irb); + struct intel_renderbuffer *irb, + bool invalidate); unsigned intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples); -- 1.8.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev