On Fri, Jun 16, 2017 at 03:41:27PM -0700, Jason Ekstrand wrote: > This commit replaces the complex and confusing set of disable flags with > two fairly straightforward fields which describe the intended auxiliary > surface usage and whether or not the miptree supports fast clears. > Right now, supports_fast_clear can be entirely derived from aux_usage > but that will not always be the case. > > This commit makes functional changes. One of these changes is that it > re-enables multisampled fast-clears which were accidentally disabled in > cec30a666930ddb8476a9452a89364a24979ff62 around a year ago. It should > also enable CCS_E for window-system buffers which are Y-tiled. They > will still get a full resolve like CCS_D but we will at least get some > of the advantage of compression. > --- > src/mesa/drivers/dri/i965/brw_blorp.c | 4 +- > src/mesa/drivers/dri/i965/intel_fbo.c | 2 +- > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 190 > +++++++++++++------------- > src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 43 +++--- > 4 files changed, 120 insertions(+), 119 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c > b/src/mesa/drivers/dri/i965/brw_blorp.c > index 00092ee..9bd25f0 100644 > --- a/src/mesa/drivers/dri/i965/brw_blorp.c > +++ b/src/mesa/drivers/dri/i965/brw_blorp.c > @@ -762,7 +762,7 @@ do_single_blorp_clear(struct brw_context *brw, struct > gl_framebuffer *fb, > if (set_write_disables(irb, ctx->Color.ColorMask[buf], > color_write_disable)) > can_fast_clear = false; > > - if (irb->mt->aux_disable & INTEL_AUX_DISABLE_CCS || > + if (!irb->mt->supports_fast_clear || > !brw_is_color_fast_clear_compatible(brw, irb->mt, > &ctx->Color.ClearColor)) > can_fast_clear = false; > > @@ -785,7 +785,7 @@ do_single_blorp_clear(struct brw_context *brw, struct > gl_framebuffer *fb, > */ > if (!irb->mt->mcs_buf) { > assert(!intel_miptree_is_lossless_compressed(brw, irb->mt)); > - if (!intel_miptree_alloc_ccs(brw, irb->mt, false)) { > + if 
(!intel_miptree_alloc_ccs(brw, irb->mt)) { > /* MCS allocation failed--probably this will only happen in > * out-of-memory conditions. But in any case, try to recover > * by falling back to a non-blorp clear technique. > diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c > b/src/mesa/drivers/dri/i965/intel_fbo.c > index ee4aba9..6a64bcb 100644 > --- a/src/mesa/drivers/dri/i965/intel_fbo.c > +++ b/src/mesa/drivers/dri/i965/intel_fbo.c > @@ -555,7 +555,7 @@ intel_renderbuffer_update_wrapper(struct brw_context *brw, > > intel_renderbuffer_set_draw_offset(irb); > > - if (intel_miptree_wants_hiz_buffer(brw, mt)) { > + if (mt->aux_usage == ISL_AUX_USAGE_HIZ && !mt->hiz_buf) { > intel_miptree_alloc_hiz(brw, mt); > if (!mt->hiz_buf) > return false; > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > index 0f6d542..101317f 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > @@ -64,7 +64,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw, > */ > static enum intel_msaa_layout > compute_msaa_layout(struct brw_context *brw, mesa_format format, > - enum intel_aux_disable aux_disable) > + uint32_t layout_flags) > { > /* Prior to Gen7, all MSAA surfaces used IMS layout. */ > if (brw->gen < 7) > @@ -90,7 +90,7 @@ compute_msaa_layout(struct brw_context *brw, mesa_format > format, > */ > if (brw->gen == 7 && _mesa_get_format_datatype(format) == GL_INT) { > return INTEL_MSAA_LAYOUT_UMS; > - } else if (aux_disable & INTEL_AUX_DISABLE_MCS) { > + } else if (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) { > /* We can't use the CMS layout because it uses an aux buffer, the > MCS > * buffer. So fallback to UMS, which is identical to CMS without the > * MCS. 
*/ > @@ -148,9 +148,6 @@ intel_miptree_supports_ccs(struct brw_context *brw, > if (brw->gen < 7) > return false; > > - if (mt->aux_disable & INTEL_AUX_DISABLE_MCS) > - return false; > - > /* This function applies only to non-multisampled render targets. */ > if (mt->num_samples > 1) > return false; > @@ -215,6 +212,26 @@ intel_miptree_supports_ccs(struct brw_context *brw, > return true; > } > > +static bool > +intel_miptree_supports_hiz(struct brw_context *brw, > + struct intel_mipmap_tree *mt) > +{ > + if (!brw->has_hiz) > + return false; > + > + switch (mt->format) { > + case MESA_FORMAT_Z_FLOAT32: > + case MESA_FORMAT_Z32_FLOAT_S8X24_UINT: > + case MESA_FORMAT_Z24_UNORM_X8_UINT: > + case MESA_FORMAT_Z24_UNORM_S8_UINT: > + case MESA_FORMAT_Z_UNORM16: > + return true; > + default: > + return false; > + } > +} > + > + > /* On Gen9 support for color buffer compression was extended to single > * sampled surfaces. This is a helper considering both auxiliary buffer > * type and number of samples telling if the given miptree represents > @@ -320,10 +337,9 @@ intel_miptree_create_layout(struct brw_context *brw, > mt->logical_width0 = width0; > mt->logical_height0 = height0; > mt->logical_depth0 = depth0; > - mt->aux_disable = (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) != 0 ? 
> - INTEL_AUX_DISABLE_ALL : INTEL_AUX_DISABLE_NONE; > - mt->aux_disable |= INTEL_AUX_DISABLE_CCS; > mt->is_scanout = (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT) != 0; > + mt->aux_usage = ISL_AUX_USAGE_NONE; > + mt->supports_fast_clear = false; > mt->aux_state = NULL; > mt->cpp = _mesa_get_format_bytes(format); > mt->num_samples = num_samples; > @@ -337,7 +353,7 @@ intel_miptree_create_layout(struct brw_context *brw, > int depth_multiply = 1; > if (num_samples > 1) { > /* Adjust width/height/depth for MSAA */ > - mt->msaa_layout = compute_msaa_layout(brw, format, mt->aux_disable); > + mt->msaa_layout = compute_msaa_layout(brw, format, layout_flags); > if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) { > /* From the Ivybridge PRM, Volume 1, Part 1, page 108: > * "If the surface is multisampled and it is a depth or stencil > @@ -460,8 +476,7 @@ intel_miptree_create_layout(struct brw_context *brw, > if (!(layout_flags & MIPTREE_LAYOUT_FOR_BO) && > _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL && > (brw->must_use_separate_stencil || > - (brw->has_separate_stencil && > - intel_miptree_wants_hiz_buffer(brw, mt)))) { > + (brw->has_separate_stencil && intel_miptree_supports_hiz(brw, mt)))) { > uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD; > if (brw->gen == 6) { > stencil_flags |= MIPTREE_LAYOUT_TILING_ANY; > @@ -530,14 +545,44 @@ intel_miptree_create_layout(struct brw_context *brw, > return NULL; > } > > - if (mt->aux_disable & INTEL_AUX_DISABLE_MCS) > - assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS); > - > return mt; > } > > > /** > + * Choose the aux usage for this miptree. This function must be called > fairly > + * late in the miptree create process after we have a tiling. 
> + */ > +static void > +intel_miptree_choose_aux_usage(struct brw_context *brw, > + struct intel_mipmap_tree *mt) > +{ > + assert(mt->aux_usage == ISL_AUX_USAGE_NONE); > + > + if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) { > + mt->aux_usage = ISL_AUX_USAGE_MCS; > + } else if (intel_tiling_supports_ccs(brw, mt->tiling) && > + intel_miptree_supports_ccs(brw, mt)) { > + if (!unlikely(INTEL_DEBUG & DEBUG_NO_RBC) && > + brw->gen >= 9 && !mt->is_scanout &&
In the commit message you said that this patch enables CCS_E for winsys buffers. I don't see how that happens, as we check for is_scanout here (which gets set in intel_miptree_create_layout() when intel_update_winsys_renderbuffer_miptree() passes MIPTREE_LAYOUT_FOR_SCANOUT). Other than that this looks good: Reviewed-by: Topi Pohjolainen <topi.pohjolai...@intel.com> > + intel_miptree_supports_ccs_e(brw, mt)) { > + mt->aux_usage = ISL_AUX_USAGE_CCS_E; > + } else { > + mt->aux_usage = ISL_AUX_USAGE_CCS_D; > + } > + } else if (intel_miptree_supports_hiz(brw, mt)) { > + mt->aux_usage = ISL_AUX_USAGE_HIZ; > + } > + > + /* We can do fast-clear on all auxiliary surface types that are > + * allocated through the normal texture creation paths. > + */ > + if (mt->aux_usage != ISL_AUX_USAGE_NONE) > + mt->supports_fast_clear = true; > +} > + > + > +/** > * Choose an appropriate uncompressed format for a requested > * compressed format, if unsupported. > */ > @@ -670,6 +715,9 @@ miptree_create(struct brw_context *brw, > if (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT) > mt->bo->cache_coherent = false; > > + if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX)) > + intel_miptree_choose_aux_usage(brw, mt); > + > return mt; > } > > @@ -726,29 +774,14 @@ intel_miptree_create(struct brw_context *brw, > } > } > > - /* If this miptree is capable of supporting fast color clears, set > - * fast_clear_state appropriately to ensure that fast clears will occur. > - * Allocation of the MCS miptree will be deferred until the first fast > - * clear actually occurs or when compressed single sampled buffer is > - * written by the GPU for the first time. > + /* Since CCS_E can compress more than just clear color, we create the CCS > + * for it up-front. For CCS_D which only compresses clears, we create the > + * CCS on-demand when a clear occurs that wants one. 
> */ > - if (intel_tiling_supports_ccs(brw, mt->tiling) && > - intel_miptree_supports_ccs(brw, mt)) { > - mt->aux_disable &= ~INTEL_AUX_DISABLE_CCS; > - assert(brw->gen < 8 || mt->halign == 16 || num_samples <= 1); > - > - /* On Gen9+ clients are not currently capable of consuming compressed > - * single-sampled buffers. Disabling compression allows us to skip > - * resolves. > - */ > - const bool lossless_compression_disabled = INTEL_DEBUG & DEBUG_NO_RBC; > - const bool is_lossless_compressed = > - unlikely(!lossless_compression_disabled) && > - brw->gen >= 9 && !mt->is_scanout && > - intel_miptree_supports_ccs_e(brw, mt); > - > - if (is_lossless_compressed) { > - intel_miptree_alloc_ccs(brw, mt, is_lossless_compressed); > + if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) { > + if (!intel_miptree_alloc_ccs(brw, mt)) { > + intel_miptree_release(&mt); > + return NULL; > } > } > > @@ -805,6 +838,21 @@ intel_miptree_create_for_bo(struct brw_context *brw, > mt->offset = offset; > mt->tiling = tiling; > > + if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX)) { > + intel_miptree_choose_aux_usage(brw, mt); > + > + /* Since CCS_E can compress more than just clear color, we create the > + * CCS for it up-front. For CCS_D which only compresses clears, we > + * create the CCS on-demand when a clear occurs that wants one. > + */ > + if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) { > + if (!intel_miptree_alloc_ccs(brw, mt)) { > + intel_miptree_release(&mt); > + return NULL; > + } > + } > + } > + > return mt; > } > > @@ -849,16 +897,6 @@ intel_update_winsys_renderbuffer_miptree(struct > brw_context *intel, > if (!singlesample_mt) > goto fail; > > - /* If this miptree is capable of supporting fast color clears, set > - * mcs_state appropriately to ensure that fast clears will occur. > - * Allocation of the MCS miptree will be deferred until the first fast > - * clear actually occurs. 
> - */ > - if (intel_tiling_supports_ccs(intel, singlesample_mt->tiling) && > - intel_miptree_supports_ccs(intel, singlesample_mt)) { > - singlesample_mt->aux_disable &= ~INTEL_AUX_DISABLE_CCS; > - } > - > if (num_samples == 0) { > intel_miptree_release(&irb->mt); > irb->mt = singlesample_mt; > @@ -913,7 +951,7 @@ intel_miptree_create_for_renderbuffer(struct brw_context > *brw, > if (!mt) > goto fail; > > - if (intel_miptree_wants_hiz_buffer(brw, mt)) { > + if (mt->aux_usage == ISL_AUX_USAGE_HIZ) { > ok = intel_miptree_alloc_hiz(brw, mt); > if (!ok) > goto fail; > @@ -1492,7 +1530,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw, > { > assert(brw->gen >= 7); /* MCS only used on Gen7+ */ > assert(mt->mcs_buf == NULL); > - assert((mt->aux_disable & INTEL_AUX_DISABLE_MCS) == 0); > + assert(mt->aux_usage == ISL_AUX_USAGE_MCS); > > /* Choose the correct format for the MCS buffer. All that really matters > * is that we allocate the right buffer size, since we'll always be > @@ -1551,11 +1589,11 @@ intel_miptree_alloc_mcs(struct brw_context *brw, > > bool > intel_miptree_alloc_ccs(struct brw_context *brw, > - struct intel_mipmap_tree *mt, > - bool is_ccs_e) > + struct intel_mipmap_tree *mt) > { > assert(mt->mcs_buf == NULL); > - assert(!(mt->aux_disable & (INTEL_AUX_DISABLE_MCS | > INTEL_AUX_DISABLE_CCS))); > + assert(mt->aux_usage == ISL_AUX_USAGE_CCS_E || > + mt->aux_usage == ISL_AUX_USAGE_CCS_D); > > struct isl_surf temp_main_surf; > struct isl_surf temp_ccs_surf; > @@ -1590,7 +1628,8 @@ intel_miptree_alloc_ccs(struct brw_context *brw, > * not use the gpu access flag which can cause an unnecessary delay if the > * backing pages happened to be just used by the GPU. > */ > - const uint32_t alloc_flags = is_ccs_e ? 0 : BO_ALLOC_FOR_RENDER; > + const uint32_t alloc_flags = > + mt->aux_usage == ISL_AUX_USAGE_CCS_E ? 
0 : BO_ALLOC_FOR_RENDER; > > buf->bo = brw_bo_alloc_tiled(brw->bufmgr, "ccs-miptree", buf->size, > I915_TILING_Y, buf->pitch, alloc_flags); > @@ -1607,7 +1646,7 @@ intel_miptree_alloc_ccs(struct brw_context *brw, > * used for lossless compression which requires similar initialisation > * as multi-sample compression. > */ > - if (is_ccs_e) { > + if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) { > /* Hardware sets the auxiliary buffer to all zeroes when it does full > * resolve. Initialize it accordingly in case the first renderer is > * cpu (or other none compression aware party). > @@ -1868,36 +1907,11 @@ intel_hiz_miptree_buf_create(struct brw_context *brw, > } > > bool > -intel_miptree_wants_hiz_buffer(struct brw_context *brw, > - struct intel_mipmap_tree *mt) > -{ > - if (!brw->has_hiz) > - return false; > - > - if (mt->hiz_buf != NULL) > - return false; > - > - if (mt->aux_disable & INTEL_AUX_DISABLE_HIZ) > - return false; > - > - switch (mt->format) { > - case MESA_FORMAT_Z_FLOAT32: > - case MESA_FORMAT_Z32_FLOAT_S8X24_UINT: > - case MESA_FORMAT_Z24_UNORM_X8_UINT: > - case MESA_FORMAT_Z24_UNORM_S8_UINT: > - case MESA_FORMAT_Z_UNORM16: > - return true; > - default: > - return false; > - } > -} > - > -bool > intel_miptree_alloc_hiz(struct brw_context *brw, > struct intel_mipmap_tree *mt) > { > assert(mt->hiz_buf == NULL); > - assert((mt->aux_disable & INTEL_AUX_DISABLE_HIZ) == 0); > + assert(mt->aux_usage == ISL_AUX_USAGE_HIZ); > > enum isl_aux_state **aux_state = > create_aux_state_map(mt, ISL_AUX_STATE_AUX_INVALID); > @@ -2016,7 +2030,7 @@ intel_miptree_check_color_resolve(const struct > brw_context *brw, > unsigned level, unsigned layer) > { > > - if ((mt->aux_disable & INTEL_AUX_DISABLE_CCS) || !mt->mcs_buf) > + if (!mt->mcs_buf) > return; > > /* Fast color clear is supported for mipmapped surfaces only on Gen8+. 
*/ > @@ -2645,7 +2659,6 @@ intel_miptree_make_shareable(struct brw_context *brw, > 0, INTEL_REMAINING_LAYERS, false, false); > > if (mt->mcs_buf) { > - mt->aux_disable |= (INTEL_AUX_DISABLE_CCS | INTEL_AUX_DISABLE_MCS); > brw_bo_unreference(mt->mcs_buf->bo); > free(mt->mcs_buf); > mt->mcs_buf = NULL; > @@ -2659,7 +2672,6 @@ intel_miptree_make_shareable(struct brw_context *brw, > } > > if (mt->hiz_buf) { > - mt->aux_disable |= INTEL_AUX_DISABLE_HIZ; > intel_miptree_hiz_buffer_free(mt->hiz_buf); > mt->hiz_buf = NULL; > > @@ -2674,6 +2686,8 @@ intel_miptree_make_shareable(struct brw_context *brw, > free(mt->aux_state); > mt->aux_state = NULL; > } > + > + mt->aux_usage = ISL_AUX_USAGE_NONE; > } > > > @@ -3716,17 +3730,7 @@ intel_miptree_get_aux_isl_surf(struct brw_context *brw, > aux_pitch = mt->mcs_buf->pitch; > aux_qpitch = mt->mcs_buf->qpitch; > > - if (mt->num_samples > 1) { > - assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS); > - *usage = ISL_AUX_USAGE_MCS; > - } else if (intel_miptree_is_lossless_compressed(brw, mt)) { > - assert(brw->gen >= 9); > - *usage = ISL_AUX_USAGE_CCS_E; > - } else if ((mt->aux_disable & INTEL_AUX_DISABLE_CCS) == 0) { > - *usage = ISL_AUX_USAGE_CCS_D; > - } else { > - unreachable("Invalid MCS miptree"); > - } > + *usage = mt->aux_usage; > } else if (mt->hiz_buf) { > aux_pitch = mt->hiz_buf->aux_base.pitch; > aux_qpitch = mt->hiz_buf->aux_base.qpitch; > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > index aa33967..f34be9a 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > @@ -287,16 +287,6 @@ enum miptree_array_layout { > GEN6_HIZ_STENCIL, > }; > > -enum intel_aux_disable { > - INTEL_AUX_DISABLE_NONE = 0, > - INTEL_AUX_DISABLE_HIZ = 1 << 1, > - INTEL_AUX_DISABLE_MCS = 1 << 2, > - INTEL_AUX_DISABLE_CCS = 1 << 3, > - INTEL_AUX_DISABLE_ALL = INTEL_AUX_DISABLE_HIZ | > - INTEL_AUX_DISABLE_MCS | > - 
INTEL_AUX_DISABLE_CCS > -}; > - > /** > * Miptree aux buffer. These buffers are associated with a miptree, but the > * format is managed by the hardware. > @@ -576,6 +566,25 @@ struct intel_mipmap_tree > struct intel_miptree_hiz_buffer *hiz_buf; > > /** > + * \brief The type of auxiliary compression used by this miptree. > + * > + * This describes the type of auxiliary compression that is intended to be > + * used by this miptree. An aux usage of ISL_AUX_USAGE_NONE means that > + * auxiliary compression is permanently disabled. An aux usage other than > + * ISL_AUX_USAGE_NONE does not imply that the auxiliary buffer has > actually > + * been allocated nor does it imply that auxiliary compression will always > + * be enabled for this surface. For instance, with CCS_D, we may allocate > + * the CCS on-the-fly and it may not be used for texturing if the miptree > + * is fully resolved. > + */ > + enum isl_aux_usage aux_usage; > + > + /** > + * \brief Whether or not this miptree supports fast clears. > + */ > + bool supports_fast_clear; > + > + /** > * \brief Maps miptree slices to their current aux state > * > * This two-dimensional array is indexed as [level][layer] and stores an > @@ -631,13 +640,6 @@ struct intel_mipmap_tree > union isl_color_value fast_clear_color; > > /** > - * Disable allocation of auxiliary buffers, such as the HiZ buffer and MCS > - * buffer. This is useful for sharing the miptree bo with an external > client > - * that doesn't understand auxiliary buffers. > - */ > - enum intel_aux_disable aux_disable; > - > - /** > * Tells if the underlying buffer is to be also consumed by entities other > * than the driver. This allows logic to turn off features such as > lossless > * compression which is not currently understood by client applications. 
> @@ -655,8 +657,7 @@ intel_miptree_is_lossless_compressed(const struct > brw_context *brw, > > bool > intel_miptree_alloc_ccs(struct brw_context *brw, > - struct intel_mipmap_tree *mt, > - bool is_ccs_e); > + struct intel_mipmap_tree *mt); > > enum { > MIPTREE_LAYOUT_ACCELERATED_UPLOAD = 1 << 0, > @@ -814,10 +815,6 @@ intel_miptree_copy_teximage(struct brw_context *brw, > * functions on a miptree without HiZ. In that case, each function is a > no-op. > */ > > -bool > -intel_miptree_wants_hiz_buffer(struct brw_context *brw, > - struct intel_mipmap_tree *mt); > - > /** > * \brief Allocate the miptree's embedded HiZ miptree. > * \see intel_mipmap_tree:hiz_mt > -- > 2.5.0.400.gff86faf > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev