On Wed, Nov 23, 2016 at 01:10:59PM -0800, Jason Ekstrand wrote: > On Wed, Nov 23, 2016 at 1:16 AM, Topi Pohjolainen > <[1]topi.pohjolai...@gmail.com> wrote: > > One can now also delete intel_get_non_msrt_mcs_alignment(). > Signed-off-by: Topi Pohjolainen <[2]topi.pohjolai...@intel.com> > --- > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 138 > +++++++------------------- > src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 4 - > 2 files changed, 38 insertions(+), 104 deletions(-) > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > index a4a7ee0..9428e7b 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > @@ -101,66 +101,6 @@ compute_msaa_layout(struct brw_context *brw, > mesa_format format, > } > } > - > -/** > - * For single-sampled render targets ("non-MSRT"), the MCS buffer > is a > - * scaled-down bitfield representation of the color buffer which is > capable of > - * recording when blocks of the color buffer are equal to the clear > value. > - * This function returns the block size that will be used by the > MCS buffer > - * corresponding to a certain color miptree. > - * > - * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render > Target(s)", > - * beneath the "Fast Color Clear" bullet (p327): > - * > - * The following table describes the RT alignment > - * > - * Pixels Lines > - * TiledY RT CL > - * bpp > - * 32 8 4 > - * 64 4 4 > - * 128 2 4 > - * TiledX RT CL > - * bpp > - * 32 16 2 > - * 64 8 2 > - * 128 4 2 > - * > - * This alignment has the following uses: > - * > - * - For figuring out the size of the MCS buffer. Each 4k tile in > the MCS > - * buffer contains 128 blocks horizontally and 256 blocks > vertically. > - * > - * - For figuring out alignment restrictions for a fast clear > operation. Fast > - * clear operations must always clear aligned multiples of 16 > blocks > - * horizontally and 32 blocks vertically. 
> - * > - * - For scaling down the coordinates sent through the render > pipeline during > - * a fast clear. X coordinates must be scaled down by 8 times > the block > - * width, and Y coordinates by 16 times the block height. > - * > - * - For scaling down the coordinates sent through the render > pipeline during > - * a "Render Target Resolve" operation. X coordinates must be > scaled down > - * by half the block width, and Y coordinates by half the block > height. > - */ > -void > -intel_get_non_msrt_mcs_alignment(const struct intel_mipmap_tree > *mt, > - unsigned *width_px, unsigned > *height) > -{ > - switch (mt->tiling) { > - default: > - unreachable("Non-MSRT MCS requires X or Y tiling"); > - /* In release builds, fall through */ > - case I915_TILING_Y: > - *width_px = 32 / mt->cpp; > - *height = 4; > - break; > - case I915_TILING_X: > - *width_px = 64 / mt->cpp; > - *height = 2; > - } > -} > - > bool > intel_tiling_supports_non_msrt_mcs(const struct brw_context *brw, > unsigned tiling) > @@ -1654,55 +1594,53 @@ intel_miptree_alloc_non_msrt_mcs(struct > brw_context *brw, > assert(!mt->disable_aux_buffers); > assert(!mt->no_ccs); > - /* The format of the MCS buffer is opaque to the driver; all > that matters > - * is that we get its size and pitch right. We'll pretend that > the format > - * is R32. Since an MCS tile covers 128 blocks horizontally, > and a Y-tiled > - * R32 buffer is 32 pixels across, we'll need to scale the width > down by > - * the block width and then a further factor of 4. Since an MCS > tile > - * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 > rows high, > - * we'll need to scale the height down by the block height and > then a > - * further factor of 8. 
> - */ > - const mesa_format format = MESA_FORMAT_R_UINT32; > - unsigned block_width_px; > - unsigned block_height; > - intel_get_non_msrt_mcs_alignment(mt, &block_width_px, > &block_height); > - unsigned width_divisor = block_width_px * 4; > - unsigned height_divisor = block_height * 8; > - > - /* The Skylake MCS is twice as tall as the Broadwell MCS. > - * > - * In pre-Skylake, each bit in the MCS contained the state of 2 > cachelines > - * in the main surface. In Skylake, it's two bits. The extra > bit > - * doubles the MCS height, not width, because in Skylake the MCS > is always > - * Y-tiled. > + struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1); > + if (!buf) > + return false; > + > + struct isl_surf temp_main_surf; > + struct isl_surf temp_ccs_surf; > + > + /* Create first an ISL presentation for the main color surface > and let ISL > + * calculate equivalent CCS surface against it. > */ > - if (brw->gen >= 9) > - height_divisor /= 2; > + intel_miptree_get_isl_surf(brw, mt, &temp_main_surf); > + if (!isl_surf_get_ccs_surf(&brw->isl_dev, &temp_main_surf, > &temp_ccs_surf)) > + return false; > > You're leaking the aux_buffer here. Maybe move this to before > allocating the aux_buffer?
Ouch. Good catch. The allocation can actually be moved further down, to just after the assert. > > - unsigned mcs_width = > - ALIGN(mt->logical_width0, width_divisor) / width_divisor; > - unsigned mcs_height = > - ALIGN(mt->logical_height0, height_divisor) / height_divisor; > - assert(mt->logical_depth0 == 1); > + assert(temp_ccs_surf.size && > + (temp_ccs_surf.size % temp_ccs_surf.row_pitch == 0)); > + > + buf->size = temp_ccs_surf.size; > + buf->pitch = temp_ccs_surf.row_pitch; > + buf->qpitch = isl_surf_get_array_pitch_sa_rows(&temp_ccs_surf); > - uint32_t layout_flags = > - (brw->gen >= 8) ? MIPTREE_LAYOUT_FORCE_HALIGN16 : 0; > /* In case of compression mcs buffer needs to be initialised > requiring the > * buffer to be immediately mapped to cpu space for writing. > Therefore do > * not use the gpu access flag which can cause an unnecessary > delay if the > * backing pages happened to be just used by the GPU. > */ > - if (!is_lossless_compressed) > - layout_flags |= MIPTREE_LAYOUT_ACCELERATED_UPLOAD; > - > - mt->mcs_buf = intel_mcs_miptree_buf_create(brw, mt, > - format, > - mcs_width, > - mcs_height, > - layout_flags); > - if (!mt->mcs_buf) > + const uint32_t alloc_flags = > + is_lossless_compressed ? 0 : BO_ALLOC_FOR_RENDER; > + uint32_t tiling = I915_TILING_Y; > + unsigned long pitch; > + > + /* ISL has stricter set of alignment rules then the drm > allocator. > + * Therefore one can pass the ISL dimensions in terms of bytes > instead of > + * trying to recalculate based on different format block sizes. 
> + */ > + buf->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "ccs-miptree", > + buf->pitch, buf->size / > buf->pitch, > + 1, &tiling, &pitch, > alloc_flags); > + if (buf->bo) { > + assert(pitch == buf->pitch); > + assert(tiling == I915_TILING_Y); > + } else { > + free(buf); > return false; > + } > + > + mt->mcs_buf = buf; > /* From Gen9 onwards single-sampled (non-msrt) auxiliary buffers > are > * used for lossless compression which requires similar > initialisation > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > index 51ab664..38c00f8 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > @@ -656,10 +656,6 @@ struct intel_mipmap_tree > GLuint refcount; > }; > -void > -intel_get_non_msrt_mcs_alignment(const struct intel_mipmap_tree > *mt, > - unsigned *width_px, unsigned > *height); > - > bool > intel_miptree_is_lossless_compressed(const struct brw_context *brw, > const struct intel_mipmap_tree > *mt); > -- > 2.5.5 > _______________________________________________ > mesa-dev mailing list > [3]mesa-dev@lists.freedesktop.org > [4]https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > References > > 1. mailto:topi.pohjolai...@gmail.com > 2. mailto:topi.pohjolai...@intel.com > 3. mailto:mesa-dev@lists.freedesktop.org > 4. https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev