--- src/intel/blorp/blorp_clear.c | 199 +++++++++++++++++++----------------------- 1 file changed, 88 insertions(+), 111 deletions(-)
diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c index 17d47a1..8d729a2 100644 --- a/src/intel/blorp/blorp_clear.c +++ b/src/intel/blorp/blorp_clear.c @@ -193,104 +193,7 @@ get_fast_clear_rect(const struct isl_device *dev, /* Only single sampled surfaces need to (and actually can) be resolved. */ if (aux_surf->usage == ISL_SURF_USAGE_CCS_BIT) { - /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render - * Target(s)", beneath the "Fast Color Clear" bullet (p327): - * - * Clear pass must have a clear rectangle that must follow - * alignment rules in terms of pixels and lines as shown in the - * table below. Further, the clear-rectangle height and width - * must be multiple of the following dimensions. If the height - * and width of the render target being cleared do not meet these - * requirements, an MCS buffer can be created such that it - * follows the requirement and covers the RT. - * - * The alignment size in the table that follows is related to the - * alignment size that is baked into the CCS surface format but with X - * alignment multiplied by 16 and Y alignment multiplied by 32. - */ - x_align = isl_format_get_layout(aux_surf->format)->bw; - y_align = isl_format_get_layout(aux_surf->format)->bh; - - x_align *= 16; - - /* SKL+ line alignment requirement for Y-tiled are half those of the prior - * generations. - */ - if (dev->info->gen >= 9) - y_align *= 16; - else - y_align *= 32; - - /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render - * Target(s)", beneath the "Fast Color Clear" bullet (p327): - * - * In order to optimize the performance MCS buffer (when bound to - * 1X RT) clear similarly to MCS buffer clear for MSRT case, - * clear rect is required to be scaled by the following factors - * in the horizontal and vertical directions: - * - * The X and Y scale down factors in the table that follows are each - * equal to half the alignment value computed above. 
- */ - x_scaledown = x_align / 2; - y_scaledown = y_align / 2; - - if (ISL_DEV_IS_HASWELL(dev)) { - /* The following text was added in the Haswell PRM, "3D Media GPGPU - * Engine" >> "MCS Buffer for Render Target(s)" >> Table "Color Clear - * of Non-MultiSampler Render Target Restrictions": - * - * "Clear rectangle must be aligned to two times the number of - * pixels in the table shown below due to 16X16 hashing across the - * slice." - * - * It has persisted in the documentation for all platforms up until - * Cannonlake and possibly even beyond. However, we believe that it - * is only needed on Haswell. - * - * There are a couple possible explanations for this restriction: - * - * 1) If you assume that the hardware is writing to the CCS as - * bytes, then the x/y_align computed above gives you an alignment - * in the CCS of 8x8 bytes and, if 16x16 is needed for hashing, we - * need to multiply by 2. - * - * 2) Haswell is a bit unique in that it's CCS tiling does not line - * up with Y-tiling on a cache-line granularity. Instead, it has - * an extra bit of swizzling in bit 9. Also, bit 6 swizzling - * applies to the CCS on Haswell. This means that Haswell CTS - * does not match on a cache-line granularity but it does match on - * a 2x2 cache line granularity. - * - * Clearly, the first explanation seems to follow documentation the - * best but they may be related. In any case, empirical evidence - * seems to confirm that it is, indeed required on Haswell. - * - * On Broadwell things get a bit stickier. Broadwell adds support - * for mip-mapped CCS with an alignment in the CCS of 256x128. For a - * 32bpb main surface, the above computation will yield a x/y_align - * of 128x128 for a Y-tiled main surface and 256x64 for X-tiled. In - * either case, if we double the alignment, we will get an alignment - * bigger than horizontal and vertical alignment of the CCS and fast - * clears of one LOD may leak into others. 
- * - * Starting with Skylake, the image alignment for the CCS is only - * 128x64 which is exactly the x/h_align computed above if the main - * surface has a 32bpb format. Also, the "Render Target Resolve" - * page in the bspec (not the PRM) says, "The Resolve Rectangle size - * is same as Clear Rectangle size from SKL+". The x/y_align - * computed above (without doubling) match the resolve rectangle - * calculation perfectly. - * - * Finally, to confirm all this, a full test run was performed on - * Feb. 9, 2018 with this doubling removed and the only platform - * which seemed to be affected was Haswell. The run consisted of - * piglit, dEQP, the Vulkan CTS 1.0.2, the OpenGL 4.5 CTS, and the - * OpenGL ES 3.2 CTS. - */ - x_align *= 2; - y_align *= 2; - } + unreachable("This function only supports MCS fast-clear"); } else { assert(aux_surf->usage == ISL_SURF_USAGE_MCS_BIT); @@ -826,14 +729,8 @@ blorp_ccs_op(struct blorp_batch *batch, enum isl_format format, enum isl_aux_op ccs_op) { - if (ccs_op == ISL_AUX_OP_FAST_CLEAR) { - blorp_fast_clear(batch, surf, format, level, start_layer, num_layers, - 0, 0, - minify(surf->surf->logical_level0_px.w, level), - minify(surf->surf->logical_level0_px.h, level)); - return; - } else if (ISL_DEV_GEN(batch->blorp->isl_dev) < 10 && - ccs_op == ISL_AUX_OP_AMBIGUATE) { + if (ISL_DEV_GEN(batch->blorp->isl_dev) < 10 && + ccs_op == ISL_AUX_OP_AMBIGUATE) { /* Prior to Cannonlake, the ambiguate is not available as a hardware * operation. Instead, we have to fake it by carefully binding the CCS * as a render target and clearing it to 0. We leave that complicated @@ -863,6 +760,11 @@ blorp_ccs_op(struct blorp_batch *batch, isl_format_get_layout(params.dst.aux_surf.format); assert(aux_fmtl->txc == ISL_TXC_CCS); + /* The PRM Sections entitled "Fast Color Clear" and "Render Target Resolve" + * contain tables for the scale down factor for fast clear and resolve + * rectangles. 
The values in those tables are easily computed from the + * CCS element block size. + */ unsigned x_scaledown, y_scaledown; if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 9) { x_scaledown = aux_fmtl->bw * 8; @@ -871,16 +773,91 @@ blorp_ccs_op(struct blorp_batch *batch, x_scaledown = aux_fmtl->bw * 8; y_scaledown = aux_fmtl->bh * 16; } else { - x_scaledown = aux_fmtl->bw / 2; - y_scaledown = aux_fmtl->bh / 2; + assert(ISL_DEV_GEN(batch->blorp->isl_dev) == 7); + if (ccs_op == ISL_AUX_OP_FAST_CLEAR) { + x_scaledown = aux_fmtl->bw * 8; + y_scaledown = aux_fmtl->bh * 16; + } else { + x_scaledown = aux_fmtl->bw / 2; + y_scaledown = aux_fmtl->bh / 2; + } } + + unsigned x_align = x_scaledown; + unsigned y_align = y_scaledown; + if (ccs_op == ISL_AUX_OP_FAST_CLEAR) { + /* The PRM Section entitled "Fast Color Clear" contains tables for the + * scaledown and alignment factors for fast clear operations. In all + * cases, the alignment table is 2x the scaledown table. + */ + x_align *= 2; + y_align *= 2; + + if (ISL_DEV_IS_HASWELL(batch->blorp->isl_dev)) { + /* The following text was added in the Haswell PRM, "3D Media GPGPU + * Engine" >> "MCS Buffer for Render Target(s)" >> Table "Color Clear + * of Non-MultiSampler Render Target Restrictions": + * + * "Clear rectangle must be aligned to two times the number of + * pixels in the table shown below due to 16X16 hashing across the + * slice." + * + * It has persisted in the documentation for all platforms up until + * Cannonlake and possibly even beyond. However, we believe that it + * is only needed on Haswell. + * + * There are a couple possible explanations for this restriction: + * + * 1) If you assume that the hardware is writing to the CCS as + * bytes, then the x/y_align computed above gives you an alignment + * in the CCS of 8x8 bytes and, if 16x16 is needed for hashing, we + * need to multiply by 2. + * + * 2) Haswell is a bit unique in that its CCS tiling does not line + * up with Y-tiling on a cache-line granularity. 
Instead, it has + * an extra bit of swizzling in bit 9. Also, bit 6 swizzling + * applies to the CCS on Haswell. This means that Haswell CCS + * does not match on a cache-line granularity but it does match on + * a 2x2 cache line granularity. + * + * Clearly, the first explanation seems to follow documentation the + * best but they may be related. In any case, empirical evidence + * seems to confirm that it is, indeed, required on Haswell. + * + * On Broadwell things get a bit stickier. Broadwell adds support + * for mip-mapped CCS with an alignment in the CCS of 256x128. For a + * 32bpb main surface, the above computation will yield an x/y_align + * of 128x128 for a Y-tiled main surface and 256x64 for X-tiled. In + * either case, if we double the alignment, we will get an alignment + * bigger than horizontal and vertical alignment of the CCS and fast + * clears of one LOD may leak into others. + * + * Starting with Skylake, the image alignment for the CCS is only + * 128x64 which is exactly the x/y_align computed above if the main + * surface has a 32bpb format. Also, the "Render Target Resolve" + * page in the bspec (not the PRM) says, "The Resolve Rectangle size + * is same as Clear Rectangle size from SKL+". The x/y_align + * computed above (without doubling) match the resolve rectangle + * calculation perfectly. + * + * Finally, to confirm all this, a full test run was performed on + * Feb. 9, 2018 with this doubling removed and the only platform + * which seemed to be affected was Haswell. The run consisted of + * piglit, dEQP, the Vulkan CTS 1.0.2, the OpenGL 4.5 CTS, and the + * OpenGL ES 3.2 CTS. 
+ */ + x_align *= 2; + y_align *= 2; + } + } + params.x0 = params.y0 = 0; params.x1 = minify(params.dst.aux_surf.logical_level0_px.width, level); params.y1 = minify(params.dst.aux_surf.logical_level0_px.height, level); - params.x1 = ALIGN(params.x1, x_scaledown) / x_scaledown; - params.y1 = ALIGN(params.y1, y_scaledown) / y_scaledown; + params.x1 = ALIGN(params.x1, x_align) / x_scaledown; + params.y1 = ALIGN(params.y1, y_align) / y_scaledown; - assert(ccs_op != ISL_AUX_OP_FAST_CLEAR); + memset(&params.wm_inputs.clear_color, 0xff, 4*sizeof(float)); params.fast_clear_op = ccs_op; params.num_layers = num_layers; -- 2.5.0.400.gff86faf _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev