This patch will use a new calculation to determine if a surface can be blitted from or to. Previously, the "total_height" member was used. Total_height in the case of 2d, 3d, and cube map arrays is the height of each slice/layer/face. Since the GL map APIS only ever deal with a slice at a time however, the determining factor is really the height of one slice.
This patch also has a side effect of not needing to set potentially large texture objects to the CPU domain, which implies we do not need to clflush the entire objects. (See references below for a kernel patch to achieve the same thing) With both the Y-tiled surfaces, and the removal of excessive clflushes, this improves the terrain benchmark on Cherryview (data collected by Jordan) Difference at 95.0% confidence 17.9236 +/- 0.252116 153.005% +/- 2.1522% (Student's t, pooled s = 0.205889) With the performance governor, and HI-Z raw stall optimization, the improvement is even more stark on Braswell. Jordan was extremely helpful in creating this patch. Consider him co-author. References: http://patchwork.freedesktop.org/patch/38909/ Cc: Jordan Justen <jordan.l.jus...@intel.com> Signed-off-by: Ben Widawsky <b...@bwidawsk.net> --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 71 +++++++++++++++++++++------ 1 file changed, 56 insertions(+), 15 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 639309b..1319f1e 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -86,6 +86,27 @@ compute_msaa_layout(struct brw_context *brw, mesa_format format, GLenum target) } } +static uint32_t +compute_real_blit_height(struct intel_mipmap_tree *mt) +{ + switch (mt->target) { + case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_1D_ARRAY: + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + case GL_TEXTURE_CUBE_MAP_ARRAY: + assert(mt->logical_depth0); + return mt->qpitch; + case GL_TEXTURE_3D: + /* FIXME 3d textures don't have a qpitch. I think it's simply the tiled + * aligned mt->physical_height0. Since 3D textures aren't used often, just + * print the perf debug from the caller and bail + */ + /* fallthrough */ + default: + return mt->total_height; + } +} /** * For single-sampled render targets ("non-MSRT"), the MCS buffer is a @@ -416,6 +437,17 @@ intel_miptree_create_layout(struct brw_context *brw, return mt; } +static bool +miptree_exceeds_blit_height(struct intel_mipmap_tree *mt) +{ + /* FIXME: Add 3d texture support */ + if (mt->target == GL_TEXTURE_3D && mt->total_height >= 32768) { + return true; + } + + return compute_real_blit_height(mt) >= 32768; +} + /** * \brief Helper function for intel_miptree_create(). */ @@ -473,10 +505,22 @@ intel_miptree_choose_tiling(struct brw_context *brw, if (minimum_pitch < 64) return I915_TILING_NONE; - if (ALIGN(minimum_pitch, 512) >= 32768 || - mt->total_width >= 32768 || mt->total_height >= 32768) { - perf_debug("%dx%d miptree too large to blit, falling back to untiled", - mt->total_width, mt->total_height); + if (ALIGN(minimum_pitch, 512) >= 32768 || miptree_exceeds_blit_height(mt)) { + if (mt->format == GL_TEXTURE_3D) { + perf_debug("Unsupported large 3D texture blit. " + "Falling back to untiled.\n"); + } else { + /* qpitch should always be greater than or equal to the tile aligned + * maximum of lod0 height. That is sufficient to determine if we can + * blit, but the most optimal value is potentially less. + */ + if (mt->physical_height0 < 32768) { + perf_debug("Potentially skipped a blittable buffers %d\n", + mt->physical_height0); + } + perf_debug("%dx%d miptree too large to blit, falling back to untiled", + mt->total_width, mt->total_height); + } return I915_TILING_NONE; } @@ -620,11 +664,14 @@ intel_miptree_create(struct brw_context *brw, BO_ALLOC_FOR_RENDER : 0)); mt->pitch = pitch; + uint32_t size = ALIGN(compute_real_blit_height(mt) * mt->pitch, 512); + assert(size <= mt->bo->size); + /* If the BO is too large to fit in the aperture, we need to use the * BLT engine to support it. The BLT paths can't currently handle Y-tiling, * so we need to fall back to X. */ - if (y_or_x && mt->bo->size >= brw->max_gtt_map_object_size) { + if (y_or_x && size >= brw->max_gtt_map_object_size) { perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n", mt->total_width, mt->total_height); @@ -1748,6 +1795,8 @@ intel_miptree_map_gtt(struct brw_context *brw, intptr_t x = map->x; intptr_t y = map->y; + assert(mt->bo->size < brw->max_gtt_map_object_size); + /* For compressed formats, the stride is the number of bytes per * row of blocks. intel_miptree_get_image_offset() already does * the divide. @@ -2247,16 +2296,8 @@ static bool can_blit_slice(struct intel_mipmap_tree *mt, unsigned int level, unsigned int slice) { - uint32_t image_x; - uint32_t image_y; - intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); - if (image_x >= 32768 || image_y >= 32768) - return false; - - if (mt->pitch >= 32768) - return false; - - return true; + return compute_real_blit_height(mt) < 32768 && + mt->pitch < 32768; } /** -- 2.2.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev