Splitting intel_miptree_map() like so should help with the yuck factor. Though don't we also need to give the stencil_mt a similar treatment to avoid slow reads?
Note the map should really record what method intel_miptree_map() used so that it can be unwound correctly without chasing the same decision tree (too easy for mistakes to occur). --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 150 +++++++++++------- 1 file changed, 95 insertions(+), 55 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 89074a64930..1314547cc6c 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -49,6 +49,17 @@ #define FILE_DEBUG_FLAG DEBUG_MIPTREE +static void __intel_miptree_map(struct brw_context *brw, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice, + struct intel_miptree_map *map); +static void __intel_miptree_unmap(struct brw_context *brw, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice, + struct intel_miptree_map *map); + static void *intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt, GLbitfield mode); @@ -3441,27 +3452,31 @@ intel_miptree_map_depthstencil(struct brw_context *brw, if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { uint32_t *packed_map = map->ptr; uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_READ_BIT); - uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_READ_BIT); unsigned int s_image_x, s_image_y; - unsigned int z_image_x, z_image_y; + + struct intel_miptree_map z_map = { + .mode = GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT, + .x = map->x, + .y = map->y, + .w = map->w, + .h = map->h, + }; + __intel_miptree_map(brw, z_mt, level, slice, &z_map); intel_miptree_get_image_offset(s_mt, level, slice, &s_image_x, &s_image_y); - intel_miptree_get_image_offset(z_mt, level, slice, - &z_image_x, &z_image_y); for (uint32_t y = 0; y < map->h; y++) { + uint32_t *z_line = + (uint32_t *)((uint8_t *)z_map.ptr + z_map.stride * y); for (uint32_t x = 0; x < map->w; x++) { int map_x = map->x + x, map_y = map->y + y; 
ptrdiff_t s_offset = intel_offset_S8(s_mt->surf.row_pitch, map_x + s_image_x, map_y + s_image_y, brw->has_swizzling); - ptrdiff_t z_offset = ((map_y + z_image_y) * - (z_mt->surf.row_pitch / 4) + - (map_x + z_image_x)); uint8_t s = s_map[s_offset]; - uint32_t z = z_map[z_offset]; + uint32_t z = z_line[x]; if (map_z32f_x24s8) { packed_map[(y * map->w + x) * 2 + 0] = z; @@ -3472,13 +3487,13 @@ intel_miptree_map_depthstencil(struct brw_context *brw, } } + __intel_miptree_unmap(brw, z_mt, level, slice, &z_map); intel_miptree_unmap_raw(s_mt); - intel_miptree_unmap_raw(z_mt); DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n", __func__, map->x, map->y, map->w, map->h, - z_mt, map->x + z_image_x, map->y + z_image_y, + z_mt, map->x, map->y, s_mt, map->x + s_image_x, map->y + s_image_y, map->ptr, map->stride); } else { @@ -3502,44 +3517,47 @@ intel_miptree_unmap_depthstencil(struct brw_context *brw, if (map->mode & GL_MAP_WRITE_BIT) { uint32_t *packed_map = map->ptr; uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_WRITE_BIT); - uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_WRITE_BIT); unsigned int s_image_x, s_image_y; - unsigned int z_image_x, z_image_y; intel_miptree_get_image_offset(s_mt, level, slice, &s_image_x, &s_image_y); - intel_miptree_get_image_offset(z_mt, level, slice, - &z_image_x, &z_image_y); + + struct intel_miptree_map z_map = { + .mode = GL_MAP_WRITE_BIT | BRW_MAP_DIRECT_BIT | GL_MAP_INVALIDATE_RANGE_BIT, + .x = map->x, + .y = map->y, + .w = map->w, + .h = map->h, + }; + __intel_miptree_map(brw, z_mt, level, slice, &z_map); for (uint32_t y = 0; y < map->h; y++) { + uint32_t *z_line = + (uint32_t *)((uint8_t *)z_map.ptr + z_map.stride * y); for (uint32_t x = 0; x < map->w; x++) { ptrdiff_t s_offset = intel_offset_S8(s_mt->surf.row_pitch, x + s_image_x + map->x, y + s_image_y + map->y, brw->has_swizzling); - ptrdiff_t z_offset = ((y + z_image_y + map->y) * - (z_mt->surf.row_pitch / 4) + - (x + z_image_x + map->x)); if 
(map_z32f_x24s8) { - z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0]; + z_line[x] = packed_map[(y * map->w + x) * 2 + 0]; s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1]; } else { uint32_t packed = packed_map[y * map->w + x]; s_map[s_offset] = packed >> 24; - z_map[z_offset] = packed; + z_line[x] = packed; } } } + __intel_miptree_unmap(brw, z_mt, level, slice, &z_map); intel_miptree_unmap_raw(s_mt); - intel_miptree_unmap_raw(z_mt); DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n", __func__, map->x, map->y, map->w, map->h, - z_mt, _mesa_get_format_name(z_mt->format), - map->x + z_image_x, map->y + z_image_y, + z_mt, _mesa_get_format_name(z_mt->format), map->x, map->y, s_mt, map->x + s_image_x, map->y + s_image_y, map->ptr, map->stride); } @@ -3636,6 +3654,38 @@ use_intel_mipree_map_blit(struct brw_context *brw, return false; } +static void +__intel_miptree_map(struct brw_context *brw, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice, + struct intel_miptree_map *map) +{ + assert(mt->surf.samples == 1); + + intel_miptree_access_raw(brw, mt, level, slice, + map->mode & GL_MAP_WRITE_BIT); + + if (mt->format == MESA_FORMAT_S_UINT8) { + intel_miptree_map_s8(brw, mt, map, level, slice); + } else if (mt->etc_format != MESA_FORMAT_NONE && + !(map->mode & BRW_MAP_DIRECT_BIT)) { + intel_miptree_map_etc(brw, mt, map, level, slice); + } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) { + intel_miptree_map_depthstencil(brw, mt, map, level, slice); + } else if (use_intel_mipree_map_blit(brw, mt, map->mode, level, slice)) { + intel_miptree_map_blit(brw, mt, map, level, slice); +#if defined(USE_SSE41) + } else if (!(map->mode & GL_MAP_WRITE_BIT) && + !mt->compressed && cpu_has_sse4_1 && + (mt->surf.row_pitch % 16 == 0)) { + intel_miptree_map_movntdqa(brw, mt, map, level, slice); +#endif + } else { + intel_miptree_map_gtt(brw, mt, map, level, slice); + } +} + /** * Parameter \a out_stride has type ptrdiff_t 
not because the buffer stride may * exceed 32 bits but to diminish the likelihood subtle bugs in pointer @@ -3671,27 +3721,7 @@ intel_miptree_map(struct brw_context *brw, return; } - intel_miptree_access_raw(brw, mt, level, slice, - map->mode & GL_MAP_WRITE_BIT); - - if (mt->format == MESA_FORMAT_S_UINT8) { - intel_miptree_map_s8(brw, mt, map, level, slice); - } else if (mt->etc_format != MESA_FORMAT_NONE && - !(mode & BRW_MAP_DIRECT_BIT)) { - intel_miptree_map_etc(brw, mt, map, level, slice); - } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) { - intel_miptree_map_depthstencil(brw, mt, map, level, slice); - } else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) { - intel_miptree_map_blit(brw, mt, map, level, slice); -#if defined(USE_SSE41) - } else if (!(mode & GL_MAP_WRITE_BIT) && - !mt->compressed && cpu_has_sse4_1 && - (mt->surf.row_pitch % 16 == 0)) { - intel_miptree_map_movntdqa(brw, mt, map, level, slice); -#endif - } else { - intel_miptree_map_gtt(brw, mt, map, level, slice); - } + __intel_miptree_map(brw, mt, level, slice, map); *out_ptr = map->ptr; *out_stride = map->stride; @@ -3700,19 +3730,13 @@ intel_miptree_map(struct brw_context *brw, intel_miptree_release_map(mt, level, slice); } -void -intel_miptree_unmap(struct brw_context *brw, - struct intel_mipmap_tree *mt, - unsigned int level, - unsigned int slice) +static void +__intel_miptree_unmap(struct brw_context *brw, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice, + struct intel_miptree_map *map) { - struct intel_miptree_map *map = mt->level[level].slice[slice].map; - - assert(mt->surf.samples == 1); - - if (!map) - return; - DBG("%s: mt %p (%s) level %d slice %d\n", __func__, mt, _mesa_get_format_name(mt->format), level, slice); @@ -3732,6 +3756,22 @@ intel_miptree_unmap(struct brw_context *brw, } else { intel_miptree_unmap_gtt(mt); } +} + +void +intel_miptree_unmap(struct brw_context *brw, + struct intel_mipmap_tree *mt, + unsigned int level, + 
unsigned int slice) +{ + struct intel_miptree_map *map = mt->level[level].slice[slice].map; + + assert(mt->surf.samples == 1); + + if (!map) + return; + + __intel_miptree_unmap(brw, mt, level, slice, map); intel_miptree_release_map(mt, level, slice); } -- 2.17.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev