On Thu, Jul 28, 2016 at 07:04:08AM -0700, Jason Ekstrand wrote: > On Jul 28, 2016 7:59 AM, "Pohjolainen, Topi" > <[1]topi.pohjolai...@intel.com> wrote: > > > > On Tue, Jul 26, 2016 at 03:02:20PM -0700, Jason Ekstrand wrote: > > > Since the dawn of time, blorp has used offsets directly to get at > different > > > mip levels and array slices of surfaces. This isn't really > necessary since > > > we can just use the base level/layer provided in the surface > state. While > > > it may have simplified blorp's original design, we haven't been > using the > > > blorp path for surface state on gen8 thanks to render compression > and > > > there's really no good need for it most of the time. This commit > restricts > > > such surface munging to the cases of fake W-tiling and fake > interleaved > > > multisampling. > > > --- > > > src/mesa/drivers/dri/i965/brw_blorp.c | 74 ++--------- > > > src/mesa/drivers/dri/i965/brw_blorp.h | 6 + > > > src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 185 > +++++++++++++++++++-------- > > > 3 files changed, 152 insertions(+), 113 deletions(-) > > > > > > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c > b/src/mesa/drivers/dri/i965/brw_blorp.c > > > index 64e507a..215f765 100644 > > > --- a/src/mesa/drivers/dri/i965/brw_blorp.c > > > +++ b/src/mesa/drivers/dri/i965/brw_blorp.c > > > @@ -90,7 +90,7 @@ get_image_offset_sa_gen6_stencil(const struct > isl_surf *surf, > > > *y_offset_sa = y; > > > } > > > > > > -static void > > > +void > > > > I noticed that this is now only used in brw_blorp_blit.cpp, should we > just > > move it there instead? > > It gets deleted entirely later. > > > > blorp_get_image_offset_sa(struct isl_device *dev, const struct > isl_surf *surf, > > > uint32_t level, uint32_t layer, > > > uint32_t *x_offset_sa, > > > @@ -100,60 +100,11 @@ blorp_get_image_offset_sa(struct isl_device > *dev, const struct isl_surf *surf, > > > get_image_offset_sa_gen6_stencil(surf, level, layer, > > > x_offset_sa, y_offset_sa); > > > } else { > > > - /* Using base_array_layer for Z in 3-D surfaces is a bit > abusive, but it > > > - * will go away soon enough. > > > - */ > > > - uint32_t z = 0; > > > - if (surf->dim == ISL_SURF_DIM_3D) { > > > - z = layer; > > > - layer = 0; > > > - } > > > - > > > - isl_surf_get_image_offset_sa(surf, level, layer, z, > > > + isl_surf_get_image_offset_sa(surf, level, layer, 0, > > > x_offset_sa, y_offset_sa); > > > } > > > } > > > > > > -static void > > > -surf_apply_level_layer_offsets(struct isl_device *dev, struct > isl_surf *surf, > > > - struct isl_view *view, uint32_t > *byte_offset, > > > - uint32_t *tile_x_sa, uint32_t > *tile_y_sa) > > > -{ > > > - /* This only makes sense for a single level and array slice */ > > > - assert(view->levels == 1 && view->array_len == 1); > > > - > > > - uint32_t x_offset_sa, y_offset_sa; > > > - blorp_get_image_offset_sa(dev, surf, view->base_level, > > > - view->base_array_layer, > > > - &x_offset_sa, &y_offset_sa); > > > - > > > - isl_tiling_get_intratile_offset_sa(dev, surf->tiling, > view->format, > > > - surf->row_pitch, > x_offset_sa, y_offset_sa, > > > - byte_offset, tile_x_sa, > tile_y_sa); > > > - > > > - /* Now that that's done, we have a very bare 2-D surface */ > > > - surf->dim = ISL_SURF_DIM_2D; > > > - surf->dim_layout = ISL_DIM_LAYOUT_GEN4_2D; > > > - > > > - surf->logical_level0_px.width = > > > - minify(surf->logical_level0_px.width, view->base_level); > > > - surf->logical_level0_px.height = > > > - minify(surf->logical_level0_px.height, view->base_level); > > > - surf->logical_level0_px.depth = 1; > > > - surf->logical_level0_px.array_len = 1; > > > - surf->levels = 1; > > > - > > > - /* Alignment doesn't matter since we have 1 miplevel and 1 > array slice so > > > - * just pick something that works for everybody. > > > - */ > > > - surf->image_alignment_el = isl_extent3d(4, 4, 1); > > > - > > > - /* TODO: surf->physcal_level0_extent_sa? */ > > > > I was wondering about this in the introducing patch... > > > > > - > > > - view->base_level = 0; > > > - view->base_array_layer = 0; > > > -} > > > - > > > void > > > brw_blorp_surface_info_init(struct brw_context *brw, > > > struct brw_blorp_surface_info *info, > > > @@ -191,8 +142,6 @@ brw_blorp_surface_info_init(struct brw_context > *brw, > > > .format = ISL_FORMAT_UNSUPPORTED, /* Set later */ > > > .base_level = level, > > > > This still prevents surf_convert_to_single_slice() from skipping the > munging, > > I think. Shouldn't we drop it also? > > I'm confused. If it las a level other than zero then we need the > munging. The only reason to skip is because there are cases where we > call convert_two_single_slice twice.
Right you are, my mistake, I got confused. > > > > .levels = 1, > > > - .base_array_layer = layer / layer_multiplier, > > > - .array_len = 1, > > > .channel_select = { > > > ISL_CHANNEL_SELECT_RED, > > > ISL_CHANNEL_SELECT_GREEN, > > > @@ -201,12 +150,21 @@ brw_blorp_surface_info_init(struct > brw_context *brw, > > > }, > > > }; > > > > > > - if (brw->gen >= 8 && !is_render_target && info->surf.dim == > ISL_SURF_DIM_3D) { > > > - /* On gen8+ we use actual 3-D textures so we need to pass > the layer > > > - * through to the sampler. > > > + if (!is_render_target && > > > + (info->surf.dim == ISL_SURF_DIM_3D || > > > + info->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY)) { > > > + /* 3-D textures don't support base_array layer and neither > do 2-D > > > + * multisampled textures on IVB so we need to pass it > through the > > > + * sampler in those cases. These are also two cases where > we are > > > + * guaranteed that we won't be doing any funny surface > hacks. > > > */ > > > + info->view.base_array_layer = 0; > > > + info->view.array_len = > MAX2(info->surf.logical_level0_px.depth, > > > + > info->surf.logical_level0_px.array_len); > > > info->z_offset = layer / layer_multiplier; > > > } else { > > > + info->view.base_array_layer = layer / layer_multiplier; > > > + info->view.array_len = 1; > > > info->z_offset = 0; > > > } > > > > > > @@ -252,10 +210,6 @@ brw_blorp_surface_info_init(struct brw_context > *brw, > > > break; > > > } > > > } > > > - > > > - surf_apply_level_layer_offsets(&brw->isl_dev, &info->surf, > &info->view, > > > - &info->bo_offset, > > > - &info->tile_x_sa, > &info->tile_y_sa); > > > } > > > > > > > > > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h > b/src/mesa/drivers/dri/i965/brw_blorp.h > > > index ec12dfe..706d53e 100644 > > > --- a/src/mesa/drivers/dri/i965/brw_blorp.h > > > +++ b/src/mesa/drivers/dri/i965/brw_blorp.h > > > @@ -338,6 +338,12 @@ brw_blorp_compile_nir_shader(struct > brw_context *brw, struct nir_shader *nir, > > > struct brw_blorp_prog_data > *prog_data, > > > unsigned *program_size); > > > > > > +void > > > +blorp_get_image_offset_sa(struct isl_device *dev, const struct > isl_surf *surf, > > > + uint32_t level, uint32_t layer, > > > + uint32_t *x_offset_sa, > > > + uint32_t *y_offset_sa); > > > + > > > uint32_t > > > brw_blorp_emit_surface_state(struct brw_context *brw, > > > const struct brw_blorp_surface_info > *surface, > > > diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp > b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp > > > index a35cdb3..007c061 100644 > > > --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp > > > +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp > > > @@ -1586,6 +1586,115 @@ swizzle_to_scs(GLenum swizzle) > > > return (enum isl_channel_select)((swizzle + 4) & 7); > > > } > > > > > > +static void > > > +surf_convert_to_single_slice(struct brw_context *brw, > > > + struct brw_blorp_surface_info *info) > > > +{ > > > + /* This only makes sense for a single level and array slice */ > > > + assert(info->view.levels == 1 && info->view.array_len == 1); > > > + > > > + /* Just bail if we have nothing to do. */ > > > + if (info->surf.dim == ISL_SURF_DIM_2D && > > > + info->view.base_level == 0 && info->view.base_array_layer > == 0 && > > > + info->surf.levels == 0 && > info->surf.logical_level0_px.array_len == 0) > > > + return; > > > + > > > + uint32_t x_offset_sa, y_offset_sa; > > > + blorp_get_image_offset_sa(&brw->isl_dev, &info->surf, > info->view.base_level, > > > + info->view.base_array_layer, > > > + &x_offset_sa, &y_offset_sa); > > > + > > > + isl_tiling_get_intratile_offset_sa(&brw->isl_dev, > info->surf.tiling, > > > + info->view.format, > info->surf.row_pitch, > > > + x_offset_sa, y_offset_sa, > > > + &info->bo_offset, > > > + &info->tile_x_sa, > &info->tile_y_sa); > > > + > > > + /* TODO: Once this file gets converted to C, we shouls just use > designated > > > + * initializers. > > > + */ > > > + struct isl_surf_init_info init_info = isl_surf_init_info(); > > > + > > > + init_info.dim = ISL_SURF_DIM_2D; > > > + init_info.format = ISL_FORMAT_R8_UINT; > > > + init_info.width = > > > + minify(info->surf.logical_level0_px.width, > info->view.base_level); > > > + init_info.height = > > > + minify(info->surf.logical_level0_px.height, > info->view.base_level); > > > + init_info.depth = 1; > > > + init_info.levels = 1; > > > + init_info.array_len = 1; > > > + init_info.samples = info->surf.samples; > > > + init_info.min_pitch = info->surf.row_pitch; > > > + init_info.usage = info->surf.usage; > > > + init_info.tiling_flags = 1 << info->surf.tiling; > > > + > > > + isl_surf_init_s(&brw->isl_dev, &info->surf, &init_info); > > > + assert(info->surf.row_pitch == init_info.min_pitch); > > > + > > > + /* The view is also different now. */ > > > + info->view.base_level = 0; > > > + info->view.levels = 1; > > > + info->view.base_array_layer = 0; > > > + info->view.array_len = 1; > > > +} > > > + > > > +static void > > > +surf_fake_interleaved_msaa(struct brw_context *brw, > > > + struct brw_blorp_surface_info *info) > > > +{ > > > + assert(info->surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED); > > > + > > > + /* First, we need to convert it to a simple 1-level 1-layer 2-D > surface */ > > > + surf_convert_to_single_slice(brw, info); > > > + > > > + info->surf.logical_level0_px = info->surf.phys_level0_sa; > > > + info->surf.samples = 1; > > > + info->surf.msaa_layout = ISL_MSAA_LAYOUT_NONE; > > > +} > > > + > > > +static void > > > +surf_retile_w_to_y(struct brw_context *brw, > > > + struct brw_blorp_surface_info *info) > > > +{ > > > + assert(info->surf.tiling == ISL_TILING_W); > > > + > > > + /* First, we need to convert it to a simple 1-level 1-layer 2-D > surface */ > > > + surf_convert_to_single_slice(brw, info); > > > + > > > + /* On gen7+, we don't have interleaved multisampling for color > render > > > + * targets so we have to fake it. > > > + * > > > + * TODO: Are we sure we don't also need to fake it on gen6? > > > + */ > > > + if (brw->gen > 6 && info->surf.msaa_layout == > ISL_MSAA_LAYOUT_INTERLEAVED) { > > > + info->surf.logical_level0_px = info->surf.phys_level0_sa; > > > + info->surf.samples = 1; > > > + info->surf.msaa_layout = ISL_MSAA_LAYOUT_NONE; > > > + } > > > + > > > + if (brw->gen == 6) { > > > + /* Gen6 stencil buffers have a very large alignment coming > in from the > > > + * miptree. It's out-of-bounds for what the surface state > can handle. > > > + * Since we have a single layer and level, it doesn't really > matter as > > > + * long as we don't pass a bogus value into > isl_surf_fill_state(). > > > + */ > > > + info->surf.image_alignment_el = isl_extent3d(4, 2, 1); > > > + } > > > + > > > + /* Now that we've converted everything to a simple 2-D surface > with only > > > + * one miplevel, we can go about retiling it. > > > + */ > > > + const unsigned x_align = 8, y_align = info->surf.samples != 0 ? > 8 : 4; > > > + info->surf.tiling = ISL_TILING_Y0; > > > + info->surf.logical_level0_px.width = > > > + ALIGN(info->surf.logical_level0_px.width, x_align) * 2; > > > + info->surf.logical_level0_px.height = > > > + ALIGN(info->surf.logical_level0_px.height, y_align) / 2; > > > + info->tile_x_sa *= 2; > > > + info->tile_y_sa /= 2; > > > +} > > > + > > > /** > > > * Note: if the src (or dst) is a 2D multisample array texture on > Gen7+ using > > > * INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, src_layer > (dst_layer) is > > > @@ -1782,7 +1891,10 @@ brw_blorp_blit_miptrees(struct brw_context > *brw, > > > /* For some texture types, we need to pass the layer through > the sampler. */ > > > params.wm_inputs.src_z = params.src.z_offset; > > > > > > - if (brw->gen > 6 && dst_mt->msaa_layout == > INTEL_MSAA_LAYOUT_IMS) { > > > + if (brw->gen > 6 && > > > + params.dst.surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) > { > > > + assert(params.dst.surf.samples > 1); > > > + > > > /* We must expand the rectangle we send through the > rendering pipeline, > > > * to account for the fact that we are mapping the > destination region as > > > * single-sampled when it is in fact multisampled. We must > also align > > > @@ -1795,71 +1907,41 @@ brw_blorp_blit_miptrees(struct brw_context > *brw, > > > * If it's UMS, then we have no choice but to set up the > rendering > > > * pipeline as multisampled. > > > */ > > > - assert(params.dst.surf.msaa_layout = > ISL_MSAA_LAYOUT_INTERLEAVED); > > > switch (params.dst.surf.samples) { > > > case 2: > > > params.x0 = ROUND_DOWN_TO(params.x0 * 2, 4); > > > params.y0 = ROUND_DOWN_TO(params.y0, 4); > > > params.x1 = ALIGN(params.x1 * 2, 4); > > > params.y1 = ALIGN(params.y1, 4); > > > - params.dst.surf.logical_level0_px.width *= 2; > > > break; > > > case 4: > > > params.x0 = ROUND_DOWN_TO(params.x0 * 2, 4); > > > params.y0 = ROUND_DOWN_TO(params.y0 * 2, 4); > > > params.x1 = ALIGN(params.x1 * 2, 4); > > > params.y1 = ALIGN(params.y1 * 2, 4); > > > - params.dst.surf.logical_level0_px.width *= 2; > > > - params.dst.surf.logical_level0_px.height *= 2; > > > break; > > > case 8: > > > params.x0 = ROUND_DOWN_TO(params.x0 * 4, 8); > > > params.y0 = ROUND_DOWN_TO(params.y0 * 2, 4); > > > params.x1 = ALIGN(params.x1 * 4, 8); > > > params.y1 = ALIGN(params.y1 * 2, 4); > > > - params.dst.surf.logical_level0_px.width *= 4; > > > - params.dst.surf.logical_level0_px.height *= 2; > > > break; > > > case 16: > > > params.x0 = ROUND_DOWN_TO(params.x0 * 4, 8); > > > params.y0 = ROUND_DOWN_TO(params.y0 * 4, 8); > > > params.x1 = ALIGN(params.x1 * 4, 8); > > > params.y1 = ALIGN(params.y1 * 4, 8); > > > - params.dst.surf.logical_level0_px.width *= 4; > > > - params.dst.surf.logical_level0_px.height *= 4; > > > break; > > > default: > > > unreachable("Unrecognized sample count in > brw_blorp_blit_params ctor"); > > > } > > > > > > - /* Gen7's rendering hardware only supports the IMS layout > for depth and > > > - * stencil render targets. Blorp always maps its > destination surface as > > > - * a color render target (even if it's actually a depth or > stencil > > > - * buffer). So if the destination is IMS, we'll have to map > it as a > > > - * single-sampled texture and interleave the samples > ourselves. > > > - */ > > > - params.dst.surf.samples = 1; > > > - params.dst.surf.msaa_layout = ISL_MSAA_LAYOUT_NONE; > > > + surf_fake_interleaved_msaa(brw, ¶ms.dst); > > > > > > wm_prog_key.use_kill = true; > > > } > > > > > > if (params.dst.surf.tiling == ISL_TILING_W) { > > > - /* We need to fake W-tiling with Y-tiling */ > > > - params.dst.surf.tiling = ISL_TILING_Y0; > > > - > > > - wm_prog_key.dst_tiled_w = true; > > > > This is just moved further down, right? > > I believe so yes > > > > - > > > - if (params.dst.surf.samples > 1) { > > > - /* If the destination surface is a W-tiled multisampled > stencil > > > - * buffer that we're mapping as Y tiled, then we need to > arrange for > > > - * the WM program to run once per sample rather than once > per pixel, > > > - * because the memory layout of related samples doesn't > match between > > > - * W and Y tiling. > > > - */ > > > - wm_prog_key.persample_msaa_dispatch = true; > > > - } > > > - > > > /* We must modify the rectangle we send through the > rendering pipeline > > > * (and the size and x/y offset of the destination surface), > to account > > > * for the fact that we are mapping it as Y-tiled when it is > in fact > > > @@ -1911,39 +1993,36 @@ brw_blorp_blit_miptrees(struct brw_context > *brw, > > > params.y0 = ROUND_DOWN_TO(params.y0, y_align) / 2; > > > params.x1 = ALIGN(params.x1, x_align) * 2; > > > params.y1 = ALIGN(params.y1, y_align) / 2; > > > - params.dst.surf.logical_level0_px.width = > > > - ALIGN(params.dst.surf.logical_level0_px.width, x_align) * > 2; > > > - params.dst.surf.logical_level0_px.height = > > > - ALIGN(params.dst.surf.logical_level0_px.height, y_align) > / 2; > > > - params.dst.tile_x_sa *= 2; > > > - params.dst.tile_y_sa /= 2; > > > + > > > + /* Retile the surface to Y-tiled */ > > > + surf_retile_w_to_y(brw, ¶ms.dst); > > > + > > > + wm_prog_key.dst_tiled_w = true; > > > wm_prog_key.use_kill = true; > > > + > > > + if (params.dst.surf.samples > 1) { > > > + /* If the destination surface is a W-tiled multisampled > stencil > > > + * buffer that we're mapping as Y tiled, then we need to > arrange for > > > + * the WM program to run once per sample rather than once > per pixel, > > > + * because the memory layout of related samples doesn't > match between > > > + * W and Y tiling. > > > + */ > > > + wm_prog_key.persample_msaa_dispatch = true; > > > + } > > > } > > > > > > if (brw->gen < 8 && params.src.surf.tiling == ISL_TILING_W) { > > > /* On Haswell and earlier, we have to fake W-tiled sources > as Y-tiled. > > > * Broadwell adds support for sampling from stencil. > > > - */ > > > - params.src.surf.tiling = ISL_TILING_Y0; > > > - > > > - wm_prog_key.src_tiled_w = true; > > > > Same as this? > > Yes > > > > - > > > - /* We must modify the size and x/y offset of the source > surface to > > > - * account for the fact that we are mapping it as Y-tiled > when it is in > > > - * fact W tiled. > > > * > > > * See the comments above concerning x/y offset alignment > for the > > > * destination surface. > > > * > > > * TODO: what if this makes the texture size too large? > > > */ > > > - const unsigned x_align = 8, y_align = > params.src.surf.samples != 0 ? 8 : 4; > > > - params.src.surf.logical_level0_px.width = > > > - ALIGN(params.src.surf.logical_level0_px.width, x_align) * > 2; > > > - params.src.surf.logical_level0_px.height = > > > - ALIGN(params.src.surf.logical_level0_px.height, y_align) > / 2; > > > - params.src.tile_x_sa *= 2; > > > - params.src.tile_y_sa /= 2; > > > + surf_retile_w_to_y(brw, ¶ms.src); > > > + > > > + wm_prog_key.src_tiled_w = true; > > > } > > > > > > /* tex_samples and rt_samples are the sample counts that are > set up in > > > -- > > > 2.5.0.400.gff86faf > > > > > > _______________________________________________ > > > mesa-dev mailing list > > > [2]mesa-dev@lists.freedesktop.org > > > [3]https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > References > > 1. mailto:topi.pohjolai...@intel.com > 2. mailto:mesa-dev@lists.freedesktop.org > 3. https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev