On 22 May 2012 08:19, Kenneth Graunke <kenn...@whitecape.org> wrote: > On 05/11/2012 11:03 AM, Paul Berry wrote: > >> Starting in Gen7, there are two possible layouts for MSAA surfaces: >> >> - Interleaved, in which additional samples are accommodated by scaling >> up the width and height of the surface. This is the only layout >> available in Gen6. On Gen7 it is used for depth and stencil >> surfaces only. >> >> - Sliced, in which the surface is stored as a 2D array, with array >> slice n containing all pixel data for sample n. On Gen7 this layout >> is used for color surfaces. >> >> The "Sliced" layout has an additional requirement: it must be used in >> ARYSPC_LOD0 mode, which means that the surface doesn't leave any extra >> room between array slices for miplevels other than 0. >> >> This patch modifies the surface allocation functions to use the >> correct layout when allocating MSAA surfaces in Gen7, and to set the >> array offsets properly when using ARYSPC_LOD0 mode. It also modifies >> the code that populates SURFACE_STATE structures to ensure that >> ARYSPC_LOD0 mode is selected in the appropriate circumstances. >> --- >> src/mesa/drivers/dri/i965/brw_**blorp.cpp | 1 + >> src/mesa/drivers/dri/i965/brw_**blorp.h | 5 + >> src/mesa/drivers/dri/i965/brw_**tex_layout.c | 10 ++- >> src/mesa/drivers/dri/i965/brw_**wm_surface_state.c | 3 +- >> src/mesa/drivers/dri/i965/**gen7_blorp.cpp | 2 + >> src/mesa/drivers/dri/i965/**gen7_wm_surface_state.c | 10 ++ >> src/mesa/drivers/dri/intel/**intel_mipmap_tree.c | 102 >> ++++++++++++++++----- >> src/mesa/drivers/dri/intel/**intel_mipmap_tree.h | 23 +++++- >> src/mesa/drivers/dri/intel/**intel_tex_image.c | 3 +- >> src/mesa/drivers/dri/intel/**intel_tex_validate.c | 3 +- >> 10 files changed, 134 insertions(+), 28 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/**brw_blorp.cpp >> b/src/mesa/drivers/dri/i965/**brw_blorp.cpp >> index f6aff44..84f92b4 100644 >> --- a/src/mesa/drivers/dri/i965/**brw_blorp.cpp >> +++ b/src/mesa/drivers/dri/i965/**brw_blorp.cpp >> @@ -58,6 +58,7 @@ brw_blorp_surface_info::set(**struct intel_mipmap_tree >> *mt, >> { >> brw_blorp_mip_info::set(mt, level, layer); >> this->num_samples = mt->num_samples; >> + this->array_spacing_lod0 = mt->array_spacing_lod0; >> >> if (mt->format == MESA_FORMAT_S8) { >> /* The miptree is a W-tiled stencil buffer. Surface states can't >> be set >> diff --git a/src/mesa/drivers/dri/i965/**brw_blorp.h >> b/src/mesa/drivers/dri/i965/**brw_blorp.h >> index 8dabf2c..3e37081 100644 >> --- a/src/mesa/drivers/dri/i965/**brw_blorp.h >> +++ b/src/mesa/drivers/dri/i965/**brw_blorp.h >> @@ -99,6 +99,11 @@ public: >> bool map_stencil_as_y_tiled; >> >> unsigned num_samples; >> + >> + /* Setting this flag indicates that the surface should be set up in >> + * ARYSPC_LOD0 mode. Ignored prior to Gen7. >> + */ >> + bool array_spacing_lod0; >> }; >> >> >> diff --git a/src/mesa/drivers/dri/i965/**brw_tex_layout.c >> b/src/mesa/drivers/dri/i965/**brw_tex_layout.c >> index 8bf1d3d..f742131 100644 >> --- a/src/mesa/drivers/dri/i965/**brw_tex_layout.c >> +++ b/src/mesa/drivers/dri/i965/**brw_tex_layout.c >> @@ -49,7 +49,10 @@ brw_miptree_layout_texture_**array(struct >> intel_context *intel, >> >> h0 = ALIGN(mt->height0, mt->align_h); >> h1 = ALIGN(minify(mt->height0), mt->align_h); >> - qpitch = (h0 + h1 + (intel->gen>= 7 ? 12 : 11) * mt->align_h); >> + if (mt->array_spacing_lod0) >> + qpitch = h0; >> + else >> + qpitch = (h0 + h1 + (intel->gen>= 7 ? 12 : 11) * mt->align_h); >> if (mt->compressed) >> qpitch /= 4; >> >> @@ -165,7 +168,10 @@ brw_miptree_layout(struct intel_context *intel, >> struct intel_mipmap_tree *mt) >> break; >> >> default: >> - i945_miptree_layout_2d(mt); >> + if (mt->num_samples> 0&& !mt->msaa_is_interleaved) >> >> + brw_miptree_layout_texture_**array(intel, mt); >> + else >> + i945_miptree_layout_2d(mt); >> break; >> } >> DBG("%s: %dx%dx%d\n", __FUNCTION__, >> diff --git a/src/mesa/drivers/dri/i965/**brw_wm_surface_state.c >> b/src/mesa/drivers/dri/i965/**brw_wm_surface_state.c >> index 849da85..6e745cf 100644 >> --- a/src/mesa/drivers/dri/i965/**brw_wm_surface_state.c >> +++ b/src/mesa/drivers/dri/i965/**brw_wm_surface_state.c >> @@ -955,7 +955,8 @@ brw_update_renderbuffer_**surface(struct brw_context >> *brw, >> intel_image->base.Base.Level, >> width, height, depth, >> true, >> - 0 /* num_samples */); >> + 0 /* num_samples */, >> + false /* msaa_is_interleaved */); >> >> intel_miptree_copy_teximage(**intel, intel_image, new_mt); >> intel_miptree_reference(&irb->**mt, intel_image->mt); >> diff --git a/src/mesa/drivers/dri/i965/**gen7_blorp.cpp >> b/src/mesa/drivers/dri/i965/**gen7_blorp.cpp >> index 3775eec..dfc2272 100644 >> --- a/src/mesa/drivers/dri/i965/**gen7_blorp.cpp >> +++ b/src/mesa/drivers/dri/i965/**gen7_blorp.cpp >> @@ -149,6 +149,8 @@ gen7_blorp_emit_surface_state(**struct brw_context >> *brw, >> >> surf->ss0.surface_format = format; >> surf->ss0.surface_type = BRW_SURFACE_2D; >> + surf->ss0.surface_array_**spacing = surface->array_spacing_lod0 ? >> + GEN7_SURFACE_ARYSPC_LOD0 : GEN7_SURFACE_ARYSPC_FULL; >> >> /* reloc */ >> surf->ss1.base_addr = region->bo->offset; /* No tile offsets needed */ >> diff --git a/src/mesa/drivers/dri/i965/**gen7_wm_surface_state.c >> b/src/mesa/drivers/dri/i965/**gen7_wm_surface_state.c >> index 5aa62bd..9a01ddf 100644 >> --- a/src/mesa/drivers/dri/i965/**gen7_wm_surface_state.c >> +++ b/src/mesa/drivers/dri/i965/**gen7_wm_surface_state.c >> @@ -142,6 +142,10 @@ gen7_update_texture_surface(**struct gl_context >> *ctx, GLuint unit) >> return; >> } >> >> + /* We don't support MSAA for textures. */ >> + assert(!mt->array_spacing_**lod0); >> + assert(mt->num_samples == 0); >> + >> intel_miptree_get_dimensions_**for_image(firstImage,&width,&** >> height,&depth); >> >> surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, >> @@ -296,6 +300,9 @@ gen7_update_renderbuffer_**surface(struct >> brw_context *brw, >> sizeof(*surf), 32,&brw->wm.surf_offset[unit])**; >> >> memset(surf, 0, sizeof(*surf)); >> >> + /* Render targets can't use MSAA interleaved layout */ >> + assert(!irb->mt->msaa_is_**interleaved); >> + >> if (irb->mt->align_h == 4) >> surf->ss0.vertical_alignment = 1; >> if (irb->mt->align_w == 8) >> @@ -324,6 +331,9 @@ gen7_update_renderbuffer_**surface(struct >> brw_context *brw, >> } >> >> surf->ss0.surface_type = BRW_SURFACE_2D; >> + surf->ss0.surface_array_**spacing = irb->mt->array_spacing_lod0 ? >> + GEN7_SURFACE_ARYSPC_LOD0 : GEN7_SURFACE_ARYSPC_FULL; >> + >> /* reloc */ >> surf->ss1.base_addr = intel_renderbuffer_tile_** >> offsets(irb,&tile_x,&tile_y); >> surf->ss1.base_addr += region->bo->offset; /* reloc */ >> diff --git a/src/mesa/drivers/dri/intel/**intel_mipmap_tree.c >> b/src/mesa/drivers/dri/intel/**intel_mipmap_tree.c >> index 36d7b77..4132351 100644 >> --- a/src/mesa/drivers/dri/intel/**intel_mipmap_tree.c >> +++ b/src/mesa/drivers/dri/intel/**intel_mipmap_tree.c >> @@ -73,7 +73,8 @@ intel_miptree_create_internal(**struct intel_context >> *intel, >> GLuint height0, >> GLuint depth0, >> bool for_region, >> - GLuint num_samples) >> + GLuint num_samples, >> + bool msaa_is_interleaved) >> { >> struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); >> int compress_byte = 0; >> @@ -95,8 +96,14 @@ intel_miptree_create_internal(**struct intel_context >> *intel, >> mt->cpp = compress_byte ? compress_byte : _mesa_get_format_bytes(mt-> >> **format); >> mt->num_samples = num_samples; >> mt->compressed = compress_byte ? 1 : 0; >> + mt->msaa_is_interleaved = msaa_is_interleaved; >> mt->refcount = 1; >> >> + /* array_spacing_lod0 is only used for non-interleaved MSAA surfaces. >> + * TODO: can we use it elsewhere? >> + */ >> + mt->array_spacing_lod0 = num_samples> 0&& !msaa_is_interleaved; >> >> + >> if (target == GL_TEXTURE_CUBE_MAP) { >> assert(depth0 == 1); >> mt->depth0 = 6; >> @@ -109,6 +116,8 @@ intel_miptree_create_internal(**struct intel_context >> *intel, >> (intel->must_use_separate_**stencil || >> (intel->has_separate_stencil&& >> intel->vtbl.is_hiz_depth_**format(intel, format)))) { >> + /* MSAA stencil surfaces are always interleaved. */ >> + bool msaa_is_interleaved = num_samples> 0; >> mt->stencil_mt = intel_miptree_create(intel, >> mt->target, >> MESA_FORMAT_S8, >> @@ -118,7 +127,8 @@ intel_miptree_create_internal(**struct intel_context >> *intel, >> mt->height0, >> mt->depth0, >> true, >> - num_samples); >> + num_samples, >> + msaa_is_interleaved); >> if (!mt->stencil_mt) { >> intel_miptree_release(&mt); >> return NULL; >> @@ -165,7 +175,8 @@ intel_miptree_create(struct intel_context *intel, >> GLuint height0, >> GLuint depth0, >> bool expect_accelerated_upload, >> - GLuint num_samples) >> + GLuint num_samples, >> + bool msaa_is_interleaved) >> { >> struct intel_mipmap_tree *mt; >> uint32_t tiling = I915_TILING_NONE; >> @@ -207,7 +218,7 @@ intel_miptree_create(struct intel_context *intel, >> mt = intel_miptree_create_internal(**intel, target, format, >> first_level, last_level, width0, >> height0, depth0, >> - false, num_samples); >> + false, num_samples, >> msaa_is_interleaved); >> /* >> * pitch == 0 || height == 0 indicates the null texture >> */ >> @@ -243,7 +254,8 @@ intel_miptree_create_for_**region(struct >> intel_context *intel, >> mt = intel_miptree_create_internal(**intel, target, format, >> 0, 0, >> region->width, region->height, 1, >> - true, 0 /* num_samples */); >> + true, 0 /* num_samples */, >> + false /* msaa_is_interleaved */); >> if (!mt) >> return mt; >> >> @@ -252,6 +264,31 @@ intel_miptree_create_for_**region(struct >> intel_context *intel, >> return mt; >> } >> >> +/** >> + * Determine whether the MSAA surface being created should use an >> interleaved >> + * layout or a sliced layout, based on the chip generation and the >> surface >> + * type. >> + */ >> +static bool >> +msaa_format_is_interleaved(**struct intel_context *intel, gl_format >> format) >> +{ >> + /* Prior to Gen7, all surfaces used interleaved layout. */ >> + if (intel->gen< 7) >> + return true; >> + >> + /* In Gen7, interleaved layout is only used for depth and stencil >> + * buffers. >> + */ >> + switch (_mesa_get_format_base_format(**format)) { >> + case GL_DEPTH_COMPONENT: >> + case GL_STENCIL_INDEX: >> + case GL_DEPTH_STENCIL: >> + return true; >> + default: >> + return false; >> + } >> +} >> + >> struct intel_mipmap_tree* >> intel_miptree_create_for_**renderbuffer(struct intel_context *intel, >> gl_format format, >> @@ -260,25 +297,43 @@ intel_miptree_create_for_**renderbuffer(struct >> intel_context *intel, >> uint32_t num_samples) >> { >> struct intel_mipmap_tree *mt; >> - >> - /* Adjust width/height for MSAA. Note: since samples are interleaved >> in a >> - * complex pattern that repeats for every 2x2 block of pixels, we >> need to >> - * expand the width and height to even numbers before the width/height >> - * adjustment, otherwise some of the samples in the last row and >> column >> - * will fall outside the boundary of the texture. >> - */ >> - if (num_samples> 4) { >> - num_samples = 8; >> - width = ALIGN(width, 2) * 4; >> - height = ALIGN(height, 2) * 2; >> - } else if (num_samples> 0) { >> - num_samples = 4; >> - width = ALIGN(width, 2) * 2; >> - height = ALIGN(height, 2) * 2; >> + uint32_t depth = 1; >> + bool msaa_is_interleaved = false; >> + >> + if (num_samples> 0) { >> + /* Adjust width/height/depth for MSAA */ >> + msaa_is_interleaved = msaa_format_is_interleaved(**intel, format); >> + if (msaa_is_interleaved) { >> + /* Note: since samples are interleaved in a complex pattern that >> + * repeats for every 2x2 block of pixels, we need to expand the >> width >> + * and height to even numbers before the width/height >> adjustment, >> + * otherwise some of the samples in the last row and column >> will fall >> + * outside the boundary of the texture. >> + */ >> + switch (num_samples) { >> + case 4: >> + width = ALIGN(width, 2) * 2; >> + height = ALIGN(height, 2) * 2; >> + break; >> + case 8: >> + width = ALIGN(width, 2) * 4; >> + height = ALIGN(height, 2) * 2; >> + break; >> + default: >> + /* num_samples should already have been quantized to 0, 4, or >> + * 8. >> + */ >> + assert(false); >> + } >> + } else { >> + /* Non-interleaved */ >> + depth = num_samples; >> + } >> } >> >> mt = intel_miptree_create(intel, GL_TEXTURE_2D, format, 0, 0, >> - width, height, 1, true, num_samples); >> + width, height, depth, true, num_samples, >> + msaa_is_interleaved); >> >> return mt; >> } >> @@ -552,6 +607,8 @@ intel_miptree_alloc_hiz(struct intel_context *intel, >> GLuint num_samples) >> { >> assert(mt->hiz_mt == NULL); >> + /* MSAA HiZ surfaces are always interleaved. */ >> + bool msaa_is_interleaved = num_samples> 0; >> mt->hiz_mt = intel_miptree_create(intel, >> mt->target, >> MESA_FORMAT_X8_Z24, >> @@ -561,7 +618,8 @@ intel_miptree_alloc_hiz(struct intel_context *intel, >> mt->height0, >> mt->depth0, >> true, >> - num_samples); >> + num_samples, >> + msaa_is_interleaved); >> >> if (!mt->hiz_mt) >> return false; >> diff --git a/src/mesa/drivers/dri/intel/**intel_mipmap_tree.h >> b/src/mesa/drivers/dri/intel/**intel_mipmap_tree.h >> index ca1666d..3883d2b 100644 >> --- a/src/mesa/drivers/dri/intel/**intel_mipmap_tree.h >> +++ b/src/mesa/drivers/dri/intel/**intel_mipmap_tree.h >> @@ -172,6 +172,26 @@ struct intel_mipmap_tree >> GLuint num_samples; >> bool compressed; >> >> + /** >> + * For 1D array, 2D array, cube, and 2D multisampled surfaces on >> Gen7: true >> + * if the surface only contains LOD 0, and hence no space is for LOD's >> + * other than 0 in between array slices. >> + * >> + * Corresponds to the surface_array_spacing bit in gen7_surface_state. >> + */ >> + bool array_spacing_lod0; >> > > It seems like we ought to use ARYSPC_LOD0 mode for all non-mipmapped > renderbuffers and textures...it should save memory. If we did that, I > think we could probably just use mt->first_level != mt->last_level rather > than adding a new flag. > > Then again, I haven't thought through all the ramifications of that: I > know there's some period of time where the user has defined LOD0, and > you're not sure whether they're going to defined LOD1, 2, 3 too or just not > do mipmapping. It would suck to allocate something in > ARYSPC_LOD0/non-mipmap-capable mode and then have to redo it. But maybe we > have to redo it already? >
I think you're right that we have to redo it anyhow, because even without using ARYSPC_LOD0, if we speculatively allocate a texture that just has LOD0, then we haven't reserved enough memory for the other mip levels of the last array slice. Your suggestion seems reasonable. The only thing I can think of that might get in the way is if there are restrictions in the bspec requiring us to disable ARYSPC_LOD0 at certain times. I'll try to dig through the bspec today and check that. > > Not terribly familiar with this code. > > Other than that suggestion, your patch seems fine. Assuming there's some > reason not to do that, then this patch is: > > Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> > > + /** >> + * For MSAA buffers, there are two possible layouts: >> + * - Interleaved, in which the additional samples are accommodated >> + * by scaling up the width and height of the surface. >> + * - Sliced, in which the surface is stored as a 2D array, with >> + * array slice n containing all pixel data for sample n. >> + * >> + * This value is true if num_samples> 0 and the format is >> interleaved. >> + */ >> + bool msaa_is_interleaved; >> + >> /* Derived from the above: >> */ >> GLuint total_width; >> @@ -233,7 +253,8 @@ struct intel_mipmap_tree *intel_miptree_create(struct >> intel_context *intel, >> GLuint height0, >> GLuint depth0, >> bool >> expect_accelerated_upload, >> - GLuint num_samples); >> + GLuint num_samples, >> + bool msaa_is_interleaved); >> >> struct intel_mipmap_tree * >> intel_miptree_create_for_**region(struct intel_context *intel, >> diff --git a/src/mesa/drivers/dri/intel/**intel_tex_image.c >> b/src/mesa/drivers/dri/intel/**intel_tex_image.c >> index bb2217f..f5d075b 100644 >> --- a/src/mesa/drivers/dri/intel/**intel_tex_image.c >> +++ b/src/mesa/drivers/dri/intel/**intel_tex_image.c >> @@ -100,7 +100,8 @@ intel_miptree_create_for_**teximage(struct >> intel_context *intel, >> height, >> depth, >> expect_accelerated_upload, >> - 0 /* num_samples */); >> + 0 /* num_samples */, >> + false /* msaa_is_interleaved */); >> } >> >> /* There are actually quite a few combinations this will work for, >> diff --git a/src/mesa/drivers/dri/intel/**intel_tex_validate.c >> b/src/mesa/drivers/dri/intel/**intel_tex_validate.c >> index cadba29..c2d7d67 100644 >> --- a/src/mesa/drivers/dri/intel/**intel_tex_validate.c >> +++ b/src/mesa/drivers/dri/intel/**intel_tex_validate.c >> @@ -87,7 +87,8 @@ intel_finalize_mipmap_tree(**struct intel_context >> *intel, GLuint unit) >> height, >> depth, >> true, >> - 0 /* num_samples */); >> + 0 /* num_samples */, >> + false /* msaa_is_interleaved >> */); >> if (!intelObj->mt) >> return false; >> } >> > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev