This one is going to take a bit more testing and internal review before adopting.
> On Nov 12, 2016, at 5:00 PM, Ilia Mirkin <imir...@alum.mit.edu> wrote: > > This is a bit of a mega-commit, but unfortunately there's no great way > to break this up since a lot of different pieces have to match up. Here > we do the following: > - change surface layout to match swr's Load/StoreTile expectations > - fix sampler settings to respect all sampler view parameters > - fix stencil sampling to read from secondary resource > - respect pipe surface format, level, and layer settings > - fix resource map/unmap based on the new layout logic > - fix resource map/unmap to copy proper parts of stencil values in and > out of the matching depth texture > > These fix a massive quantity of piglits, including all the > tex-miplevel-selection ones. > > Note that the swr native miptree layout isn't extremely space-efficient, > and we end up using it for all textures, not just the renderable ones. A > back-of-the-envelope calculation suggests about 10%-25% increased memory > usage for miptrees, depending on the number of LODs. Single-LOD textures > should be unaffected. > > There are a handful of regressions as a result of this change: > - fbo-generatemipmap-formats on compressed textures with irregular > sizes fails. The 2+ levels appear as if their offsets were off by a > bit. No idea why, despite a lot of staring. I suspect the fact that > this test was passing before is pure coincidence as well. > - Some textureGrad tests, these failures match llvmpipe. (There are > debug settings allowing improved gallivm sampling accuracy.) > - Some layered clearing tests as swr doesn't currently support that. It > was getting lucky before because enough other things were broken. 
> > Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> > --- > src/gallium/drivers/swr/swr_context.cpp | 103 ++++++++++++----- > src/gallium/drivers/swr/swr_draw.cpp | 4 +- > src/gallium/drivers/swr/swr_resource.h | 8 +- > src/gallium/drivers/swr/swr_screen.cpp | 188 +++++++++++++++++++++----------- > src/gallium/drivers/swr/swr_shader.cpp | 28 ++++- > src/gallium/drivers/swr/swr_state.cpp | 166 +++++++++++++++++----------- > 6 files changed, 337 insertions(+), 160 deletions(-) > > diff --git a/src/gallium/drivers/swr/swr_context.cpp > b/src/gallium/drivers/swr/swr_context.cpp > index 6bc6de4..fc8e74a 100644 > --- a/src/gallium/drivers/swr/swr_context.cpp > +++ b/src/gallium/drivers/swr/swr_context.cpp > @@ -139,21 +139,35 @@ swr_transfer_map(struct pipe_context *pipe, > if (!pt) > return NULL; > pipe_resource_reference(&pt->resource, resource); > + pt->usage = (pipe_transfer_usage)usage; > pt->level = level; > pt->box = *box; > - pt->stride = spr->row_stride[level]; > - pt->layer_stride = spr->img_stride[level]; > - > - /* if we're mapping the depth/stencil, copy in stencil */ > - if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT > - && spr->has_stencil) { > - for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) { > - spr->swr.pBaseAddress[4 * i + 3] = spr->secondary.pBaseAddress[i]; > - } > - } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT > - && spr->has_stencil) { > - for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) { > - spr->swr.pBaseAddress[8 * i + 4] = spr->secondary.pBaseAddress[i]; > + pt->stride = spr->swr.pitch; > + pt->layer_stride = spr->swr.qpitch * spr->swr.pitch; > + > + /* if we're mapping the depth/stencil, copy in stencil for the section > + * being read in > + */ > + if (usage & PIPE_TRANSFER_READ && spr->has_depth && spr->has_stencil) { > + size_t zbase, sbase; > + for (int z = box->z; z < box->z + box->depth; z++) { > + zbase = (z * spr->swr.qpitch + box->y) * spr->swr.pitch + > + 
spr->mip_offsets[level]; > + sbase = (z * spr->secondary.qpitch + box->y) * spr->secondary.pitch > + > + spr->secondary_mip_offsets[level]; > + for (int y = box->y; y < box->y + box->height; y++) { > + if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) { > + for (int x = box->x; x < box->x + box->width; x++) > + spr->swr.pBaseAddress[zbase + 4 * x + 3] = > + spr->secondary.pBaseAddress[sbase + x]; > + } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) > { > + for (int x = box->x; x < box->x + box->width; x++) > + spr->swr.pBaseAddress[zbase + 8 * x + 4] = > + spr->secondary.pBaseAddress[sbase + x]; > + } > + zbase += spr->swr.pitch; > + sbase += spr->secondary.pitch; > + } > } > } > > @@ -167,23 +181,60 @@ swr_transfer_map(struct pipe_context *pipe, > } > > static void > -swr_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) > +swr_transfer_flush_region(struct pipe_context *pipe, > + struct pipe_transfer *transfer, > + const struct pipe_box *flush_box) > { > assert(transfer->resource); > + assert(transfer->usage & PIPE_TRANSFER_WRITE); > > - struct swr_resource *res = swr_resource(transfer->resource); > - /* if we're mapping the depth/stencil, copy out stencil */ > - if (res->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT > - && res->has_stencil) { > - for (unsigned i = 0; i < res->alignedWidth * res->alignedHeight; i++) { > - res->secondary.pBaseAddress[i] = res->swr.pBaseAddress[4 * i + 3]; > - } > - } else if (res->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT > - && res->has_stencil) { > - for (unsigned i = 0; i < res->alignedWidth * res->alignedHeight; i++) { > - res->secondary.pBaseAddress[i] = res->swr.pBaseAddress[8 * i + 4]; > + struct swr_resource *spr = swr_resource(transfer->resource); > + if (!spr->has_depth || !spr->has_stencil) > + return; > + > + size_t zbase, sbase; > + struct pipe_box box = *flush_box; > + box.x += transfer->box.x; > + box.y += transfer->box.y; > + box.z += transfer->box.z; > + for (int z 
= box.z; z < box.z + box.depth; z++) { > + zbase = (z * spr->swr.qpitch + box.y) * spr->swr.pitch + > + spr->mip_offsets[transfer->level]; > + sbase = (z * spr->secondary.qpitch + box.y) * spr->secondary.pitch + > + spr->secondary_mip_offsets[transfer->level]; > + for (int y = box.y; y < box.y + box.height; y++) { > + if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) { > + for (int x = box.x; x < box.x + box.width; x++) > + spr->secondary.pBaseAddress[sbase + x] = > + spr->swr.pBaseAddress[zbase + 4 * x + 3]; > + } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { > + for (int x = box.x; x < box.x + box.width; x++) > + spr->secondary.pBaseAddress[sbase + x] = > + spr->swr.pBaseAddress[zbase + 8 * x + 4]; > + } > + zbase += spr->swr.pitch; > + sbase += spr->secondary.pitch; > } > } > +} > + > +static void > +swr_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) > +{ > + assert(transfer->resource); > + > + struct swr_resource *spr = swr_resource(transfer->resource); > + /* if we're mapping the depth/stencil, copy in stencil for the section > + * being written out > + */ > + if (transfer->usage & PIPE_TRANSFER_WRITE && > + !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT) && > + spr->has_depth && spr->has_stencil) { > + struct pipe_box box; > + u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, > + transfer->box.depth, &box); > + swr_transfer_flush_region(pipe, transfer, &box); > + } > > pipe_resource_reference(&transfer->resource, NULL); > FREE(transfer); > @@ -425,8 +476,8 @@ swr_create_context(struct pipe_screen *p_screen, void > *priv, unsigned flags) > ctx->pipe.surface_destroy = swr_surface_destroy; > ctx->pipe.transfer_map = swr_transfer_map; > ctx->pipe.transfer_unmap = swr_transfer_unmap; > + ctx->pipe.transfer_flush_region = swr_transfer_flush_region; > > - ctx->pipe.transfer_flush_region = u_default_transfer_flush_region; > ctx->pipe.buffer_subdata = u_default_buffer_subdata; > ctx->pipe.texture_subdata 
= u_default_texture_subdata; > > diff --git a/src/gallium/drivers/swr/swr_draw.cpp > b/src/gallium/drivers/swr/swr_draw.cpp > index 39378e6..ba10bd5 100644 > --- a/src/gallium/drivers/swr/swr_draw.cpp > +++ b/src/gallium/drivers/swr/swr_draw.cpp > @@ -282,7 +282,9 @@ swr_store_dirty_resource(struct pipe_context *pipe, > swr_draw_context *pDC = &ctx->swrDC; > SWR_SURFACE_STATE *renderTargets = pDC->renderTargets; > for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++) > - if (renderTargets[i].pBaseAddress == spr->swr.pBaseAddress) { > + if (renderTargets[i].pBaseAddress == spr->swr.pBaseAddress || > + (spr->secondary.pBaseAddress && > + renderTargets[i].pBaseAddress == spr->secondary.pBaseAddress)) > { > swr_store_render_target(pipe, i, post_tile_state); > > /* Mesa thinks depth/stencil are fused, so we'll never get an > diff --git a/src/gallium/drivers/swr/swr_resource.h > b/src/gallium/drivers/swr/swr_resource.h > index 00001e9..41abd77 100644 > --- a/src/gallium/drivers/swr/swr_resource.h > +++ b/src/gallium/drivers/swr/swr_resource.h > @@ -41,17 +41,13 @@ struct swr_resource { > bool has_depth; > bool has_stencil; > > - UINT alignedWidth; > - UINT alignedHeight; > - > SWR_SURFACE_STATE swr; > SWR_SURFACE_STATE secondary; /* for faking depth/stencil merged formats */ > > struct sw_displaytarget *display_target; > > - unsigned row_stride[PIPE_MAX_TEXTURE_LEVELS]; > - unsigned img_stride[PIPE_MAX_TEXTURE_LEVELS]; > - unsigned mip_offsets[PIPE_MAX_TEXTURE_LEVELS]; > + size_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS]; > + size_t secondary_mip_offsets[PIPE_MAX_TEXTURE_LEVELS]; > > enum swr_resource_status status; > }; > diff --git a/src/gallium/drivers/swr/swr_screen.cpp > b/src/gallium/drivers/swr/swr_screen.cpp > index accd6a2..73deb03 100644 > --- a/src/gallium/drivers/swr/swr_screen.cpp > +++ b/src/gallium/drivers/swr/swr_screen.cpp > @@ -44,6 +44,8 @@ extern "C" { > > #include "jit_api.h" > > +#include "memory/TilingFunctions.h" > + > #include <stdio.h> > #include <map> 
> > @@ -721,12 +723,14 @@ swr_displaytarget_layout(struct swr_screen *screen, > struct swr_resource *res) > struct sw_winsys *winsys = screen->winsys; > struct sw_displaytarget *dt; > > + const unsigned width = align(res->swr.width, res->swr.halign); > + const unsigned height = align(res->swr.height, res->swr.valign); > + > UINT stride; > dt = winsys->displaytarget_create(winsys, > res->base.bind, > res->base.format, > - res->alignedWidth, > - res->alignedHeight, > + width, height, > 64, NULL, > &stride); > > @@ -740,14 +744,14 @@ swr_displaytarget_layout(struct swr_screen *screen, > struct swr_resource *res) > > /* Clear the display target surface */ > if (map) > - memset(map, 0, res->alignedHeight * stride); > + memset(map, 0, height * stride); > > winsys->displaytarget_unmap(winsys, dt); > > return TRUE; > } > > -static boolean > +static bool > swr_texture_layout(struct swr_screen *screen, > struct swr_resource *res, > boolean allocate) > @@ -763,87 +767,149 @@ swr_texture_layout(struct swr_screen *screen, > if (res->has_stencil && !res->has_depth) > fmt = PIPE_FORMAT_R8_UINT; > > + /* We always use the SWR layout. For 2D and 3D textures this looks like: > + * > + * |<------- pitch ------->| > + * +=======================+------- > + * |Array 0 | ^ > + * | | | > + * | Level 0 | | > + * | | | > + * | | qpitch > + * +-----------+-----------+ | > + * | | L2L2L2L2 | | > + * | Level 1 | L3L3 | | > + * | | L4 | v > + * +===========+===========+------- > + * |Array 1 | > + * | | > + * | Level 0 | > + * | | > + * | | > + * +-----------+-----------+ > + * | | L2L2L2L2 | > + * | Level 1 | L3L3 | > + * | | L4 | > + * +===========+===========+ > + * > + * The overall width in bytes is known as the pitch, while the overall > + * height in rows is the qpitch. Array slices are laid out logically below > + * one another, qpitch rows apart. For 3D surfaces, the "level" values are > + * just invalid for the higher array numbers (since depth is also > + * minified). 
1D and 1D array surfaces are stored effectively the same > way, > + * except that pitch never plays into it. All the levels are logically > + * adjacent to each other on the X axis. > + * > + * Each level's sizes are subject to the valign and halign settings of the > + * surface. For compressed formats that swr is unaware of, we will use an > + * appropriately-sized uncompressed format, and scale the widths/heights. > + * > + * This surface is stored inside res->swr. For depth/stencil textures, > + * res->secondary will have an identically-laid-out but R8_UINT-formatted > + * stencil tree. In the Z32F_S8 case, the primary surface still has 64-bpp > + * texels, to simplify map/unmap logic which copies the stencil values > + * in/out. > + */ > + > res->swr.width = pt->width0; > res->swr.height = pt->height0; > - res->swr.depth = pt->depth0; > res->swr.type = swr_convert_target_type(pt->target); > res->swr.tileMode = SWR_TILE_NONE; > res->swr.format = mesa_to_swr_format(fmt); > - res->swr.numSamples = (1 << pt->nr_samples); > + res->swr.numSamples = std::max(1u, pt->nr_samples); > > - SWR_FORMAT_INFO finfo = GetFormatInfo(res->swr.format); > - > - size_t total_size = 0; > - unsigned width = pt->width0; > - unsigned height = pt->height0; > - unsigned depth = pt->depth0; > - unsigned layers = pt->array_size; > - > - for (int level = 0; level <= pt->last_level; level++) { > - unsigned alignedWidth, alignedHeight; > - unsigned num_slices; > + if (pt->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) { > + res->swr.halign = KNOB_MACROTILE_X_DIM; > + res->swr.valign = KNOB_MACROTILE_Y_DIM; > + } else { > + res->swr.halign = 1; > + res->swr.valign = 1; > + } > > - if (pt->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) { > - alignedWidth = align(width, KNOB_MACROTILE_X_DIM); > - alignedHeight = align(height, KNOB_MACROTILE_Y_DIM); > - } else { > - alignedWidth = width; > - alignedHeight = height; > + // The pitch is the overall width of the texture in 
bytes. Most of the > time > + // this is the pitch of level 0 since all the other levels fit underneath > + // it. However in some degenerate situations, the width of level1 + level2 > + // may be larger. In that case, we use those widths. This can happen if, > + // e.g. halign is 32, and the width of level 0 is 32 or less. In that > case, > + // the aligned levels 1 and 2 will also be 32 each, adding up to 64. > + unsigned width_blocks = util_format_get_nblocksx(fmt, pt->width0); > + unsigned width = align(width_blocks, res->swr.halign); > + if (pt->last_level > 1) { > + width = std::max<uint32_t>( > + width, > + align(u_minify(width_blocks, 1), res->swr.halign) + > + align(u_minify(width_blocks, 2), res->swr.halign)); > + } > + res->swr.pitch = width * util_format_get_blocksize(fmt); > + > + > + // The qpitch is controlled by either the height of the second LOD, or the > + // combination of all the later LODs. > + unsigned height_blocks = util_format_get_nblocksy(fmt, pt->height0); > + res->swr.qpitch = align(height_blocks, res->swr.valign); > + if (pt->last_level == 1) { > + res->swr.qpitch += align(u_minify(height_blocks, 1), res->swr.valign); > + } else if (pt->last_level > 1) { > + unsigned height = u_minify(height_blocks, 1); > + unsigned level1 = align(height, res->swr.valign); > + unsigned level2 = 0; > + for (int level = 2; level <= pt->last_level; level++) { > + height = u_minify(height, 1); > + level2 += align(height, res->swr.valign); > } > + res->swr.qpitch += std::max(level1, level2); > + } > > - if (level == 0) { > - res->alignedWidth = alignedWidth; > - res->alignedHeight = alignedHeight; > + if (pt->target == PIPE_TEXTURE_3D) > + res->swr.depth = pt->depth0; > + else > + res->swr.depth = pt->array_size; > + > + // Fix up swr format if necessary so that LOD offset computation works > + if (res->swr.format == (SWR_FORMAT)-1) { > + res->swr.width = util_format_get_nblocksx(fmt, res->swr.width); > + res->swr.height = util_format_get_nblocksy(fmt, 
res->swr.height); > + switch (util_format_get_blocksize(fmt)) { > + default: > + unreachable("Unexpected format block size"); > + case 1: res->swr.format = R8_UINT; break; > + case 2: res->swr.format = R16_UINT; break; > + case 4: res->swr.format = R32_UINT; break; > + case 8: res->swr.format = R32G32_UINT; break; > + case 16: res->swr.format = R32G32B32A32_UINT; break; > } > + } > > - res->row_stride[level] = util_format_get_stride(fmt, alignedWidth); > - res->img_stride[level] = > - res->row_stride[level] * util_format_get_nblocksy(fmt, > alignedHeight); > - res->mip_offsets[level] = total_size; > - > - if (pt->target == PIPE_TEXTURE_3D) > - num_slices = depth; > - else if (pt->target == PIPE_TEXTURE_1D_ARRAY > - || pt->target == PIPE_TEXTURE_2D_ARRAY > - || pt->target == PIPE_TEXTURE_CUBE > - || pt->target == PIPE_TEXTURE_CUBE_ARRAY) > - num_slices = layers; > - else > - num_slices = 1; > - > - total_size += res->img_stride[level] * num_slices; > - if (total_size > SWR_MAX_TEXTURE_SIZE) > - return FALSE; > - > - width = u_minify(width, 1); > - height = u_minify(height, 1); > - depth = u_minify(depth, 1); > + for (int level = 0; level <= pt->last_level; level++) { > + res->mip_offsets[level] = > + ComputeSurfaceOffset<false>(0, 0, 0, 0, 0, level, &res->swr); > } > > - res->swr.halign = res->alignedWidth; > - res->swr.valign = res->alignedHeight; > - res->swr.pitch = res->row_stride[0]; > + size_t total_size = > + (size_t)res->swr.depth * res->swr.qpitch * res->swr.pitch; > + if (total_size > SWR_MAX_TEXTURE_SIZE) > + return false; > > if (allocate) { > res->swr.pBaseAddress = (uint8_t *)AlignedMalloc(total_size, 64); > > if (res->has_depth && res->has_stencil) { > - SWR_FORMAT_INFO finfo = GetFormatInfo(res->secondary.format); > - res->secondary.width = pt->width0; > - res->secondary.height = pt->height0; > - res->secondary.depth = pt->depth0; > - res->secondary.type = SURFACE_2D; > - res->secondary.tileMode = SWR_TILE_NONE; > + res->secondary = res->swr; > 
res->secondary.format = R8_UINT; > - res->secondary.numSamples = (1 << pt->nr_samples); > - res->secondary.pitch = res->alignedWidth * finfo.Bpp; > + res->secondary.pitch = res->swr.pitch / > util_format_get_blocksize(fmt); > + > + for (int level = 0; level <= pt->last_level; level++) { > + res->secondary_mip_offsets[level] = > + ComputeSurfaceOffset<false>(0, 0, 0, 0, 0, level, > &res->secondary); > + } > > res->secondary.pBaseAddress = (uint8_t *)AlignedMalloc( > - res->alignedHeight * res->secondary.pitch, 64); > + res->secondary.depth * res->secondary.qpitch * > + res->secondary.pitch, 64); > } > } > > - return TRUE; > + return true; > } > > static boolean > diff --git a/src/gallium/drivers/swr/swr_shader.cpp > b/src/gallium/drivers/swr/swr_shader.cpp > index 38a916e..f639df3 100644 > --- a/src/gallium/drivers/swr/swr_shader.cpp > +++ b/src/gallium/drivers/swr/swr_shader.cpp > @@ -34,6 +34,7 @@ > #include "builder.h" > > #include "tgsi/tgsi_strings.h" > +#include "util/u_format.h" > #include "gallivm/lp_bld_init.h" > #include "gallivm/lp_bld_flow.h" > #include "gallivm/lp_bld_struct.h" > @@ -41,6 +42,7 @@ > > #include "swr_context.h" > #include "swr_context_llvm.h" > +#include "swr_resource.h" > #include "swr_state.h" > #include "swr_screen.h" > > @@ -85,18 +87,36 @@ swr_generate_sampler_key(const struct lp_tgsi_info &info, > info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; > for (unsigned i = 0; i < key.nr_sampler_views; i++) { > if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) { > + const struct pipe_sampler_view *view = > + ctx->sampler_views[shader_type][i]; > lp_sampler_static_texture_state( > - &key.sampler[i].texture_state, > - ctx->sampler_views[shader_type][i]); > + &key.sampler[i].texture_state, view); > + if (view) { > + struct swr_resource *swr_res = swr_resource(view->texture); > + const struct util_format_description *desc = > + util_format_description(view->format); > + if (swr_res->has_depth && swr_res->has_stencil && > + 
!util_format_has_depth(desc)) > + key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT; > + } > } > } > } else { > key.nr_sampler_views = key.nr_samplers; > for (unsigned i = 0; i < key.nr_sampler_views; i++) { > if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { > + const struct pipe_sampler_view *view = > + ctx->sampler_views[shader_type][i]; > lp_sampler_static_texture_state( > - &key.sampler[i].texture_state, > - ctx->sampler_views[shader_type][i]); > + &key.sampler[i].texture_state, view); > + if (view) { > + struct swr_resource *swr_res = swr_resource(view->texture); > + const struct util_format_description *desc = > + util_format_description(view->format); > + if (swr_res->has_depth && swr_res->has_stencil && > + !util_format_has_depth(desc)) > + key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT; > + } > } > } > } > diff --git a/src/gallium/drivers/swr/swr_state.cpp > b/src/gallium/drivers/swr/swr_state.cpp > index 783afba..2c7f3be 100644 > --- a/src/gallium/drivers/swr/swr_state.cpp > +++ b/src/gallium/drivers/swr/swr_state.cpp > @@ -701,25 +701,46 @@ swr_update_texture_state(struct swr_context *ctx, > for (unsigned i = 0; i < num_sampler_views; i++) { > struct pipe_sampler_view *view = > ctx->sampler_views[shader_type][i]; > + struct swr_jit_texture *jit_tex = &textures[i]; > > + memset(jit_tex, 0, sizeof(*jit_tex)); > if (view) { > struct pipe_resource *res = view->texture; > struct swr_resource *swr_res = swr_resource(res); > - struct swr_jit_texture *jit_tex = &textures[i]; > - memset(jit_tex, 0, sizeof(*jit_tex)); > + SWR_SURFACE_STATE *swr = &swr_res->swr; > + size_t *mip_offsets = swr_res->mip_offsets; > + if (swr_res->has_depth && swr_res->has_stencil && > + !util_format_has_depth(util_format_description(view->format))) { > + swr = &swr_res->secondary; > + mip_offsets = swr_res->secondary_mip_offsets; > + } > + > jit_tex->width = res->width0; > jit_tex->height = res->height0; > - jit_tex->depth = res->depth0; > - 
jit_tex->first_level = view->u.tex.first_level; > - jit_tex->last_level = view->u.tex.last_level; > - jit_tex->base_ptr = swr_res->swr.pBaseAddress; > + jit_tex->base_ptr = swr->pBaseAddress; > + if (view->target != PIPE_BUFFER) { > + jit_tex->first_level = view->u.tex.first_level; > + jit_tex->last_level = view->u.tex.last_level; > + if (view->target == PIPE_TEXTURE_3D) > + jit_tex->depth = res->depth0; > + else > + jit_tex->depth = > + view->u.tex.last_layer - view->u.tex.first_layer + 1; > + jit_tex->base_ptr += view->u.tex.first_layer * > + swr->qpitch * swr->pitch; > + } else { > + unsigned view_blocksize = > util_format_get_blocksize(view->format); > + jit_tex->base_ptr += view->u.buf.offset; > + jit_tex->width = view->u.buf.size / view_blocksize; > + jit_tex->depth = 1; > + } > > for (unsigned level = jit_tex->first_level; > level <= jit_tex->last_level; > level++) { > - jit_tex->row_stride[level] = swr_res->row_stride[level]; > - jit_tex->img_stride[level] = swr_res->img_stride[level]; > - jit_tex->mip_offsets[level] = swr_res->mip_offsets[level]; > + jit_tex->row_stride[level] = swr->pitch; > + jit_tex->img_stride[level] = swr->qpitch * swr->pitch; > + jit_tex->mip_offsets[level] = mip_offsets[level]; > } > } > } > @@ -789,6 +810,61 @@ swr_update_constants(struct swr_context *ctx, enum > pipe_shader_type shaderType) > } > } > > +static bool > +swr_change_rt(struct swr_context *ctx, > + unsigned attachment, > + const struct pipe_surface *sf) > +{ > + swr_draw_context *pDC = &ctx->swrDC; > + struct SWR_SURFACE_STATE *rt = &pDC->renderTargets[attachment]; > + > + /* Do nothing if the render target hasn't changed */ > + if ((!sf || !sf->texture) && rt->pBaseAddress == nullptr) > + return false; > + > + /* Deal with disabling RT up front */ > + if (!sf || !sf->texture) { > + /* If detaching attachment, mark tiles as RESOLVED so core > + * won't try to load from non-existent target. 
*/ > + swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_RESOLVED); > + *rt = {0}; > + return true; > + } > + > + const struct swr_resource *swr = swr_resource(sf->texture); > + const SWR_SURFACE_STATE *swr_surface = &swr->swr; > + SWR_FORMAT fmt = mesa_to_swr_format(sf->format); > + > + if (attachment == SWR_ATTACHMENT_STENCIL && swr->secondary.pBaseAddress) { > + swr_surface = &swr->secondary; > + fmt = swr_surface->format; > + } > + > + if (rt->pBaseAddress == swr_surface->pBaseAddress && > + rt->format == fmt && > + rt->lod == sf->u.tex.level && > + rt->arrayIndex == sf->u.tex.first_layer) > + return false; > + > + bool need_fence = false; > + > + /* StoreTile for changed target */ > + if (rt->pBaseAddress) { > + /* If changing attachment to a new target, mark tiles as > + * INVALID so they are reloaded from surface. */ > + swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_INVALID); > + need_fence = true; > + } > + > + /* Make new attachment */ > + *rt = *swr_surface; > + rt->format = fmt; > + rt->lod = sf->u.tex.level; > + rt->arrayIndex = sf->u.tex.first_layer; > + > + return need_fence; > +} > + > void > swr_update_derived(struct pipe_context *pipe, > const struct pipe_draw_info *p_draw_info) > @@ -807,64 +883,30 @@ swr_update_derived(struct pipe_context *pipe, > /* Render Targets */ > if (ctx->dirty & SWR_NEW_FRAMEBUFFER) { > struct pipe_framebuffer_state *fb = &ctx->framebuffer; > - SWR_SURFACE_STATE *new_attachment[SWR_NUM_ATTACHMENTS] = {0}; > - UINT i; > + const struct util_format_description *desc = NULL; > + bool need_fence = false; > > /* colorbuffer targets */ > - if (fb->nr_cbufs) > - for (i = 0; i < fb->nr_cbufs; ++i) > - if (fb->cbufs[i]) { > - struct swr_resource *colorBuffer = > - swr_resource(fb->cbufs[i]->texture); > - new_attachment[SWR_ATTACHMENT_COLOR0 + i] = &colorBuffer->swr; > - } > - > - /* depth/stencil target */ > - if (fb->zsbuf) { > - struct swr_resource *depthStencilBuffer = > - swr_resource(fb->zsbuf->texture); > - 
if (depthStencilBuffer->has_depth) { > - new_attachment[SWR_ATTACHMENT_DEPTH] = &depthStencilBuffer->swr; > - > - if (depthStencilBuffer->has_stencil) > - new_attachment[SWR_ATTACHMENT_STENCIL] = > - &depthStencilBuffer->secondary; > - > - } else if (depthStencilBuffer->has_stencil) > - new_attachment[SWR_ATTACHMENT_STENCIL] = > &depthStencilBuffer->swr; > + if (fb->nr_cbufs) { > + for (unsigned i = 0; i < fb->nr_cbufs; ++i) > + need_fence |= swr_change_rt( > + ctx, SWR_ATTACHMENT_COLOR0 + i, fb->cbufs[i]); > } > + for (unsigned i = fb->nr_cbufs; i < SWR_NUM_RENDERTARGETS; ++i) > + need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_COLOR0 + i, NULL); > > - /* Make the attachment updates */ > - swr_draw_context *pDC = &ctx->swrDC; > - SWR_SURFACE_STATE *renderTargets = pDC->renderTargets; > - unsigned need_fence = FALSE; > - for (i = 0; i < SWR_NUM_ATTACHMENTS; i++) { > - void *new_base = nullptr; > - if (new_attachment[i]) > - new_base = new_attachment[i]->pBaseAddress; > - > - /* StoreTile for changed target */ > - if (renderTargets[i].pBaseAddress != new_base) { > - if (renderTargets[i].pBaseAddress) { > - /* If changing attachment to a new target, mark tiles as > - * INVALID so they are reloaded from surface. > - * If detaching attachment, mark tiles as RESOLVED so core > - * won't try to load from non-existent target. */ > - enum SWR_TILE_STATE post_state = (new_attachment[i] > - ? 
SWR_TILE_INVALID : SWR_TILE_RESOLVED); > - swr_store_render_target(pipe, i, post_state); > - > - need_fence |= TRUE; > - } > + /* depth/stencil target */ > + if (fb->zsbuf) > + desc = util_format_description(fb->zsbuf->format); > + if (fb->zsbuf && util_format_has_depth(desc)) > + need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, fb->zsbuf); > + else > + need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, NULL); > > - /* Make new attachment */ > - if (new_attachment[i]) > - renderTargets[i] = *new_attachment[i]; > - else > - if (renderTargets[i].pBaseAddress) > - renderTargets[i] = {0}; > - } > - } > + if (fb->zsbuf && util_format_has_stencil(desc)) > + need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, fb->zsbuf); > + else > + need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, NULL); > > /* This fence ensures any attachment changes are resolved before the > * next draw */ > -- > 2.7.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev