From: Marek Olšák <marek.ol...@amd.com>

This mainly removes and simplifies code that is no longer needed.
There were some issues with the DB->CB stencil copy on gfx10, so let's just use a fragment shader blit for all ZS mappings. It's more reliable. --- src/gallium/drivers/radeonsi/si_blit.c | 29 +--- src/gallium/drivers/radeonsi/si_pipe.h | 9 +- src/gallium/drivers/radeonsi/si_state.c | 2 +- src/gallium/drivers/radeonsi/si_texture.c | 166 +++++++--------------- 4 files changed, 52 insertions(+), 154 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 5806342cca9..638f2ee4d24 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -173,45 +173,20 @@ si_blit_dbcb_copy(struct si_context *sctx, } sctx->decompression_enabled = false; sctx->dbcb_depth_copy_enabled = false; sctx->dbcb_stencil_copy_enabled = false; si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); return fully_copied_levels; } -void si_blit_decompress_depth(struct pipe_context *ctx, - struct si_texture *texture, - struct si_texture *staging, - unsigned first_level, unsigned last_level, - unsigned first_layer, unsigned last_layer, - unsigned first_sample, unsigned last_sample) -{ - const struct util_format_description *desc; - unsigned planes = 0; - - assert(staging != NULL && "use si_blit_decompress_zs_in_place instead"); - - desc = util_format_description(staging->buffer.b.b.format); - - if (util_format_has_depth(desc)) - planes |= PIPE_MASK_Z; - if (util_format_has_stencil(desc)) - planes |= PIPE_MASK_S; - - si_blit_dbcb_copy( - (struct si_context *)ctx, texture, staging, planes, - u_bit_consecutive(first_level, last_level - first_level + 1), - first_layer, last_layer, first_sample, last_sample); -} - /* Helper function for si_blit_decompress_zs_in_place. 
*/ static void si_blit_decompress_zs_planes_in_place(struct si_context *sctx, struct si_texture *texture, unsigned planes, unsigned level_mask, unsigned first_layer, unsigned last_layer) { struct pipe_surface *zsurf, surf_tmpl = {{0}}; unsigned layer, max_layer, checked_last_layer; @@ -348,21 +323,21 @@ si_decompress_depth(struct si_context *sctx, u_log_printf(sctx->log, "\n------------------------------------------------\n" "Decompress Depth (levels %u - %u, levels Z: 0x%x S: 0x%x)\n\n", first_level, last_level, levels_z, levels_s); /* We may have to allocate the flushed texture here when called from * si_decompress_subresource. */ if (copy_planes && (tex->flushed_depth_texture || - si_init_flushed_depth_texture(&sctx->b, &tex->buffer.b.b, NULL))) { + si_init_flushed_depth_texture(&sctx->b, &tex->buffer.b.b))) { struct si_texture *dst = tex->flushed_depth_texture; unsigned fully_copied_levels; unsigned levels = 0; assert(tex->flushed_depth_texture); if (util_format_is_depth_and_stencil(dst->buffer.b.b.format)) copy_planes = PIPE_MASK_Z | PIPE_MASK_S; if (copy_planes & PIPE_MASK_Z) { @@ -1242,21 +1217,21 @@ static void si_blit(struct pipe_context *ctx, assert(util_blitter_is_blit_supported(sctx->blitter, info)); /* The driver doesn't decompress resources automatically while * u_blitter is rendering. */ vi_disable_dcc_if_incompatible_format(sctx, info->src.resource, info->src.level, info->src.format); vi_disable_dcc_if_incompatible_format(sctx, info->dst.resource, info->dst.level, info->dst.format); - si_decompress_subresource(ctx, info->src.resource, info->mask, + si_decompress_subresource(ctx, info->src.resource, PIPE_MASK_RGBAZS, info->src.level, info->src.box.z, info->src.box.z + info->src.box.depth - 1); if (sctx->screen->debug_flags & DBG(FORCE_DMA) && util_try_blit_via_copy_region(ctx, info)) return; si_blitter_begin(sctx, SI_BLIT | (info->render_condition_enable ? 
0 : SI_DISABLE_RENDER_COND)); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 11678e1b4cb..8512c27b2cd 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1202,26 +1202,20 @@ void si_blitter_end(struct si_context *sctx); void si_init_blit_functions(struct si_context *sctx); void si_decompress_textures(struct si_context *sctx, unsigned shader_mask); void si_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, struct pipe_resource *src, unsigned src_level, const struct pipe_box *src_box); void si_decompress_dcc(struct si_context *sctx, struct si_texture *tex); -void si_blit_decompress_depth(struct pipe_context *ctx, - struct si_texture *texture, - struct si_texture *staging, - unsigned first_level, unsigned last_level, - unsigned first_layer, unsigned last_layer, - unsigned first_sample, unsigned last_sample); /* si_buffer.c */ bool si_rings_is_buffer_referenced(struct si_context *sctx, struct pb_buffer *buf, enum radeon_bo_usage usage); void *si_buffer_map_sync_with_rings(struct si_context *sctx, struct si_resource *resource, unsigned usage); void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res, @@ -1445,22 +1439,21 @@ bool si_prepare_for_dma_blit(struct si_context *sctx, unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, struct si_texture *src, unsigned src_level, const struct pipe_box *src_box); void si_eliminate_fast_color_clear(struct si_context *sctx, struct si_texture *tex); void si_texture_discard_cmask(struct si_screen *sscreen, struct si_texture *tex); bool si_init_flushed_depth_texture(struct pipe_context *ctx, - struct pipe_resource *texture, - struct si_texture **staging); + struct pipe_resource *texture); void si_print_texture_info(struct si_screen *sscreen, struct si_texture *tex, struct u_log_context *log); struct pipe_resource 
*si_texture_create(struct pipe_screen *screen, const struct pipe_resource *templ); bool vi_dcc_formats_compatible(enum pipe_format format1, enum pipe_format format2); bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex, unsigned level, enum pipe_format view_format); void vi_disable_dcc_if_incompatible_format(struct si_context *sctx, diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index b9fc77f7918..3996d280470 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -4120,21 +4120,21 @@ si_create_sampler_view_custom(struct pipe_context *ctx, state->target == PIPE_TEXTURE_RECT || state->target == PIPE_TEXTURE_CUBE) last_layer = state->u.tex.first_layer; /* Texturing with separate depth and stencil. */ pipe_format = state->format; /* Depth/stencil texturing sometimes needs separate texture. */ if (tex->is_depth && !si_can_sample_zs(tex, view->is_stencil_sampler)) { if (!tex->flushed_depth_texture && - !si_init_flushed_depth_texture(ctx, texture, NULL)) { + !si_init_flushed_depth_texture(ctx, texture)) { pipe_resource_reference(&view->base.texture, NULL); FREE(view); return NULL; } assert(tex->flushed_depth_texture); /* Override format for the case where the flushed texture * contains only Z or only S. 
*/ diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index dd383635675..be2562c45b4 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -121,59 +121,58 @@ static void si_copy_region_with_blit(struct pipe_context *pipe, blit.src.box = *src_box; blit.dst.resource = dst; blit.dst.format = dst->format; blit.dst.level = dst_level; blit.dst.box.x = dstx; blit.dst.box.y = dsty; blit.dst.box.z = dstz; blit.dst.box.width = src_box->width; blit.dst.box.height = src_box->height; blit.dst.box.depth = src_box->depth; - blit.mask = util_format_get_mask(src->format) & - util_format_get_mask(dst->format); + blit.mask = util_format_get_mask(dst->format); blit.filter = PIPE_TEX_FILTER_NEAREST; if (blit.mask) { pipe->blit(pipe, &blit); } } /* Copy from a full GPU texture to a transfer's staging one. */ static void si_copy_to_staging_texture(struct pipe_context *ctx, struct si_transfer *stransfer) { struct si_context *sctx = (struct si_context*)ctx; struct pipe_transfer *transfer = (struct pipe_transfer*)stransfer; struct pipe_resource *dst = &stransfer->staging->b.b; struct pipe_resource *src = transfer->resource; - if (src->nr_samples > 1) { + if (src->nr_samples > 1 || ((struct si_texture*)src)->is_depth) { si_copy_region_with_blit(ctx, dst, 0, 0, 0, 0, src, transfer->level, &transfer->box); return; } sctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level, &transfer->box); } /* Copy from a transfer's staging texture to a full GPU one. 
*/ static void si_copy_from_staging_texture(struct pipe_context *ctx, struct si_transfer *stransfer) { struct si_context *sctx = (struct si_context*)ctx; struct pipe_transfer *transfer = (struct pipe_transfer*)stransfer; struct pipe_resource *dst = transfer->resource; struct pipe_resource *src = &stransfer->staging->b.b; struct pipe_box sbox; u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox); - if (dst->nr_samples > 1) { + if (dst->nr_samples > 1 || ((struct si_texture*)dst)->is_depth) { si_copy_region_with_blit(ctx, dst, transfer->level, transfer->box.x, transfer->box.y, transfer->box.z, src, 0, &sbox); return; } if (util_format_is_compressed(dst->format)) { sbox.width = util_format_get_nblocksx(dst->format, sbox.width); sbox.height = util_format_get_nblocksx(dst->format, sbox.height); } @@ -1707,80 +1706,71 @@ static struct pipe_resource *si_texture_from_handle(struct pipe_screen *screen, sscreen->info.max_alignment, &stride, &offset); if (!buf) return NULL; return si_texture_from_winsys_buffer(sscreen, templ, buf, stride, offset, usage, true); } bool si_init_flushed_depth_texture(struct pipe_context *ctx, - struct pipe_resource *texture, - struct si_texture **staging) + struct pipe_resource *texture) { struct si_texture *tex = (struct si_texture*)texture; struct pipe_resource resource; - struct si_texture **flushed_depth_texture = staging ? - staging : &tex->flushed_depth_texture; enum pipe_format pipe_format = texture->format; - if (!staging) { - if (tex->flushed_depth_texture) - return true; /* it's ready */ - - if (!tex->can_sample_z && tex->can_sample_s) { - switch (pipe_format) { - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - /* Save memory by not allocating the S plane. */ - pipe_format = PIPE_FORMAT_Z32_FLOAT; - break; - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - case PIPE_FORMAT_S8_UINT_Z24_UNORM: - /* Save memory bandwidth by not copying the - * stencil part during flush. 
- * - * This potentially increases memory bandwidth - * if an application uses both Z and S texturing - * simultaneously (a flushed Z24S8 texture - * would be stored compactly), but how often - * does that really happen? - */ - pipe_format = PIPE_FORMAT_Z24X8_UNORM; - break; - default:; - } - } else if (!tex->can_sample_s && tex->can_sample_z) { - assert(util_format_has_stencil(util_format_description(pipe_format))); - - /* DB->CB copies to an 8bpp surface don't work. */ - pipe_format = PIPE_FORMAT_X24S8_UINT; + assert(!tex->flushed_depth_texture); + + if (!tex->can_sample_z && tex->can_sample_s) { + switch (pipe_format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + /* Save memory by not allocating the S plane. */ + pipe_format = PIPE_FORMAT_Z32_FLOAT; + break; + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + /* Save memory bandwidth by not copying the + * stencil part during flush. + * + * This potentially increases memory bandwidth + * if an application uses both Z and S texturing + * simultaneously (a flushed Z24S8 texture + * would be stored compactly), but how often + * does that really happen? + */ + pipe_format = PIPE_FORMAT_Z24X8_UNORM; + break; + default:; } + } else if (!tex->can_sample_s && tex->can_sample_z) { + assert(util_format_has_stencil(util_format_description(pipe_format))); + + /* DB->CB copies to an 8bpp surface don't work. */ + pipe_format = PIPE_FORMAT_X24S8_UINT; } memset(&resource, 0, sizeof(resource)); resource.target = texture->target; resource.format = pipe_format; resource.width0 = texture->width0; resource.height0 = texture->height0; resource.depth0 = texture->depth0; resource.array_size = texture->array_size; resource.last_level = texture->last_level; resource.nr_samples = texture->nr_samples; - resource.usage = staging ? 
PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT; + resource.usage = PIPE_USAGE_DEFAULT; resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL; resource.flags = texture->flags | SI_RESOURCE_FLAG_FLUSHED_DEPTH; - if (staging) - resource.flags |= SI_RESOURCE_FLAG_TRANSFER; - - *flushed_depth_texture = (struct si_texture *)ctx->screen->resource_create(ctx->screen, &resource); - if (*flushed_depth_texture == NULL) { + tex->flushed_depth_texture = (struct si_texture *)ctx->screen->resource_create(ctx->screen, &resource); + if (!tex->flushed_depth_texture) { PRINT_ERR("failed to create temporary texture to hold flushed depth\n"); return false; } return true; } /** * Initialize the pipe_resource descriptor to be of the same size as the box, * which is supposed to hold a subregion of the texture "orig" at the given * mipmap level. @@ -1873,22 +1863,24 @@ static void *si_texture_transfer_map(struct pipe_context *ctx, struct si_texture *tex = (struct si_texture*)texture; struct si_transfer *trans; struct si_resource *buf; unsigned offset = 0; char *map; bool use_staging_texture = false; assert(!(texture->flags & SI_RESOURCE_FLAG_TRANSFER)); assert(box->width && box->height && box->depth); - /* Depth textures use staging unconditionally. */ - if (!tex->is_depth) { + if (tex->is_depth) { + /* Depth textures use staging unconditionally. */ + use_staging_texture = true; + } else { /* Degrade the tile mode if we get too many transfers on APUs. * On dGPUs, the staging texture is always faster. * Only count uploads that are at least 4x4 pixels large. 
*/ if (!sctx->screen->info.has_dedicated_vram && level == 0 && box->width >= 4 && box->height >= 4 && p_atomic_inc_return(&tex->num_level0_transfers) == 10) { bool can_invalidate = si_can_invalidate_texture(sctx->screen, tex, @@ -1929,90 +1921,36 @@ static void *si_texture_transfer_map(struct pipe_context *ctx, } trans = CALLOC_STRUCT(si_transfer); if (!trans) return NULL; pipe_resource_reference(&trans->b.b.resource, texture); trans->b.b.level = level; trans->b.b.usage = usage; trans->b.b.box = *box; - if (tex->is_depth) { - struct si_texture *staging_depth; - - if (tex->buffer.b.b.nr_samples > 1) { - /* MSAA depth buffers need to be converted to single sample buffers. - * - * Mapping MSAA depth buffers can occur if ReadPixels is called - * with a multisample GLX visual. - * - * First downsample the depth buffer to a temporary texture, - * then decompress the temporary one to staging. - * - * Only the region being mapped is transfered. - */ - struct pipe_resource resource; - - si_init_temp_resource_from_box(&resource, texture, box, level, 0); - - if (!si_init_flushed_depth_texture(ctx, &resource, &staging_depth)) { - PRINT_ERR("failed to create temporary texture to hold untiled copy\n"); - goto fail_trans; - } - - if (usage & PIPE_TRANSFER_READ) { - struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource); - if (!temp) { - PRINT_ERR("failed to create a temporary depth texture\n"); - goto fail_trans; - } - - si_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box); - si_blit_decompress_depth(ctx, (struct si_texture*)temp, staging_depth, - 0, 0, 0, box->depth, 0, 0); - pipe_resource_reference(&temp, NULL); - } - - /* Just get the strides. */ - si_texture_get_offset(sctx->screen, staging_depth, level, NULL, - &trans->b.b.stride, - &trans->b.b.layer_stride); - } else { - /* XXX: only readback the rectangle which is being mapped? 
*/ - /* XXX: when discard is true, no need to read back from depth texture */ - if (!si_init_flushed_depth_texture(ctx, texture, &staging_depth)) { - PRINT_ERR("failed to create temporary texture to hold untiled copy\n"); - goto fail_trans; - } - - si_blit_decompress_depth(ctx, tex, staging_depth, - level, level, - box->z, box->z + box->depth - 1, - 0, 0); - - offset = si_texture_get_offset(sctx->screen, staging_depth, - level, box, - &trans->b.b.stride, - &trans->b.b.layer_stride); - } - - trans->staging = &staging_depth->buffer; - buf = trans->staging; - } else if (use_staging_texture) { + if (use_staging_texture) { struct pipe_resource resource; struct si_texture *staging; si_init_temp_resource_from_box(&resource, texture, box, level, SI_RESOURCE_FLAG_TRANSFER); resource.usage = (usage & PIPE_TRANSFER_READ) ? PIPE_USAGE_STAGING : PIPE_USAGE_STREAM; + /* Since depth-stencil textures don't support linear tiling, + * blit from ZS to color and vice versa. u_blitter will do + * the packing for these formats. + */ + if (tex->is_depth) + resource.format = util_blitter_get_color_format_for_zs(resource.format); + /* Create the temporary texture. */ staging = (struct si_texture*)ctx->screen->resource_create(ctx->screen, &resource); if (!staging) { PRINT_ERR("failed to create temporary texture to hold untiled copy\n"); goto fail_trans; } trans->staging = &staging->buffer; /* Just get the strides. */ si_texture_get_offset(sctx->screen, staging, 0, NULL, @@ -2063,30 +2001,22 @@ static void si_texture_transfer_unmap(struct pipe_context *ctx, /* Always unmap texture CPU mappings on 32-bit architectures, so that * we don't run out of the CPU address space. */ if (sizeof(void*) == 4) { struct si_resource *buf = stransfer->staging ? 
stransfer->staging : &tex->buffer; sctx->ws->buffer_unmap(buf->buf); } - if ((transfer->usage & PIPE_TRANSFER_WRITE) && stransfer->staging) { - if (tex->is_depth && tex->buffer.b.b.nr_samples <= 1) { - ctx->resource_copy_region(ctx, texture, transfer->level, - transfer->box.x, transfer->box.y, transfer->box.z, - &stransfer->staging->b.b, transfer->level, - &transfer->box); - } else { - si_copy_from_staging_texture(ctx, stransfer); - } - } + if ((transfer->usage & PIPE_TRANSFER_WRITE) && stransfer->staging) + si_copy_from_staging_texture(ctx, stransfer); if (stransfer->staging) { sctx->num_alloc_tex_transfer_bytes += stransfer->staging->buf->size; si_resource_reference(&stransfer->staging, NULL); } /* Heuristic for {upload, draw, upload, draw, ..}: * * Flush the gfx IB if we've allocated too much texture storage. * -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev