From: Marek Olšák <marek.ol...@amd.com> This is the simpler codepath for displayable DCC: on chips with only 1 RB, just disable RB and pipe alignment for DCC so that the display hardware can read it directly and the retile blit can be skipped. --- src/amd/common/ac_gpu_info.c | 2 + src/amd/common/ac_gpu_info.h | 3 + src/amd/common/ac_surface.c | 25 +++++++- src/amd/common/ac_surface.h | 2 +- src/gallium/drivers/radeon/radeon_winsys.h | 6 ++ src/gallium/drivers/radeonsi/si_texture.c | 74 ++++++++++++++++++++-- src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 8 +++ 7 files changed, 113 insertions(+), 7 deletions(-)
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index fc8c6a09d2f..a6d249a6d2f 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -496,20 +496,22 @@ void ac_print_gpu_info(struct radeon_info *info) info->pci_domain, info->pci_bus, info->pci_dev, info->pci_func); printf(" pci_id = 0x%x\n", info->pci_id); printf(" family = %i\n", info->family); printf(" chip_class = %i\n", info->chip_class); printf(" num_compute_rings = %u\n", info->num_compute_rings); printf(" num_sdma_rings = %i\n", info->num_sdma_rings); printf(" clock_crystal_freq = %i\n", info->clock_crystal_freq); printf(" tcc_cache_line_size = %u\n", info->tcc_cache_line_size); + printf(" use_display_dcc_unaligned = %u\n", info->use_display_dcc_unaligned); + printf("Memory info:\n"); printf(" pte_fragment_size = %u\n", info->pte_fragment_size); printf(" gart_page_size = %u\n", info->gart_page_size); printf(" gart_size = %i MB\n", (int)DIV_ROUND_UP(info->gart_size, 1024*1024)); printf(" vram_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_size, 1024*1024)); printf(" vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_vis_size, 1024*1024)); printf(" gds_size = %u kB\n", info->gds_size / 1024); printf(" gds_gfx_partition_size = %u kB\n", info->gds_gfx_partition_size / 1024); printf(" max_alloc_size = %i MB\n", (int)DIV_ROUND_UP(info->max_alloc_size, 1024*1024)); diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index b1ef9c53734..99fed520618 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -49,20 +49,23 @@ struct radeon_info { /* Device info. */ const char *name; uint32_t pci_id; enum radeon_family family; enum chip_class chip_class; uint32_t num_compute_rings; uint32_t num_sdma_rings; uint32_t clock_crystal_freq; uint32_t tcc_cache_line_size; + /* Disable RB and pipe alignment to skip the retile blit. (1 RB chips only) */ + bool use_display_dcc_unaligned; + /* Memory info. 
*/ uint32_t pte_fragment_size; uint32_t gart_page_size; uint64_t gart_size; uint64_t vram_size; uint64_t vram_vis_size; unsigned gds_size; unsigned gds_gfx_partition_size; uint64_t max_alloc_size; uint32_t min_alloc_size; diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index 91004e032a3..6802ab2badb 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -471,21 +471,22 @@ static unsigned cik_get_macro_tile_index(struct radeon_surf *surf) assert(index < 16); return index; } static bool get_display_flag(const struct ac_surf_config *config, const struct radeon_surf *surf) { unsigned num_channels = config->info.num_channels; unsigned bpe = surf->bpe; - if (surf->flags & RADEON_SURF_SCANOUT && + if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && + surf->flags & RADEON_SURF_SCANOUT && config->info.samples <= 1 && surf->blk_w <= 2 && surf->blk_h == 1) { /* subsampled */ if (surf->blk_w == 2 && surf->blk_h == 1) return true; if (/* RGBA8 or RGBA16F */ (bpe >= 4 && bpe <= 8 && num_channels == 4) || /* R5G6B5 or R5G5B5A1 */ (bpe == 2 && num_channels >= 3) || @@ -1208,21 +1209,21 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, din.numFrags = in->numFrags; din.numMipLevels = in->numMipLevels; din.dataSurfaceSize = out.surfSize; ret = Addr2ComputeDccInfo(addrlib, &din, &dout); if (ret != ADDR_OK) return ret; surf->u.gfx9.dcc.rb_aligned = din.dccKeyFlags.rbAligned; surf->u.gfx9.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned; - surf->u.gfx9.dcc_pitch_max = dout.pitch - 1; + surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1; surf->dcc_size = dout.dccRamSize; surf->dcc_alignment = dout.dccRamBaseAlign; surf->num_dcc_levels = in->numMipLevels; /* Disable DCC for levels that are in the mip tail. * * There are two issues that this is intended to * address: * * 1. Multiple mip levels may share a cache line. 
This @@ -1444,20 +1445,33 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib, AddrSurfInfoIn.numSlices = config->info.depth; else if (config->is_cube) AddrSurfInfoIn.numSlices = 6; else AddrSurfInfoIn.numSlices = config->info.array_size; /* This is propagated to HTILE/DCC/CMASK. */ AddrSurfInfoIn.flags.metaPipeUnaligned = 0; AddrSurfInfoIn.flags.metaRbUnaligned = 0; + /* The display hardware can only read DCC with RB_ALIGNED=0 and + * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED. + * + * The CB block requires RB_ALIGNED=1 except 1 RB chips. + * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes + * after rendering, so PIPE_ALIGNED=1 is recommended. + */ + if (info->use_display_dcc_unaligned && is_color_surface && + AddrSurfInfoIn.flags.display) { + AddrSurfInfoIn.flags.metaPipeUnaligned = 1; + AddrSurfInfoIn.flags.metaRbUnaligned = 1; + } + switch (mode) { case RADEON_SURF_MODE_LINEAR_ALIGNED: assert(config->info.samples <= 1); assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR; break; case RADEON_SURF_MODE_1D: case RADEON_SURF_MODE_2D: if (surf->flags & RADEON_SURF_IMPORTED) { @@ -1516,20 +1530,27 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib, surf->is_linear = surf->u.gfx9.surf.swizzle_mode == ADDR_SW_LINEAR; /* Query whether the surface is displayable. */ bool displayable = false; if (!config->is_3d && !config->is_cube) { r = Addr2IsValidDisplaySwizzleMode(addrlib, surf->u.gfx9.surf.swizzle_mode, surf->bpe * 8, &displayable); if (r) return r; + + /* Display needs unaligned DCC. */ + if (info->use_display_dcc_unaligned && + surf->num_dcc_levels && + (surf->u.gfx9.dcc.pipe_aligned || + surf->u.gfx9.dcc.rb_aligned)) + displayable = false; } surf->is_displayable = displayable; switch (surf->u.gfx9.surf.swizzle_mode) { /* S = standard. 
*/ case ADDR_SW_256B_S: case ADDR_SW_4KB_S: case ADDR_SW_64KB_S: case ADDR_SW_VAR_S: case ADDR_SW_64KB_S_T: diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h index 7ae166c70a3..eb50c37c3c2 100644 --- a/src/amd/common/ac_surface.h +++ b/src/amd/common/ac_surface.h @@ -142,21 +142,21 @@ struct gfx9_surf_layout { enum gfx9_resource_type resource_type; /* 1D, 2D or 3D */ uint16_t surf_pitch; /* in blocks */ uint16_t surf_height; uint64_t surf_offset; /* 0 unless imported with an offset */ /* The size of the 2D plane containing all mipmap levels. */ uint64_t surf_slice_size; /* Mipmap level offset within the slice in bytes. Only valid for LINEAR. */ uint32_t offset[RADEON_SURF_MAX_LEVELS]; - uint16_t dcc_pitch_max; /* (mip chain pitch - 1) */ + uint16_t display_dcc_pitch_max; /* (mip chain pitch - 1) */ uint64_t stencil_offset; /* separate stencil */ }; struct radeon_surf { /* Format properties. */ unsigned blk_w:4; unsigned blk_h:4; unsigned bpe:5; /* Number of mipmap levels where DCC is enabled starting from level 0. diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 82feef39487..4e53c992fdf 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -210,20 +210,26 @@ struct radeon_bo_metadata { unsigned tile_split; unsigned mtilea; unsigned num_banks; unsigned stride; bool scanout; } legacy; struct { /* surface flags */ unsigned swizzle_mode:5; + + /* DCC flags */ + /* [31:8]: max offset = 4GB - 256; 0 = DCC disabled */ + unsigned dcc_offset_256B:24; + unsigned dcc_pitch_max:14; /* (mip chain pitch - 1) for DCN */ + unsigned dcc_independent_64B:1; } gfx9; } u; /* Additional metadata associated with the buffer, in bytes. * The maximum size is 64 * 4. This is opaque for the winsys & kernel. * Supported by amdgpu only. 
*/ uint32_t size_metadata; uint32_t metadata[64]; }; diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index 581f90a7b2f..cb62f153e59 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -30,20 +30,21 @@ #include "util/u_memory.h" #include "util/u_pack_color.h" #include "util/u_resource.h" #include "util/u_surface.h" #include "util/u_transfer.h" #include "util/os_time.h" #include <errno.h> #include <inttypes.h> #include "state_tracker/drm_driver.h" #include "amd/common/sid.h" +#include "amd/common/gfx9d.h" static enum radeon_surf_mode si_choose_tiling(struct si_screen *sscreen, const struct pipe_resource *templ, bool tc_compatible_htile); bool si_prepare_for_dma_blit(struct si_context *sctx, struct si_texture *dst, unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, @@ -344,20 +345,25 @@ static void si_get_display_metadata(struct si_screen *sscreen, if (sscreen->info.chip_class >= GFX9) { if (metadata->u.gfx9.swizzle_mode > 0) *array_mode = RADEON_SURF_MODE_2D; else *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; *is_scanout = metadata->u.gfx9.swizzle_mode == 0 || metadata->u.gfx9.swizzle_mode % 4 == 2; surf->u.gfx9.surf.swizzle_mode = metadata->u.gfx9.swizzle_mode; + + if (metadata->u.gfx9.dcc_offset_256B) { + surf->u.gfx9.display_dcc_pitch_max = metadata->u.gfx9.dcc_pitch_max; + assert(metadata->u.gfx9.dcc_independent_64B == 1); + } } else { surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config; surf->u.legacy.bankw = metadata->u.legacy.bankw; surf->u.legacy.bankh = metadata->u.legacy.bankh; surf->u.legacy.tile_split = metadata->u.legacy.tile_split; surf->u.legacy.mtilea = metadata->u.legacy.mtilea; surf->u.legacy.num_banks = metadata->u.legacy.num_banks; if (metadata->u.legacy.macrotile == RADEON_LAYOUT_TILED) *array_mode = RADEON_SURF_MODE_2D; @@ -610,20 +616,29 @@ static void si_set_tex_bo_metadata(struct si_screen *sscreen, struct si_texture 
*tex) { struct radeon_surf *surface = &tex->surface; struct pipe_resource *res = &tex->buffer.b.b; struct radeon_bo_metadata md; memset(&md, 0, sizeof(md)); if (sscreen->info.chip_class >= GFX9) { md.u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode; + + if (tex->dcc_offset && !tex->dcc_separate_buffer) { + uint64_t dcc_offset = tex->dcc_offset; + + assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24)); + md.u.gfx9.dcc_offset_256B = dcc_offset >> 8; + md.u.gfx9.dcc_pitch_max = tex->surface.u.gfx9.display_dcc_pitch_max; + md.u.gfx9.dcc_independent_64B = 1; + } } else { md.u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ? RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; md.u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ? RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; md.u.legacy.pipe_config = surface->u.legacy.pipe_config; md.u.legacy.bankw = surface->u.legacy.bankw; md.u.legacy.bankh = surface->u.legacy.bankh; md.u.legacy.tile_split = surface->u.legacy.tile_split; md.u.legacy.mtilea = surface->u.legacy.mtilea; @@ -699,29 +714,65 @@ static void si_get_opaque_metadata(struct si_screen *sscreen, return; /* Return if DCC is enabled. The texture should be set up with it * already. */ if (md->size_metadata >= 10 * 4 && /* at least 2(header) + 8(desc) dwords */ md->metadata[0] != 0 && md->metadata[1] == si_get_bo_metadata_word1(sscreen) && G_008F28_COMPRESSION_EN(desc[6])) { tex->dcc_offset = (uint64_t)desc[7] << 8; + + if (sscreen->info.chip_class >= GFX9) { + /* Fix up parameters for displayable DCC. Some state + * trackers don't set the SCANOUT flag when importing + * displayable images, so we have to recover the correct + * parameters here. + */ + tex->surface.u.gfx9.dcc.pipe_aligned = + G_008F24_META_PIPE_ALIGNED(desc[5]); + tex->surface.u.gfx9.dcc.rb_aligned = + G_008F24_META_RB_ALIGNED(desc[5]); + + /* If DCC is unaligned, this can only be a displayable image. 
*/ + if (!tex->surface.u.gfx9.dcc.pipe_aligned && + !tex->surface.u.gfx9.dcc.rb_aligned) + tex->surface.is_displayable = true; + } return; } /* Disable DCC. These are always set by texture_from_handle and must * be cleared here. */ tex->dcc_offset = 0; } +static bool si_has_displayable_dcc(struct si_texture *tex) +{ + struct si_screen *sscreen = (struct si_screen*)tex->buffer.b.b.screen; + + if (sscreen->info.chip_class <= VI) + return false; + + /* This needs a cache flush before scanout. + * (it can't be scanned out and rendered to simultaneously) + */ + if (sscreen->info.use_display_dcc_unaligned && + tex->dcc_offset && + !tex->surface.u.gfx9.dcc.pipe_aligned && + !tex->surface.u.gfx9.dcc.rb_aligned) + return true; + + return false; +} + static boolean si_texture_get_handle(struct pipe_screen* screen, struct pipe_context *ctx, struct pipe_resource *resource, struct winsys_handle *whandle, unsigned usage) { struct si_screen *sscreen = (struct si_screen*)screen; struct si_context *sctx; struct si_resource *res = si_resource(resource); struct si_texture *tex = (struct si_texture*)resource; @@ -752,21 +803,24 @@ static boolean si_texture_get_handle(struct pipe_screen* screen, assert(res->b.b.bind & PIPE_BIND_SHARED); assert(res->flags & RADEON_FLAG_NO_SUBALLOC); assert(!(res->flags & RADEON_FLAG_NO_INTERPROCESS_SHARING)); assert(tex->surface.tile_swizzle == 0); } /* Since shader image stores don't support DCC on VI, * disable it for external clients that want write * access. */ - if (usage & PIPE_HANDLE_USAGE_SHADER_WRITE && tex->dcc_offset) { + if ((usage & PIPE_HANDLE_USAGE_SHADER_WRITE && tex->dcc_offset) || + /* Displayable DCC requires an explicit flush. 
*/ + (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && + si_has_displayable_dcc(tex))) { if (si_texture_disable_dcc(sctx, tex)) { update_metadata = true; /* si_texture_disable_dcc flushes the context */ flush = false; } } if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && (tex->cmask_buffer || tex->dcc_offset)) { /* Eliminate fast clear (both CMASK and DCC) */ @@ -1005,21 +1059,21 @@ void si_print_texture_info(struct si_screen *sscreen, tex->surface.htile_alignment, tex->surface.u.gfx9.htile.rb_aligned, tex->surface.u.gfx9.htile.pipe_aligned); } if (tex->dcc_offset) { u_log_printf(log, " DCC: offset=%"PRIu64", size=%u, " "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n", tex->dcc_offset, tex->surface.dcc_size, tex->surface.dcc_alignment, - tex->surface.u.gfx9.dcc_pitch_max, + tex->surface.u.gfx9.display_dcc_pitch_max, tex->surface.num_dcc_levels); } if (tex->surface.u.gfx9.stencil_offset) { u_log_printf(log, " Stencil: offset=%"PRIu64", swmode=%u, epitch=%u\n", tex->surface.u.gfx9.stencil_offset, tex->surface.u.gfx9.stencil.swizzle_mode, tex->surface.u.gfx9.stencil.epitch); } return; @@ -1192,22 +1246,23 @@ si_texture_create_object(struct pipe_screen *screen, if (!tex->surface.fmask_size || !tex->surface.cmask_size) goto error; } /* Shared textures must always set up DCC here. * If it's not present, it will be disabled by * apply_opaque_metadata later. */ if (tex->surface.dcc_size && (buf || !(sscreen->debug_flags & DBG(NO_DCC))) && - !(tex->surface.flags & RADEON_SURF_SCANOUT)) { - /* Reserve space for the DCC buffer. */ + (sscreen->info.use_display_dcc_unaligned || + !(tex->surface.flags & RADEON_SURF_SCANOUT))) { + /* Add space for the DCC buffer. */ tex->dcc_offset = align64(tex->size, tex->surface.dcc_alignment); tex->size = tex->dcc_offset + tex->surface.dcc_size; } } /* Now create the backing buffer. 
*/ if (!buf) { si_init_resource_fields(sscreen, resource, tex->size, tex->surface.surf_alignment); @@ -1503,20 +1558,31 @@ static struct pipe_resource *si_texture_from_winsys_buffer(struct si_screen *ssc tex = si_texture_create_object(&sscreen->b, templ, buf, &surface); if (!tex) return NULL; tex->buffer.b.is_shared = true; tex->buffer.external_usage = usage; si_get_opaque_metadata(sscreen, tex, &metadata); + /* Displayable DCC requires an explicit flush. */ + if (dedicated && + !(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && + si_has_displayable_dcc(tex)) { + /* TODO: do we need to decompress DCC? */ + if (si_texture_discard_dcc(sscreen, tex)) { + /* Update BO metadata after disabling DCC. */ + si_set_tex_bo_metadata(sscreen, tex); + } + } + assert(tex->surface.tile_swizzle == 0); return &tex->buffer.b.b; } static struct pipe_resource *si_texture_from_handle(struct pipe_screen *screen, const struct pipe_resource *templ, struct winsys_handle *whandle, unsigned usage) { struct si_screen *sscreen = (struct si_screen*)screen; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 58979bd4ea7..c1863057370 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -1219,20 +1219,24 @@ static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf, assert(bo->bo && "must not be called for slab entries"); r = amdgpu_bo_query_info(bo->bo, &info); if (r) return; tiling_flags = info.metadata.tiling_info; if (bo->ws->info.chip_class >= GFX9) { md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE); + + md->u.gfx9.dcc_offset_256B = AMDGPU_TILING_GET(tiling_flags, DCC_OFFSET_256B); + md->u.gfx9.dcc_pitch_max = AMDGPU_TILING_GET(tiling_flags, DCC_PITCH_MAX); + md->u.gfx9.dcc_independent_64B = AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B); } else { md->u.legacy.microtile = RADEON_LAYOUT_LINEAR; md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR; if 
(AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */ md->u.legacy.macrotile = RADEON_LAYOUT_TILED; else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */ md->u.legacy.microtile = RADEON_LAYOUT_TILED; md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); @@ -1252,20 +1256,24 @@ static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf, struct radeon_bo_metadata *md) { struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); struct amdgpu_bo_metadata metadata = {0}; uint64_t tiling_flags = 0; assert(bo->bo && "must not be called for slab entries"); if (bo->ws->info.chip_class >= GFX9) { tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode); + + tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256B); + tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max); + tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64B); } else { if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED) tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */ else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED) tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */ else tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */ tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config); tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw)); -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev