Yes, #1 is dead.

Marek
On Fri, Mar 30, 2018, 4:47 AM Dieter Nützel <die...@nuetzel-hh.de> wrote: > Hello Marek, > > 2-3 landed. > Is #1 dead after my findings? ;-) > > Dieter > > Am 11.03.2018 19:11, schrieb Marek Olšák: > > From: Marek Olšák <marek.ol...@amd.com> > > > > This should improve the score for the GpuTest Triangle benchmark. > > Vulkan doesn't use this either. > > --- > > src/gallium/drivers/radeon/r600_pipe_common.h | 1 - > > src/gallium/drivers/radeon/r600_texture.c | 11 +------- > > src/gallium/drivers/radeonsi/si_clear.c | 37 > > ++------------------------- > > src/gallium/drivers/radeonsi/si_state.c | 6 ----- > > 4 files changed, 3 insertions(+), 52 deletions(-) > > > > diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h > > b/src/gallium/drivers/radeon/r600_pipe_common.h > > index 7941903..9701757 100644 > > --- a/src/gallium/drivers/radeon/r600_pipe_common.h > > +++ b/src/gallium/drivers/radeon/r600_pipe_common.h > > @@ -209,21 +209,20 @@ struct r600_cmask_info { > > struct r600_texture { > > struct r600_resource resource; > > > > struct radeon_surf surface; > > uint64_t size; > > struct r600_texture *flushed_depth_texture; > > > > /* Colorbuffer compression and fast clear. */ > > struct r600_fmask_info fmask; > > struct r600_cmask_info cmask; > > - struct r600_resource *cmask_buffer; > > uint64_t dcc_offset; /* 0 = disabled */ > > unsigned cb_color_info; /* fast clear > enable bit */ > > unsigned color_clear_value[2]; > > unsigned > last_msaa_resolve_target_micro_mode; > > unsigned num_level0_transfers; > > > > /* Depth buffer compression and fast clear. 
*/ > > uint64_t htile_offset; > > float depth_clear_value; > > uint16_t dirty_level_mask; /* each bit says > if that mipmap is > > compressed */ > > diff --git a/src/gallium/drivers/radeon/r600_texture.c > > b/src/gallium/drivers/radeon/r600_texture.c > > index 125e7ef..03bc955 100644 > > --- a/src/gallium/drivers/radeon/r600_texture.c > > +++ b/src/gallium/drivers/radeon/r600_texture.c > > @@ -405,26 +405,22 @@ void si_texture_discard_cmask(struct si_screen > > *sscreen, > > { > > if (!rtex->cmask.size) > > return; > > > > assert(rtex->resource.b.b.nr_samples <= 1); > > > > /* Disable CMASK. */ > > memset(&rtex->cmask, 0, sizeof(rtex->cmask)); > > rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8; > > rtex->dirty_level_mask = 0; > > - > > rtex->cb_color_info &= ~S_028C70_FAST_CLEAR(1); > > > > - if (rtex->cmask_buffer != &rtex->resource) > > - r600_resource_reference(&rtex->cmask_buffer, NULL); > > - > > /* Notify all contexts about the change. */ > > p_atomic_inc(&sscreen->dirty_tex_counter); > > p_atomic_inc(&sscreen->compressed_colortex_counter); > > } > > > > static bool r600_can_disable_dcc(struct r600_texture *rtex) > > { > > /* We can't disable DCC if it can be written by another process. 
*/ > > return rtex->dcc_offset && > > (!rtex->resource.b.is_shared || > > @@ -813,24 +809,20 @@ static boolean r600_texture_get_handle(struct > > pipe_screen* screen, > > slice_size, whandle); > > } > > > > static void r600_texture_destroy(struct pipe_screen *screen, > > struct pipe_resource *ptex) > > { > > struct r600_texture *rtex = (struct r600_texture*)ptex; > > struct r600_resource *resource = &rtex->resource; > > > > r600_texture_reference(&rtex->flushed_depth_texture, NULL); > > - > > - if (rtex->cmask_buffer != &rtex->resource) { > > - r600_resource_reference(&rtex->cmask_buffer, NULL); > > - } > > pb_reference(&resource->buf, NULL); > > r600_resource_reference(&rtex->dcc_separate_buffer, NULL); > > r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL); > > FREE(rtex); > > } > > > > static const struct u_resource_vtbl r600_texture_vtbl; > > > > /* The number of samples can be specified independently of the > > texture. */ > > void si_texture_get_fmask_info(struct si_screen *sscreen, > > @@ -1262,21 +1254,20 @@ r600_texture_create_object(struct pipe_screen > > *screen, > > rtex->db_compatible = true; > > > > if (!(sscreen->debug_flags & DBG(NO_HYPERZ))) > > r600_texture_allocate_htile(sscreen, rtex); > > } > > } else { > > if (base->nr_samples > 1) { > > if (!buf) { > > r600_texture_allocate_fmask(sscreen, rtex); > > r600_texture_allocate_cmask(sscreen, rtex); > > - rtex->cmask_buffer = &rtex->resource; > > } > > if (!rtex->fmask.size || !rtex->cmask.size) { > > FREE(rtex); > > return NULL; > > } > > } > > > > /* Shared textures must always set up DCC here. > > * If it's not present, it will be disabled by > > * apply_opaque_metadata later. 
> > @@ -1306,21 +1297,21 @@ r600_texture_create_object(struct pipe_screen > > *screen, > > resource->bo_alignment = buf->alignment; > > resource->domains = > > sscreen->ws->buffer_get_initial_domain(resource->buf); > > if (resource->domains & RADEON_DOMAIN_VRAM) > > resource->vram_usage = buf->size; > > else if (resource->domains & RADEON_DOMAIN_GTT) > > resource->gart_usage = buf->size; > > } > > > > if (rtex->cmask.size) { > > /* Initialize the cmask to 0xCC (= compressed state). */ > > - si_screen_clear_buffer(sscreen, &rtex->cmask_buffer->b.b, > > + si_screen_clear_buffer(sscreen, &rtex->resource.b.b, > > rtex->cmask.offset, > rtex->cmask.size, > > 0xCCCCCCCC); > > } > > if (rtex->htile_offset) { > > uint32_t clear_value = 0; > > > > if (sscreen->info.chip_class >= GFX9 || > rtex->tc_compatible_htile) > > clear_value = 0x0000030F; > > > > si_screen_clear_buffer(sscreen, &rtex->resource.b.b, > > diff --git a/src/gallium/drivers/radeonsi/si_clear.c > > b/src/gallium/drivers/radeonsi/si_clear.c > > index 464b9d7..a940aea 100644 > > --- a/src/gallium/drivers/radeonsi/si_clear.c > > +++ b/src/gallium/drivers/radeonsi/si_clear.c > > @@ -26,51 +26,20 @@ > > > > #include "util/u_format.h" > > #include "util/u_pack_color.h" > > #include "util/u_surface.h" > > > > enum { > > SI_CLEAR = SI_SAVE_FRAGMENT_STATE, > > SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE, > > }; > > > > -static void si_alloc_separate_cmask(struct si_screen *sscreen, > > - struct r600_texture *rtex) > > -{ > > - if (rtex->cmask_buffer) > > - return; > > - > > - assert(rtex->cmask.size == 0); > > - > > - si_texture_get_cmask_info(sscreen, rtex, &rtex->cmask); > > - if (!rtex->cmask.size) > > - return; > > - > > - rtex->cmask_buffer = (struct r600_resource *) > > - si_aligned_buffer_create(&sscreen->b, > > - R600_RESOURCE_FLAG_UNMAPPABLE, > > - PIPE_USAGE_DEFAULT, > > - rtex->cmask.size, > > - rtex->cmask.alignment); > > - if (rtex->cmask_buffer == NULL) { > > - rtex->cmask.size = 0; > 
> - return; > > - } > > - > > - /* update colorbuffer state bits */ > > - rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> > 8; > > - > > - rtex->cb_color_info |= S_028C70_FAST_CLEAR(1); > > - > > - p_atomic_inc(&sscreen->compressed_colortex_counter); > > -} > > - > > static void si_set_clear_color(struct r600_texture *rtex, > > enum pipe_format surface_format, > > const union pipe_color_union *color) > > { > > union util_color uc; > > > > memset(&uc, 0, sizeof(uc)); > > > > if (rtex->surface.bpe == 16) { > > /* DCC fast clear only: > > @@ -451,21 +420,21 @@ static void si_do_fast_color_clear(struct > > si_context *sctx, > > > > if (clear_words_needed && too_small) > > continue; > > > > /* DCC fast clear with MSAA should clear CMASK to > 0xC. */ > > if (tex->resource.b.b.nr_samples >= 2 && > tex->cmask.size) { > > /* TODO: This doesn't work with MSAA. */ > > if (clear_words_needed) > > continue; > > > > - si_clear_buffer(&sctx->b.b, > &tex->cmask_buffer->b.b, > > + si_clear_buffer(&sctx->b.b, > &tex->resource.b.b, > > tex->cmask.offset, > tex->cmask.size, > > 0xCCCCCCCC, > R600_COHERENCY_CB_META); > > need_decompress_pass = true; > > } > > > > vi_dcc_clear_level(sctx, tex, 0, reset_value); > > > > if (clear_words_needed) > > need_decompress_pass = true; > > > > @@ -476,28 +445,26 @@ static void si_do_fast_color_clear(struct > > si_context *sctx, > > > > /* 128-bit formats are unusupported */ > > if (tex->surface.bpe > 8) { > > continue; > > } > > > > /* RB+ doesn't work with CMASK fast clear on > Stoney. */ > > if (sctx->b.family == CHIP_STONEY) > > continue; > > > > - /* ensure CMASK is enabled */ > > - si_alloc_separate_cmask(sctx->screen, tex); > > if (tex->cmask.size == 0) { > > continue; > > } > > > > /* Do the fast clear. 
*/ > > - si_clear_buffer(&sctx->b.b, > &tex->cmask_buffer->b.b, > > + si_clear_buffer(&sctx->b.b, &tex->resource.b.b, > > tex->cmask.offset, > tex->cmask.size, 0, > > R600_COHERENCY_CB_META); > > need_decompress_pass = true; > > } > > > > if (need_decompress_pass && > > !(tex->dirty_level_mask & (1 << level))) { > > tex->dirty_level_mask |= 1 << level; > > > p_atomic_inc(&sctx->screen->compressed_colortex_counter); > > } > > diff --git a/src/gallium/drivers/radeonsi/si_state.c > > b/src/gallium/drivers/radeonsi/si_state.c > > index 6c82257..aae7332 100644 > > --- a/src/gallium/drivers/radeonsi/si_state.c > > +++ b/src/gallium/drivers/radeonsi/si_state.c > > @@ -2980,26 +2980,20 @@ static void si_emit_framebuffer_state(struct > > si_context *sctx, struct r600_atom > > continue; > > } > > > > tex = (struct r600_texture *)cb->base.texture; > > radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, > > &tex->resource, > RADEON_USAGE_READWRITE, > > tex->resource.b.b.nr_samples > 1 ? > > > RADEON_PRIO_COLOR_BUFFER_MSAA : > > RADEON_PRIO_COLOR_BUFFER); > > > > - if (tex->cmask_buffer && tex->cmask_buffer != > &tex->resource) { > > - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, > > - tex->cmask_buffer, RADEON_USAGE_READWRITE, > > - RADEON_PRIO_CMASK); > > - } > > - > > if (tex->dcc_separate_buffer) > > radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, > > tex->dcc_separate_buffer, > > RADEON_USAGE_READWRITE, > > RADEON_PRIO_DCC); > > > > /* Compute mutable surface parameters. */ > > cb_color_base = tex->resource.gpu_address >> 8; > > cb_color_fmask = 0; > > cb_dcc_base = 0; >
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev