From: Marek Olšák <marek.ol...@amd.com> This includes reused buffers. Prefer DCC clear if DCC is enabled. --- src/gallium/drivers/radeonsi/si_buffer.c | 8 ++++++++ src/gallium/drivers/radeonsi/si_pipe.c | 3 ++- src/gallium/drivers/radeonsi/si_texture.c | 5 ++++- src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 3 --- src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 2 -- src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h | 1 - 6 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c index a03a944..f31767c 100644 --- a/src/gallium/drivers/radeonsi/si_buffer.c +++ b/src/gallium/drivers/radeonsi/si_buffer.c @@ -235,20 +235,28 @@ bool si_alloc_resource(struct si_screen *sscreen, util_range_set_empty(&res->valid_buffer_range); res->TC_L2_dirty = false; /* Print debug information. */ if (sscreen->debug_flags & DBG(VM) && res->b.b.target == PIPE_BUFFER) { fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n", res->gpu_address, res->gpu_address + res->buf->size, res->buf->size); } + + /* Only clear if DCC is disabled, because DCC clears are more efficient. */ + if (sscreen->debug_flags & DBG(ZERO_VRAM) && + res->domains & RADEON_DOMAIN_VRAM && + (res->b.b.target == PIPE_BUFFER || + !((struct si_texture*)res)->dcc_offset)) + si_screen_clear_buffer(sscreen, &res->b.b, 0, res->bo_size, 0); + return true; } static void si_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf) { struct r600_resource *rbuffer = r600_resource(buf); threaded_resource_deinit(buf); util_range_destroy(&rbuffer->valid_buffer_range); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 9c8ed36..6e14066 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -850,21 +850,22 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, si_init_screen_query_functions(sscreen); /* Set these flags in debug_flags early, so that the shader cache takes * them into account. 
*/ if (driQueryOptionb(config->options, "glsl_correct_derivatives_after_discard")) sscreen->debug_flags |= DBG(FS_CORRECT_DERIVS_AFTER_KILL); if (driQueryOptionb(config->options, "radeonsi_enable_sisched")) sscreen->debug_flags |= DBG(SI_SCHED); - + if (driQueryOptionb(config->options, "radeonsi_zerovram")) + sscreen->debug_flags |= DBG(ZERO_VRAM); if (sscreen->debug_flags & DBG(INFO)) ac_print_gpu_info(&sscreen->info); slab_create_parent(&sscreen->pool_transfers, sizeof(struct si_transfer), 64); sscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1)); if (sscreen->force_aniso >= 0) { printf("radeonsi: Forcing anisotropy filter to %ix\n", diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index e55fd81..2faeef2 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -1250,24 +1250,27 @@ si_texture_create_object(struct pipe_screen *screen, clear_value = 0x0000030F; si_screen_clear_buffer(sscreen, &tex->buffer.b.b, tex->htile_offset, tex->surface.htile_size, clear_value); } /* Initialize DCC only if the texture is not being imported. */ if (!buf && tex->dcc_offset) { + unsigned clear_value = + sscreen->debug_flags & DBG(ZERO_VRAM) ? 0 : 0xFFFFFFFF; + si_screen_clear_buffer(sscreen, &tex->buffer.b.b, tex->dcc_offset, tex->surface.dcc_size, - 0xFFFFFFFF); + clear_value); } /* Initialize the CMASK base register value. 
*/ tex->cmask_base_address_reg = (tex->buffer.gpu_address + tex->cmask_offset) >> 8; if (sscreen->debug_flags & DBG(VM)) { fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n", tex->buffer.gpu_address, tex->buffer.gpu_address + tex->buffer.buf->size, diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 68f0562..2842936 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -427,23 +427,20 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, if (!ws->info.has_dedicated_vram) request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; if (flags & RADEON_FLAG_NO_CPU_ACCESS) request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; if (flags & RADEON_FLAG_GTT_WC) request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC; if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING && ws->info.has_local_buffers) request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID; - if (ws->zero_all_vram_allocs && - (request.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)) - request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED; r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle); if (r) { fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n"); fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size); fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment); fprintf(stderr, "amdgpu: domains : %u\n", initial_domain); goto error_bo_alloc; } diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index dcbc075..7908717 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -59,22 +59,20 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, ws->addrlib = amdgpu_addr_create(&ws->info, &ws->amdinfo, &ws->info.max_alignment); if (!ws->addrlib) { fprintf(stderr, "amdgpu: Cannot create addrlib.\n"); goto fail; } ws->check_vm = 
strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL; ws->debug_all_bos = debug_get_option_all_bos(); ws->reserve_vmid = strstr(debug_get_option("R600_DEBUG", ""), "reserve_vmid") != NULL; - ws->zero_all_vram_allocs = strstr(debug_get_option("R600_DEBUG", ""), "zerovram") != NULL || - driQueryOptionb(config->options, "radeonsi_zerovram"); return true; fail: amdgpu_device_deinitialize(ws->dev); ws->dev = NULL; return false; } static void do_winsys_deinit(struct amdgpu_winsys *ws) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h index c355eff..1682196 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h @@ -72,21 +72,20 @@ struct amdgpu_winsys { /* multithreaded IB submission */ struct util_queue cs_queue; struct amdgpu_gpu_info amdinfo; ADDR_HANDLE addrlib; bool check_vm; bool debug_all_bos; bool reserve_vmid; - bool zero_all_vram_allocs; /* List of all allocated buffers */ simple_mtx_t global_bo_list_lock; struct list_head global_bo_list; unsigned num_buffers; /* For returning the same amdgpu_winsys_bo instance for exported * and re-imported buffers. */ struct util_hash_table *bo_export_table; simple_mtx_t bo_export_table_lock; -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev