From: Marek Olšák <marek.ol...@amd.com>
This includes reused buffers. Prefer DCC clear if DCC is enabled.
---
src/gallium/drivers/radeonsi/si_buffer.c | 8 ++++++++
src/gallium/drivers/radeonsi/si_pipe.c | 3 ++-
src/gallium/drivers/radeonsi/si_texture.c | 5 ++++-
src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 3 ---
src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 2 --
src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h | 1 -
6 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c
index a03a944..f31767c 100644
--- a/src/gallium/drivers/radeonsi/si_buffer.c
+++ b/src/gallium/drivers/radeonsi/si_buffer.c
@@ -235,20 +235,28 @@ bool si_alloc_resource(struct si_screen *sscreen,
util_range_set_empty(&res->valid_buffer_range);
res->TC_L2_dirty = false;
/* Print debug information. */
if (sscreen->debug_flags & DBG(VM) && res->b.b.target == PIPE_BUFFER) {
fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n",
res->gpu_address, res->gpu_address + res->buf->size,
res->buf->size);
}
+
+ /* Only clear if DCC is disabled, because DCC clears are more efficient. */
+ if (sscreen->debug_flags & DBG_ZERO_VRAM &&
+ res->domains & RADEON_DOMAIN_VRAM &&
+ (res->b.b.target == PIPE_BUFFER ||
+ !((struct si_texture*)res)->dcc_offset))
+ si_screen_clear_buffer(sscreen, &res->b.b, 0, res->bo_size, 0);
+
return true;
}
static void si_buffer_destroy(struct pipe_screen *screen,
struct pipe_resource *buf)
{
struct r600_resource *rbuffer = r600_resource(buf);
threaded_resource_deinit(buf);
util_range_destroy(&rbuffer->valid_buffer_range);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 9c8ed36..6e14066 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -850,21 +850,22 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
si_init_screen_query_functions(sscreen);
/* Set these flags in debug_flags early, so that the shader cache takes
* them into account.
*/
if (driQueryOptionb(config->options,
"glsl_correct_derivatives_after_discard"))
sscreen->debug_flags |= DBG(FS_CORRECT_DERIVS_AFTER_KILL);
if (driQueryOptionb(config->options, "radeonsi_enable_sisched"))
sscreen->debug_flags |= DBG(SI_SCHED);
-
+ if (driQueryOptionb(config->options, "radeonsi_zerovram"))
+ sscreen->debug_flags |= DBG(ZERO_VRAM);
if (sscreen->debug_flags & DBG(INFO))
ac_print_gpu_info(&sscreen->info);
slab_create_parent(&sscreen->pool_transfers,
sizeof(struct si_transfer), 64);
sscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1));
if (sscreen->force_aniso >= 0) {
printf("radeonsi: Forcing anisotropy filter to %ix\n",
diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c
index e55fd81..2faeef2 100644
--- a/src/gallium/drivers/radeonsi/si_texture.c
+++ b/src/gallium/drivers/radeonsi/si_texture.c
@@ -1250,24 +1250,27 @@ si_texture_create_object(struct pipe_screen *screen,
clear_value = 0x0000030F;
si_screen_clear_buffer(sscreen, &tex->buffer.b.b,
tex->htile_offset,
tex->surface.htile_size,
clear_value);
}
/* Initialize DCC only if the texture is not being imported. */
if (!buf && tex->dcc_offset) {
+ unsigned clear_value =
+ sscreen->debug_flags & DBG_ZERO_VRAM ? 0 : 0xFFFFFFFF;
+
si_screen_clear_buffer(sscreen, &tex->buffer.b.b,
tex->dcc_offset,
tex->surface.dcc_size,
- 0xFFFFFFFF);
+ clear_value);
}
/* Initialize the CMASK base register value. */
tex->cmask_base_address_reg =
(tex->buffer.gpu_address + tex->cmask_offset) >> 8;
if (sscreen->debug_flags & DBG(VM)) {
fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n",
tex->buffer.gpu_address,
tex->buffer.gpu_address + tex->buffer.buf->size,
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 68f0562..2842936 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -427,23 +427,20 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
if (!ws->info.has_dedicated_vram)
request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
if (flags & RADEON_FLAG_NO_CPU_ACCESS)
request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
if (flags & RADEON_FLAG_GTT_WC)
request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
ws->info.has_local_buffers)
request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
- if (ws->zero_all_vram_allocs &&
- (request.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM))
- request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
if (r) {
fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size);
fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
fprintf(stderr, "amdgpu: domains : %u\n", initial_domain);
goto error_bo_alloc;
}
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index dcbc075..7908717 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -59,22 +59,20 @@ static bool do_winsys_init(struct amdgpu_winsys *ws,
ws->addrlib = amdgpu_addr_create(&ws->info, &ws->amdinfo, &ws->info.max_alignment);
if (!ws->addrlib) {
fprintf(stderr, "amdgpu: Cannot create addrlib.\n");
goto fail;
}
ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL;
ws->debug_all_bos = debug_get_option_all_bos();
ws->reserve_vmid = strstr(debug_get_option("R600_DEBUG", ""),
"reserve_vmid") != NULL;
- ws->zero_all_vram_allocs = strstr(debug_get_option("R600_DEBUG", ""), "zerovram") != NULL ||
- driQueryOptionb(config->options, "radeonsi_zerovram");
return true;
fail:
amdgpu_device_deinitialize(ws->dev);
ws->dev = NULL;
return false;
}
static void do_winsys_deinit(struct amdgpu_winsys *ws)
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
index c355eff..1682196 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
@@ -72,21 +72,20 @@ struct amdgpu_winsys {
/* multithreaded IB submission */
struct util_queue cs_queue;
struct amdgpu_gpu_info amdinfo;
ADDR_HANDLE addrlib;
bool check_vm;
bool debug_all_bos;
bool reserve_vmid;
- bool zero_all_vram_allocs;
/* List of all allocated buffers */
simple_mtx_t global_bo_list_lock;
struct list_head global_bo_list;
unsigned num_buffers;
/* For returning the same amdgpu_winsys_bo instance for exported
* and re-imported buffers. */
struct util_hash_table *bo_export_table;
simple_mtx_t bo_export_table_lock;