From: Marek Olšák <marek.ol...@amd.com>

https://lists.freedesktop.org/archives/amd-gfx/2017-June/010591.html
---
 src/gallium/drivers/radeon/r600_buffer_common.c | 4 ----
 src/gallium/drivers/radeon/radeon_winsys.h      | 7 +++----
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c       | 9 +--------
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c       | 3 +--
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c   | 8 +-------
 5 files changed, 6 insertions(+), 25 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index 342695c..262fe1d 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -125,21 +125,20 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen,
    case PIPE_USAGE_DYNAMIC:
       /* Older kernels didn't always flush the HDP cache before
        * CS execution
        */
       if (rscreen->info.drm_major == 2 &&
           rscreen->info.drm_minor < 40) {
          res->domains = RADEON_DOMAIN_GTT;
          res->flags |= RADEON_FLAG_GTT_WC;
          break;
       }
-      res->flags |= RADEON_FLAG_CPU_ACCESS;
       /* fall through */
    case PIPE_USAGE_DEFAULT:
    case PIPE_USAGE_IMMUTABLE:
    default:
       /* Not listing GTT here improves performance in some
        * apps. */
       res->domains = RADEON_DOMAIN_VRAM;
       res->flags |= RADEON_FLAG_GTT_WC;
       break;
    }
@@ -151,29 +150,26 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen,
        * kernels, because they didn't always flush the HDP
        * cache before CS execution.
        *
        * Write-combined CPU mappings are fine, the kernel
        * ensures all CPU writes finish before the GPU
        * executes a command stream.
        */
       if (rscreen->info.drm_major == 2 &&
           rscreen->info.drm_minor < 40)
          res->domains = RADEON_DOMAIN_GTT;
-      else if (res->domains & RADEON_DOMAIN_VRAM)
-         res->flags |= RADEON_FLAG_CPU_ACCESS;
    }
 
    /* Tiled textures are unmappable. Always put them in VRAM. */
    if ((res->b.b.target != PIPE_BUFFER && !rtex->surface.is_linear) ||
        res->flags & R600_RESOURCE_FLAG_UNMAPPABLE) {
       res->domains = RADEON_DOMAIN_VRAM;
-      res->flags &= ~RADEON_FLAG_CPU_ACCESS;
       res->flags |= RADEON_FLAG_NO_CPU_ACCESS |
                     RADEON_FLAG_GTT_WC;
    }
 
    /* If VRAM is just stolen system memory, allow both VRAM and
     * GTT, whichever has free space. If a buffer is evicted from
     * VRAM to GTT, it will stay there.
     *
     * DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only
     * placements even with a low amount of stolen VRAM.
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 706188f..1be94f7 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -44,24 +44,23 @@ enum radeon_bo_layout {
 };
 
 enum radeon_bo_domain { /* bitfield */
     RADEON_DOMAIN_GTT  = 2,
     RADEON_DOMAIN_VRAM = 4,
     RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT
 };
 
 enum radeon_bo_flag { /* bitfield */
     RADEON_FLAG_GTT_WC = (1 << 0),
-    RADEON_FLAG_CPU_ACCESS = (1 << 1),
-    RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
-    RADEON_FLAG_NO_SUBALLOC = (1 << 3),
-    RADEON_FLAG_SPARSE = (1 << 4),
+    RADEON_FLAG_NO_CPU_ACCESS = (1 << 1),
+    RADEON_FLAG_NO_SUBALLOC = (1 << 2),
+    RADEON_FLAG_SPARSE = (1 << 3),
 };
 
 enum radeon_bo_usage { /* bitfield */
     RADEON_USAGE_READ = 2,
     RADEON_USAGE_WRITE = 4,
     RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE,
 
     /* The winsys ensures that the CS submission will be scheduled after
      * previously flushed CSs referencing this BO in a conflicting way.
      */
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 5119d3f..9736f44a 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -391,22 +391,20 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
    pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                        pb_cache_bucket);
    request.alloc_size = size;
    request.phys_alignment = alignment;
 
    if (initial_domain & RADEON_DOMAIN_VRAM)
       request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
    if (initial_domain & RADEON_DOMAIN_GTT)
       request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
 
-   if (flags & RADEON_FLAG_CPU_ACCESS)
-      request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
    if (flags & RADEON_FLAG_NO_CPU_ACCESS)
       request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
    if (flags & RADEON_FLAG_GTT_WC)
       request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
    r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
    if (r) {
       fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
       fprintf(stderr, "amdgpu: size      : %"PRIu64" bytes\n", size);
       fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
@@ -499,22 +497,20 @@ struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
    struct amdgpu_slab *slab = CALLOC_STRUCT(amdgpu_slab);
    enum radeon_bo_domain domains;
    enum radeon_bo_flag flags = 0;
    uint32_t base_id;
 
    if (!slab)
       return NULL;
 
    if (heap & 1)
       flags |= RADEON_FLAG_GTT_WC;
-   if (heap & 2)
-      flags |= RADEON_FLAG_CPU_ACCESS;
 
    switch (heap >> 2) {
    case 0:
       domains = RADEON_DOMAIN_VRAM;
       break;
    default:
    case 1:
       domains = RADEON_DOMAIN_VRAM_GTT;
       break;
    case 2:
@@ -1157,23 +1153,21 @@ amdgpu_bo_create(struct radeon_winsys *rws,
    /* Sub-allocate small buffers from slabs. */
    if (!(flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE)) &&
        size <= (1 << AMDGPU_SLAB_MAX_SIZE_LOG2) &&
        alignment <= MAX2(1 << AMDGPU_SLAB_MIN_SIZE_LOG2,
                          util_next_power_of_two(size))) {
       struct pb_slab_entry *entry;
       unsigned heap = 0;
 
       if (flags & RADEON_FLAG_GTT_WC)
          heap |= 1;
-      if (flags & RADEON_FLAG_CPU_ACCESS)
-         heap |= 2;
-      if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
+      if (flags & ~RADEON_FLAG_GTT_WC)
          goto no_slab;
 
       switch (domain) {
       case RADEON_DOMAIN_VRAM:
          heap |= 0 * 4;
          break;
       case RADEON_DOMAIN_VRAM_GTT:
          heap |= 1 * 4;
          break;
       case RADEON_DOMAIN_GTT:
@@ -1197,21 +1191,20 @@ amdgpu_bo_create(struct radeon_winsys *rws,
       bo = container_of(entry, bo, u.slab.entry);
 
       pipe_reference_init(&bo->base.reference, 1);
 
       return &bo->base;
    }
 no_slab:
 
    if (flags & RADEON_FLAG_SPARSE) {
       assert(RADEON_SPARSE_PAGE_SIZE % alignment == 0);
-      assert(!(flags & RADEON_FLAG_CPU_ACCESS));
 
       flags |= RADEON_FLAG_NO_CPU_ACCESS;
 
       return amdgpu_bo_sparse_create(ws, size, domain, flags);
    }
 
    /* This flag is irrelevant for the cache. */
    flags &= ~RADEON_FLAG_NO_SUBALLOC;
 
    /* Align size to page size. This is the minimum alignment for normal
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 1b3ca65..a1fb045 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -574,22 +574,21 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib)
       break;
    case IB_MAIN:
       buffer_size = MAX2(buffer_size, 8 * 1024 * 4);
       break;
    default:
      unreachable("unhandled IB type");
    }
 
    pb = ws->base.buffer_create(&ws->base, buffer_size,
                                ws->info.gart_page_size,
-                               RADEON_DOMAIN_GTT,
-                               RADEON_FLAG_CPU_ACCESS);
+                               RADEON_DOMAIN_GTT, 0);
    if (!pb)
       return false;
 
    mapped = ws->base.buffer_map(pb, NULL, PIPE_TRANSFER_WRITE);
    if (!mapped) {
       pb_reference(&pb, NULL);
       return false;
    }
 
    pb_reference(&ib->big_ib_buffer, pb);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 274d576..81a59e5 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -604,22 +604,20 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
    assert((initial_domains & ~(RADEON_GEM_DOMAIN_GTT |
                                RADEON_GEM_DOMAIN_VRAM)) == 0);
 
    args.size = size;
    args.alignment = alignment;
    args.initial_domain = initial_domains;
    args.flags = 0;
 
    if (flags & RADEON_FLAG_GTT_WC)
       args.flags |= RADEON_GEM_GTT_WC;
-   if (flags & RADEON_FLAG_CPU_ACCESS)
-      args.flags |= RADEON_GEM_CPU_ACCESS;
    if (flags & RADEON_FLAG_NO_CPU_ACCESS)
       args.flags |= RADEON_GEM_NO_CPU_ACCESS;
 
    if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
                            &args, sizeof(args))) {
       fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
       fprintf(stderr, "radeon: size      : %u bytes\n", size);
       fprintf(stderr, "radeon: alignment : %u bytes\n", alignment);
       fprintf(stderr, "radeon: domains   : %u\n", args.initial_domain);
       fprintf(stderr, "radeon: flags     : %u\n", args.flags);
@@ -733,22 +731,20 @@ struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
    struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
    enum radeon_bo_domain domains;
    enum radeon_bo_flag flags = 0;
    unsigned base_hash;
 
    if (!slab)
       return NULL;
 
    if (heap & 1)
       flags |= RADEON_FLAG_GTT_WC;
-   if (heap & 2)
-      flags |= RADEON_FLAG_CPU_ACCESS;
 
    switch (heap >> 2) {
    case 0:
       domains = RADEON_DOMAIN_VRAM;
       break;
    default:
    case 1:
       domains = RADEON_DOMAIN_VRAM_GTT;
       break;
    case 2:
@@ -945,23 +941,21 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
    /* Sub-allocate small buffers from slabs. */
    if (!(flags & RADEON_FLAG_NO_SUBALLOC) &&
        size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
        ws->info.has_virtual_memory &&
        alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
       struct pb_slab_entry *entry;
       unsigned heap = 0;
 
       if (flags & RADEON_FLAG_GTT_WC)
          heap |= 1;
-      if (flags & RADEON_FLAG_CPU_ACCESS)
-         heap |= 2;
-      if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
+      if (flags & ~RADEON_FLAG_GTT_WC)
          goto no_slab;
 
       switch (domain) {
       case RADEON_DOMAIN_VRAM:
          heap |= 0 * 4;
          break;
       case RADEON_DOMAIN_VRAM_GTT:
          heap |= 1 * 4;
          break;
       case RADEON_DOMAIN_GTT:
-- 
2.7.4
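
The hunks above fold the old CPU_ACCESS bit out of the slab heap index: after the
change, bit 0 of "heap" carries RADEON_FLAG_GTT_WC, bits 2+ select the domain, and
the bit formerly used for RADEON_FLAG_CPU_ACCESS is simply unused. For illustration
only, here is a minimal standalone sketch of that encoding; the helper name and the
test harness are made up for the example and are not part of the patch, and the enum
values just mirror radeon_winsys.h as patched above.

/* Illustrative sketch only -- not part of the patch. */
#include <assert.h>

enum radeon_bo_domain {
   RADEON_DOMAIN_GTT = 2,
   RADEON_DOMAIN_VRAM = 4,
   RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT
};

enum radeon_bo_flag {
   RADEON_FLAG_GTT_WC = (1 << 0),
   RADEON_FLAG_NO_CPU_ACCESS = (1 << 1),
   RADEON_FLAG_NO_SUBALLOC = (1 << 2),
   RADEON_FLAG_SPARSE = (1 << 3),
};

/* Hypothetical helper mirroring the heap computation in amdgpu_bo_create()
 * and radeon_winsys_bo_create(); returns -1 when the buffer can't use slabs.
 */
static int example_slab_heap(enum radeon_bo_domain domain, unsigned flags)
{
   unsigned heap = 0;

   if (flags & RADEON_FLAG_GTT_WC)
      heap |= 1;
   /* Any other flag (NO_CPU_ACCESS, NO_SUBALLOC, SPARSE) disqualifies
    * the buffer from sub-allocation. */
   if (flags & ~RADEON_FLAG_GTT_WC)
      return -1;

   /* Domain index in bits 2+, matching the switch (heap >> 2) in
    * amdgpu_bo_slab_alloc()/radeon_bo_slab_alloc(): 0 = VRAM,
    * 1 = VRAM_GTT, 2 = GTT. */
   switch (domain) {
   case RADEON_DOMAIN_VRAM:
      heap |= 0 * 4;
      break;
   case RADEON_DOMAIN_VRAM_GTT:
      heap |= 1 * 4;
      break;
   case RADEON_DOMAIN_GTT:
      heap |= 2 * 4;
      break;
   default:
      return -1;
   }
   return (int)heap;
}

int main(void)
{
   /* Write-combined VRAM buffer -> heap 1; plain GTT buffer -> heap 8. */
   assert(example_slab_heap(RADEON_DOMAIN_VRAM, RADEON_FLAG_GTT_WC) == 1);
   assert(example_slab_heap(RADEON_DOMAIN_GTT, 0) == 8);
   assert(example_slab_heap(RADEON_DOMAIN_VRAM, RADEON_FLAG_SPARSE) == -1);
   return 0;
}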