From: Marek Olšák <marek.ol...@amd.com> Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> --- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 42 +++++++++++++++++++---- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 28 ++++++++++++++- src/gallium/winsys/radeon/drm/radeon_drm_winsys.h | 2 ++ 3 files changed, 64 insertions(+), 8 deletions(-)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index bbfe5cc..06842a4 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -242,32 +242,54 @@ static uint64_t radeon_bomgr_find_va(const struct radeon_info *info, if ((hole->size - waste) == size) { hole->size = waste; mtx_unlock(&heap->mutex); return offset; } } offset = heap->start; waste = offset % alignment; waste = waste ? alignment - waste : 0; + + if (offset + waste + size > heap->end) { + mtx_unlock(&heap->mutex); + return 0; + } + if (waste) { n = CALLOC_STRUCT(radeon_bo_va_hole); n->size = waste; n->offset = offset; list_add(&n->list, &heap->holes); } offset += waste; heap->start += size + waste; mtx_unlock(&heap->mutex); return offset; } +static uint64_t radeon_bomgr_find_va64(struct radeon_drm_winsys *ws, + uint64_t size, uint64_t alignment) +{ + uint64_t va = 0; + + /* Try to allocate from the 64-bit address space first. + * If it doesn't exist (start = 0) or if it doesn't have enough space, + * fall back to the 32-bit address space. + */ + if (ws->vm64.start) + va = radeon_bomgr_find_va(&ws->info, &ws->vm64, size, alignment); + if (!va) + va = radeon_bomgr_find_va(&ws->info, &ws->vm32, size, alignment); + return va; +} + static void radeon_bomgr_free_va(const struct radeon_info *info, struct radeon_vm_heap *heap, uint64_t va, uint64_t size) { struct radeon_bo_va_hole *hole = NULL; size = align(size, info->gart_page_size); mtx_lock(&heap->mutex); if ((va + size) == heap->start) { @@ -363,21 +385,23 @@ void radeon_bo_destroy(struct pb_buffer *_buf) if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va)) != 0 && va.operation == RADEON_VA_RESULT_ERROR) { fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n"); fprintf(stderr, "radeon: size : %"PRIu64" bytes\n", bo->base.size); fprintf(stderr, "radeon: va : 0x%"PRIx64"\n", bo->va); } } - radeon_bomgr_free_va(&rws->info, &rws->vm64, bo->va, bo->base.size); + radeon_bomgr_free_va(&rws->info, + bo->va < rws->vm32.end ? &rws->vm32 : &rws->vm64, + bo->va, bo->base.size); } /* Close object. */ args.handle = bo->handle; drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args); mtx_destroy(&bo->u.real.map_mutex); if (bo->initial_domain & RADEON_DOMAIN_VRAM) rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size); @@ -653,22 +677,28 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws, if (heap >= 0) { pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base, heap); } if (rws->info.has_virtual_memory) { struct drm_radeon_gem_va va; unsigned va_gap_size; va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0; - bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm64, - size + va_gap_size, alignment); + + if (flags & RADEON_FLAG_32BIT) { + bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm32, + size + va_gap_size, alignment); + assert(bo->va + size < rws->vm32.end); + } else { + bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, alignment); + } va.handle = bo->handle; va.vm_id = 0; va.operation = RADEON_VA_MAP; va.flags = RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_WRITEABLE | RADEON_VM_PAGE_SNOOPED; va.offset = bo->va; r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va)); if (r && va.operation == RADEON_VA_RESULT_ERROR) { @@ -1055,22 +1085,21 @@ static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws, bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1); (void) mtx_init(&bo->u.real.map_mutex, mtx_plain); util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo); mtx_unlock(&ws->bo_handles_mutex); if (ws->info.has_virtual_memory) { struct drm_radeon_gem_va va; - bo->va = radeon_bomgr_find_va(&ws->info, &ws->vm64, - bo->base.size, 1 << 20); + bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20); va.handle = bo->handle; va.operation = RADEON_VA_MAP; va.vm_id = 0; va.offset = bo->va; va.flags = RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_WRITEABLE | RADEON_VM_PAGE_SNOOPED; va.offset = bo->va; r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va)); @@ -1199,22 +1228,21 @@ done: mtx_unlock(&ws->bo_handles_mutex); if (stride) *stride = whandle->stride; if (offset) *offset = whandle->offset; if (ws->info.has_virtual_memory && !bo->va) { struct drm_radeon_gem_va va; - bo->va = radeon_bomgr_find_va(&ws->info, &ws->vm64, - bo->base.size, 1 << 20); + bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20); va.handle = bo->handle; va.operation = RADEON_VA_MAP; va.vm_id = 0; va.offset = bo->va; va.flags = RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_WRITEABLE | RADEON_VM_PAGE_SNOOPED; va.offset = bo->va; r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va)); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 4fe36dc..85a186a 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -356,20 +356,22 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) if (ws->info.drm_minor < 49) ws->info.vram_vis_size = MIN2(ws->info.vram_vis_size, 256*1024*1024); /* Radeon allocates all buffers as contigous, which makes large allocations * unlikely to succeed. */ ws->info.max_alloc_size = MAX2(ws->info.vram_size, ws->info.gart_size) * 0.7; if (ws->info.has_dedicated_vram) ws->info.max_alloc_size = MIN2(ws->info.vram_size * 0.7, ws->info.max_alloc_size); if (ws->info.drm_minor < 40) ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 256*1024*1024); + /* Both 32-bit and 64-bit address spaces only have 4GB. */ + ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 3ull*1024*1024*1024); /* Get max clock frequency info and convert it to MHz */ radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL, &ws->info.max_shader_clock); ws->info.max_shader_clock /= 1000; ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN); /* Generation-specific queries. */ if (ws->gen == DRV_R300) { @@ -546,20 +548,21 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws) pb_cache_deinit(&ws->bo_cache); if (ws->gen >= DRV_R600) { radeon_surface_manager_free(ws->surf_man); } util_hash_table_destroy(ws->bo_names); util_hash_table_destroy(ws->bo_handles); util_hash_table_destroy(ws->bo_vas); mtx_destroy(&ws->bo_handles_mutex); + mtx_destroy(&ws->vm32.mutex); mtx_destroy(&ws->vm64.mutex); mtx_destroy(&ws->bo_fence_lock); if (ws->fd >= 0) close(ws->fd); FREE(rws); } static void radeon_query_info(struct radeon_winsys *rws, @@ -809,25 +812,48 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config, radeon_drm_cs_init_functions(ws); radeon_surface_init_functions(ws); (void) mtx_init(&ws->hyperz_owner_mutex, mtx_plain); (void) mtx_init(&ws->cmask_owner_mutex, mtx_plain); ws->bo_names = util_hash_table_create(handle_hash, handle_compare); ws->bo_handles = util_hash_table_create(handle_hash, handle_compare); ws->bo_vas = util_hash_table_create(handle_hash, handle_compare); (void) mtx_init(&ws->bo_handles_mutex, mtx_plain); + (void) mtx_init(&ws->vm32.mutex, mtx_plain); (void) mtx_init(&ws->vm64.mutex, mtx_plain); (void) mtx_init(&ws->bo_fence_lock, mtx_plain); + list_inithead(&ws->vm32.holes); list_inithead(&ws->vm64.holes); - ws->vm64.start = ws->va_start; + /* The kernel currently returns 8MB. Make sure this doesn't change. */ + if (ws->va_start > 8 * 1024 * 1024) { + /* Not enough 32-bit address space. */ + radeon_winsys_destroy(&ws->base); + mtx_unlock(&fd_tab_mutex); + return NULL; + } + + ws->vm32.start = ws->va_start; + ws->vm32.end = 1ull << 32; + + /* The maximum is 8GB of virtual address space limited by the kernel. + * It's obviously not enough for bigger cards, like Hawaiis with 4GB + * and 8GB of physical memory and 4GB of GART. + * + * Older kernels set the limit to 4GB, which is even worse, so they only + * have 32-bit address space. + */ + if (ws->info.drm_minor >= 41) { + ws->vm64.start = 1ull << 32; + ws->vm64.end = 1ull << 33; + } /* TTM aligns the BO size to the CPU page size */ ws->info.gart_page_size = sysconf(_SC_PAGESIZE); if (ws->num_cpus > 1 && debug_get_option_thread()) util_queue_init(&ws->cs_queue, "radeon_cs", 8, 1, 0); /* Create the screen at the end. The winsys must be initialized * completely. * diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h index c65f5cb..03d96ea 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h @@ -41,20 +41,21 @@ enum radeon_generation { DRV_R600, DRV_SI }; #define RADEON_SLAB_MIN_SIZE_LOG2 9 #define RADEON_SLAB_MAX_SIZE_LOG2 14 struct radeon_vm_heap { mtx_t mutex; uint64_t start; + uint64_t end; struct list_head holes; }; struct radeon_drm_winsys { struct radeon_winsys base; struct pipe_reference reference; struct pb_cache bo_cache; struct pb_slabs bo_slabs; int fd; /* DRM file descriptor */ @@ -77,20 +78,21 @@ struct radeon_drm_winsys { /* List of buffer GEM names. Protected by bo_handles_mutex. */ struct util_hash_table *bo_names; /* List of buffer handles. Protectded by bo_handles_mutex. */ struct util_hash_table *bo_handles; /* List of buffer virtual memory ranges. Protectded by bo_handles_mutex. */ struct util_hash_table *bo_vas; mtx_t bo_handles_mutex; mtx_t bo_fence_lock; + struct radeon_vm_heap vm32; struct radeon_vm_heap vm64; bool check_vm; struct radeon_surface_manager *surf_man; uint32_t num_cpus; /* Number of CPUs. */ struct radeon_drm_cs *hyperz_owner; mtx_t hyperz_owner_mutex; -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev