On Sat, Jan 7, 2012 at 8:08 PM, Marek Olšák <mar...@gmail.com> wrote: > On Fri, Jan 6, 2012 at 4:42 PM, <j.gli...@gmail.com> wrote: >> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c >> b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c >> index ccf9c4f..8ef0c18 100644 >> --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c >> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c >> @@ -30,6 +30,7 @@ >> #include "util/u_hash_table.h" >> #include "util/u_memory.h" >> #include "util/u_simple_list.h" >> +#include "util/u_double_list.h" >> #include "os/os_thread.h" >> #include "os/os_mman.h" >> >> @@ -67,6 +68,12 @@ static INLINE struct radeon_bo *radeon_bo(struct >> pb_buffer *bo) >> return (struct radeon_bo *)bo; >> } >> >> +struct radeon_bo_va_hole { >> + struct list_head list; >> + uint64_t offset; >> + uint64_t size; >> +}; >> + >> struct radeon_bomgr { >> /* Base class. */ >> struct pb_manager base; >> @@ -77,6 +84,11 @@ struct radeon_bomgr { >> /* List of buffer handles and its mutex. */ >> struct util_hash_table *bo_handles; >> pipe_mutex bo_handles_mutex; >> + >> + /* is virtual address supported */ >> + bool va; >> + unsigned va_offset; >> + struct list_head va_holes; >> }; >> >> static INLINE struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr) >> @@ -151,9 +163,85 @@ static boolean radeon_bo_is_busy(struct pb_buffer *_buf, >> } >> } >> >> +static uint64_t radeon_bomgr_find_va(struct radeon_bomgr *mgr, uint64_t >> size) >> +{ >> + struct radeon_bo_va_hole *hole, *n; >> + uint64_t offset = 0; >> + >> + pipe_mutex_lock(mgr->bo_handles_mutex); > > radeon_bomgr::bo_handles_mutex should only guard accesses to > radeon_bomgr::bo_handles. I don't see a reason to reuse it. Could you > please add another mutex for the va_* stuff? > >> + /* first look for a hole */ >> + LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) { >> + if (hole->size == size) { >> + offset = hole->offset; >> + list_del(&hole->list); >> + FREE(hole); >> + pipe_mutex_unlock(mgr->bo_handles_mutex); >> + return offset; >> + } >> + if (hole->size > size) { >> + offset = hole->offset; >> + hole->size -= size; >> + hole->offset += size; >> + pipe_mutex_unlock(mgr->bo_handles_mutex); >> + return offset; >> + } >> + } >> + >> + offset = mgr->va_offset; >> + mgr->va_offset += size; >> + pipe_mutex_unlock(mgr->bo_handles_mutex); >> + return offset; >> +} >> + >> +static void radeon_bomgr_force_va(struct radeon_bomgr *mgr, uint64_t va, >> uint64_t size) >> +{ >> + pipe_mutex_lock(mgr->bo_handles_mutex); >> + if (va >= mgr->va_offset) { >> + mgr->va_offset = va + size; >> + } else { >> + struct radeon_bo_va_hole *hole, *n; >> + uint64_t stmp, etmp; >> + >> + /* free all hole that fall into the range >> + * NOTE that we might loose virtual address space >> + */ >> + LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) { >> + stmp = hole->offset; >> + etmp = stmp + hole->size; >> + if (va >= stmp && va < etmp) { >> + list_del(&hole->list); >> + FREE(hole); >> + } >> + } >> + } >> + pipe_mutex_unlock(mgr->bo_handles_mutex); >> +} >> + >> +static void radeon_bomgr_free_va(struct radeon_bomgr *mgr, uint64_t va, >> uint64_t size) >> +{ >> + pipe_mutex_lock(mgr->bo_handles_mutex); >> + if ((va + size) == mgr->va_offset) { >> + mgr->va_offset = va; >> + } else { >> + struct radeon_bo_va_hole *hole; >> + >> + /* FIXME on allocation failure we just loose virtual address space >> + * maybe print a warning >> + */ >> + hole = CALLOC_STRUCT(radeon_bo_va_hole); >> + if (hole) { >> + hole->size = size; >> + hole->offset = va; >> + list_add(&hole->list, &mgr->va_holes); >> + } >> + } >> + pipe_mutex_unlock(mgr->bo_handles_mutex); >> +} >> + >> static void radeon_bo_destroy(struct pb_buffer *_buf) >> { >> struct radeon_bo *bo = radeon_bo(_buf); >> + struct radeon_bomgr *mgr = bo->mgr; >> struct drm_gem_close args; >> >> memset(&args, 0, sizeof(args)); >> @@ -168,6 +256,10 @@ static void radeon_bo_destroy(struct pb_buffer *_buf) >> if (bo->ptr) >> os_munmap(bo->ptr, bo->base.size); >> >> + if (mgr->va) { >> + radeon_bomgr_free_va(mgr, bo->va, bo->va_size); >> + } >> + >> /* Close object. */ >> args.handle = bo->handle; >> drmIoctl(bo->rws->fd, DRM_IOCTL_GEM_CLOSE, &args); >> @@ -343,6 +435,7 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct >> pb_manager *_mgr, >> struct radeon_bo *bo; >> struct drm_radeon_gem_create args; >> struct radeon_bo_desc *rdesc = (struct radeon_bo_desc*)desc; >> + int r; >> >> memset(&args, 0, sizeof(args)); >> >> @@ -378,8 +471,38 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct >> pb_manager *_mgr, >> bo->rws = mgr->rws; >> bo->handle = args.handle; >> bo->reloc_domains = rdesc->reloc_domains; >> + bo->va = 0; >> pipe_mutex_init(bo->map_mutex); >> >> + if (mgr->va) { >> + struct drm_radeon_gem_va va; >> + >> + bo->va_size = ((size + 4095) & ~4095); > > Please use the "align" function from u_math.h. > >> + bo->va = radeon_bomgr_find_va(mgr, bo->va_size); >> + >> + va.handle = bo->handle; >> + va.vm_id = 0; >> + va.operation = RADEON_VA_MAP; >> + va.flags = RADEON_VM_PAGE_READABLE | >> + RADEON_VM_PAGE_WRITEABLE | >> + RADEON_VM_PAGE_SNOOPED; >> + va.offset = bo->va; >> + r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, >> sizeof(va)); >> + if (r && va.operation == RADEON_VA_RESULT_ERROR) { >> + fprintf(stderr, "radeon: Failed to allocate a buffer:\n"); >> + fprintf(stderr, "radeon: size : %d bytes\n", size); >> + fprintf(stderr, "radeon: alignment : %d bytes\n", >> desc->alignment); >> + fprintf(stderr, "radeon: domains : %d\n", >> args.initial_domain); >> + radeon_bo_destroy(&bo->base); >> + return NULL; >> + } >> + if (va.operation == RADEON_VA_RESULT_VA_EXIST) { >> + radeon_bomgr_free_va(mgr, bo->va, bo->va_size); >> + bo->va = va.offset; >> + radeon_bomgr_force_va(mgr, bo->va, bo->va_size); >> + } >> + } >> + >> return &bo->base; >> } >> >> @@ -441,6 +564,11 @@ struct pb_manager *radeon_bomgr_create(struct >> radeon_drm_winsys *rws) >> mgr->rws = rws; >> mgr->bo_handles = util_hash_table_create(handle_hash, handle_compare); >> pipe_mutex_init(mgr->bo_handles_mutex); >> + >> + mgr->va = rws->info.r600_va; >> + mgr->va_offset = rws->info.r600_va_start; >> + list_inithead(&mgr->va_holes); >> + >> return &mgr->base; >> } >> >> @@ -584,6 +712,7 @@ static struct pb_buffer >> *radeon_winsys_bo_from_handle(struct radeon_winsys *rws, >> struct radeon_bo *bo; >> struct radeon_bomgr *mgr = radeon_bomgr(ws->kman); >> struct drm_gem_open open_arg = {}; >> + int r; >> >> memset(&open_arg, 0, sizeof(open_arg)); >> >> @@ -628,6 +757,7 @@ static struct pb_buffer >> *radeon_winsys_bo_from_handle(struct radeon_winsys *rws, >> bo->base.vtbl = &radeon_bo_vtbl; >> bo->mgr = mgr; >> bo->rws = mgr->rws; >> + bo->va = 0; >> pipe_mutex_init(bo->map_mutex); >> >> util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)whandle->handle, >> bo); >> @@ -638,6 +768,33 @@ done: >> if (stride) >> *stride = whandle->stride; >> >> + if (mgr->va) { >> + struct drm_radeon_gem_va va; >> + >> + bo->va_size = ((bo->base.size + 4095) & ~4095); >> + bo->va = radeon_bomgr_find_va(mgr, bo->va_size); >> + >> + va.handle = bo->handle; >> + va.operation = RADEON_VA_MAP; >> + va.vm_id = 0; >> + va.offset = bo->va; >> + va.flags = RADEON_VM_PAGE_READABLE | >> + RADEON_VM_PAGE_WRITEABLE | >> + RADEON_VM_PAGE_SNOOPED; >> + va.offset = bo->va; >> + r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va)); >> + if (r && va.operation == RADEON_VA_RESULT_ERROR) { >> + fprintf(stderr, "radeon: Failed to open a buffer:\n"); >> + radeon_bo_destroy(&bo->base); >> + return NULL; >> + } >> + if (va.operation == RADEON_VA_RESULT_VA_EXIST) { >> + radeon_bomgr_free_va(mgr, bo->va, bo->va_size); >> + bo->va = va.offset; >> + radeon_bomgr_force_va(mgr, bo->va, bo->va_size); >> + } >> + } >> + >> return (struct pb_buffer*)bo; >> >> fail: >> @@ -674,6 +831,13 @@ static boolean radeon_winsys_bo_get_handle(struct >> pb_buffer *buffer, >> return TRUE; >> } >> >> +static uint64_t radeon_winsys_bo_va(struct pb_buffer *buffer) >> +{ >> + struct radeon_bo *bo = get_radeon_bo(buffer); >> + >> + return bo->va; >> +} >> + >> void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws) >> { >> ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle; >> @@ -686,4 +850,5 @@ void radeon_bomgr_init_functions(struct >> radeon_drm_winsys *ws) >> ws->base.buffer_create = radeon_winsys_bo_create; >> ws->base.buffer_from_handle = radeon_winsys_bo_from_handle; >> ws->base.buffer_get_handle = radeon_winsys_bo_get_handle; >> + ws->base.buffer_va = radeon_winsys_bo_va; >> } >> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h >> b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h >> index ba71cfb..0fc00ae 100644 >> --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h >> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h >> @@ -61,6 +61,8 @@ struct radeon_bo { >> uint32_t reloc_domains; >> uint32_t handle; >> uint32_t name; >> + uint64_t va; >> + uint64_t va_size; >> >> /* how many command streams is this bo referenced in? */ >> int num_cs_references; >> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c >> b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c >> index 8d5a6b3..a745937 100644 >> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c >> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c >> @@ -73,9 +73,10 @@ >> >> #define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t)) >> >> -static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd) >> +static boolean radeon_init_cs_context(struct radeon_cs_context *csc, >> + struct radeon_drm_winsys *ws) >> { >> - csc->fd = fd; >> + csc->fd = ws->fd; >> csc->nrelocs = 512; >> csc->relocs_bo = (struct radeon_bo**) >> CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*)); >> @@ -90,17 +91,34 @@ static boolean radeon_init_cs_context(struct >> radeon_cs_context *csc, int fd) >> return FALSE; >> } >> >> + csc->cs_flags = (uint32_t*) >> + CALLOC(1, 3 * sizeof(uint32_t)); > > I don't see a reason to allocate cs_flags. Wouldn't it be simpler to > just declare it as an array? > > Also please rebase. winsys/radeon already uses RADEON_CHUNK_ID_FLAGS > and it's incompatible with the way you use it. When I added that > chunk, I made it such that it had only one uint32_t and new flags had > to be or'd. It's already in kernel 3.2. > >> + if (!csc->cs_flags) { >> + FREE(csc->relocs_bo); >> + FREE(csc->relocs); >> + return FALSE; >> + } >> + >> csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB; >> csc->chunks[0].length_dw = 0; >> csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf; >> csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; >> csc->chunks[1].length_dw = 0; >> csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs; >> + csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS; >> + csc->chunks[2].length_dw = 3; >> + csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)csc->cs_flags; >> + csc->cs_flags[0] = 0; >> + csc->cs_flags[1] = RADEON_CS_RING_GFX; >> + csc->cs_flags[2] = 0; >> + if (ws->info.r600_va) >> + csc->cs_flags[0] |= RADEON_CS_USE_VM; >> >> csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0]; >> csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1]; >> + csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2]; >> >> - csc->cs.num_chunks = 2; >> + csc->cs.num_chunks = 3; > > This could be 2 for DRM 2.11.0 and older. Please see also Mesa master. > We change num_chunks in radeon_drm_cs_flush. > >> csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array; >> return TRUE; >> } >> @@ -128,6 +146,7 @@ static void radeon_destroy_cs_context(struct >> radeon_cs_context *csc) >> radeon_cs_context_cleanup(csc); >> FREE(csc->relocs_bo); >> FREE(csc->relocs); >> + FREE(csc->cs_flags); >> } >> >> DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE) >> @@ -147,11 +166,11 @@ static struct radeon_winsys_cs >> *radeon_drm_cs_create(struct radeon_winsys *rws) >> >> cs->ws = ws; >> >> - if (!radeon_init_cs_context(&cs->csc1, cs->ws->fd)) { >> + if (!radeon_init_cs_context(&cs->csc1, cs->ws)) { >> FREE(cs); >> return NULL; >> } >> - if (!radeon_init_cs_context(&cs->csc2, cs->ws->fd)) { >> + if (!radeon_init_cs_context(&cs->csc2, cs->ws)) { >> radeon_destroy_cs_context(&cs->csc1); >> FREE(cs); >> return NULL; >> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h >> b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h >> index f316b5e..26e0c76 100644 >> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h >> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h >> @@ -35,8 +35,8 @@ struct radeon_cs_context { >> >> int fd; >> struct drm_radeon_cs cs; >> - struct drm_radeon_cs_chunk chunks[2]; >> - uint64_t chunk_array[2]; >> + struct drm_radeon_cs_chunk chunks[3]; >> + uint64_t chunk_array[3]; >> >> /* Relocs. */ >> unsigned nrelocs; >> @@ -44,6 +44,7 @@ struct radeon_cs_context { >> unsigned validated_crelocs; >> struct radeon_bo **relocs_bo; >> struct drm_radeon_cs_reloc *relocs; >> + uint32_t *cs_flags; >> >> /* 0 = BO not added, 1 = BO added */ >> char is_handle_added[256]; >> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c >> b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c >> index 442bd2a..8124016 100644 >> --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c >> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c >> @@ -138,9 +138,11 @@ static boolean radeon_get_drm_value(int fd, unsigned >> request, >> info.request = request; >> >> retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); >> - if (retval && errname) { >> - fprintf(stderr, "radeon: Failed to get %s, error number %d\n", >> - errname, retval); >> + if (retval) { >> + if (errname) { >> + fprintf(stderr, "radeon: Failed to get %s, error number %d\n", >> + errname, retval); >> + } >> return FALSE; >> } >> return TRUE; >> @@ -263,6 +265,16 @@ static boolean do_winsys_init(struct radeon_drm_winsys >> *ws) >> &ws->info.r600_backend_map)) >> ws->info.r600_backend_map_valid = TRUE; >> } >> + ws->info.r600_va = FALSE; >> + if (ws->info.drm_minor >= 12) { > > Kernel 3.2 already reports 2.12.0. Did you mean 13? > >> + ws->info.r600_va = TRUE; >> + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL, >> + &ws->info.r600_va_start)) >> + ws->info.r600_va = FALSE; >> + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, >> NULL, >> + &ws->info.r600_ib_vm_max_size)) >> + ws->info.r600_va = FALSE; >> + } >> } >> >> return TRUE; >> diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h >> b/src/gallium/winsys/radeon/drm/radeon_winsys.h >> index c4ea655..69c42c2 100644 >> --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h >> +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h >> @@ -96,6 +96,9 @@ struct radeon_info { >> uint32_t r600_num_tile_pipes; >> uint32_t r600_backend_map; >> boolean r600_backend_map_valid; >> + boolean r600_va; > > Could you please rename r600_va to r600_virtual_address_space. Let's > make the code more approachable to potential new contributors by using > whole words. Plus, you don't have to document it if you give it the > right name. > >> + uint32_t r600_va_start; >> + uint32_t r600_ib_vm_max_size; >> }; >> >> enum radeon_feature_id { >> @@ -242,6 +245,14 @@ struct radeon_winsys { >> unsigned stride, >> struct winsys_handle *whandle); >> >> + /** >> + * Return the virtual address of a buffer. >> + * >> + * \param buf A winsys buffer object >> + * \return virtual address >> + */ >> + uint64_t (*buffer_va)(struct pb_buffer *buf); > > Please rename this function to "buffer_get_virtual_address". > > Marek
I should have mention that this patch is several month old and at that time the kernel interface was much into flux. I haven't had time to respin against lastest mesa and lastest kernel, will do next week once i sorted out more pressing issues. Thanks for the review. Cheers, Jerome _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev