On Fri, Jan 6, 2012 at 4:42 PM, <j.gli...@gmail.com> wrote: > diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c > b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c > index ccf9c4f..8ef0c18 100644 > --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c > +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c > @@ -30,6 +30,7 @@ > #include "util/u_hash_table.h" > #include "util/u_memory.h" > #include "util/u_simple_list.h" > +#include "util/u_double_list.h" > #include "os/os_thread.h" > #include "os/os_mman.h" > > @@ -67,6 +68,12 @@ static INLINE struct radeon_bo *radeon_bo(struct pb_buffer > *bo) > return (struct radeon_bo *)bo; > } > > +struct radeon_bo_va_hole { > + struct list_head list; > + uint64_t offset; > + uint64_t size; > +}; > + > struct radeon_bomgr { > /* Base class. */ > struct pb_manager base; > @@ -77,6 +84,11 @@ struct radeon_bomgr { > /* List of buffer handles and its mutex. */ > struct util_hash_table *bo_handles; > pipe_mutex bo_handles_mutex; > + > + /* is virtual address supported */ > + bool va; > + unsigned va_offset; > + struct list_head va_holes; > }; > > static INLINE struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr) > @@ -151,9 +163,85 @@ static boolean radeon_bo_is_busy(struct pb_buffer *_buf, > } > } > > +static uint64_t radeon_bomgr_find_va(struct radeon_bomgr *mgr, uint64_t size) > +{ > + struct radeon_bo_va_hole *hole, *n; > + uint64_t offset = 0; > + > + pipe_mutex_lock(mgr->bo_handles_mutex);
radeon_bomgr::bo_handles_mutex should only guard accesses to radeon_bomgr::bo_handles. I don't see a reason to reuse it. Could you please add another mutex for the va_* stuff? > + /* first look for a hole */ > + LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) { > + if (hole->size == size) { > + offset = hole->offset; > + list_del(&hole->list); > + FREE(hole); > + pipe_mutex_unlock(mgr->bo_handles_mutex); > + return offset; > + } > + if (hole->size > size) { > + offset = hole->offset; > + hole->size -= size; > + hole->offset += size; > + pipe_mutex_unlock(mgr->bo_handles_mutex); > + return offset; > + } > + } > + > + offset = mgr->va_offset; > + mgr->va_offset += size; > + pipe_mutex_unlock(mgr->bo_handles_mutex); > + return offset; > +} > + > +static void radeon_bomgr_force_va(struct radeon_bomgr *mgr, uint64_t va, > uint64_t size) > +{ > + pipe_mutex_lock(mgr->bo_handles_mutex); > + if (va >= mgr->va_offset) { > + mgr->va_offset = va + size; > + } else { > + struct radeon_bo_va_hole *hole, *n; > + uint64_t stmp, etmp; > + > + /* free all hole that fall into the range > + * NOTE that we might loose virtual address space > + */ > + LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) { > + stmp = hole->offset; > + etmp = stmp + hole->size; > + if (va >= stmp && va < etmp) { > + list_del(&hole->list); > + FREE(hole); > + } > + } > + } > + pipe_mutex_unlock(mgr->bo_handles_mutex); > +} > + > +static void radeon_bomgr_free_va(struct radeon_bomgr *mgr, uint64_t va, > uint64_t size) > +{ > + pipe_mutex_lock(mgr->bo_handles_mutex); > + if ((va + size) == mgr->va_offset) { > + mgr->va_offset = va; > + } else { > + struct radeon_bo_va_hole *hole; > + > + /* FIXME on allocation failure we just loose virtual address space > + * maybe print a warning > + */ > + hole = CALLOC_STRUCT(radeon_bo_va_hole); > + if (hole) { > + hole->size = size; > + hole->offset = va; > + list_add(&hole->list, &mgr->va_holes); > + } > + } > + pipe_mutex_unlock(mgr->bo_handles_mutex); > +} > + > static void radeon_bo_destroy(struct pb_buffer *_buf) > { > struct radeon_bo *bo = radeon_bo(_buf); > + struct radeon_bomgr *mgr = bo->mgr; > struct drm_gem_close args; > > memset(&args, 0, sizeof(args)); > @@ -168,6 +256,10 @@ static void radeon_bo_destroy(struct pb_buffer *_buf) > if (bo->ptr) > os_munmap(bo->ptr, bo->base.size); > > + if (mgr->va) { > + radeon_bomgr_free_va(mgr, bo->va, bo->va_size); > + } > + > /* Close object. */ > args.handle = bo->handle; > drmIoctl(bo->rws->fd, DRM_IOCTL_GEM_CLOSE, &args); > @@ -343,6 +435,7 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct > pb_manager *_mgr, > struct radeon_bo *bo; > struct drm_radeon_gem_create args; > struct radeon_bo_desc *rdesc = (struct radeon_bo_desc*)desc; > + int r; > > memset(&args, 0, sizeof(args)); > > @@ -378,8 +471,38 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct > pb_manager *_mgr, > bo->rws = mgr->rws; > bo->handle = args.handle; > bo->reloc_domains = rdesc->reloc_domains; > + bo->va = 0; > pipe_mutex_init(bo->map_mutex); > > + if (mgr->va) { > + struct drm_radeon_gem_va va; > + > + bo->va_size = ((size + 4095) & ~4095); Please use the "align" function from u_math.h. > + bo->va = radeon_bomgr_find_va(mgr, bo->va_size); > + > + va.handle = bo->handle; > + va.vm_id = 0; > + va.operation = RADEON_VA_MAP; > + va.flags = RADEON_VM_PAGE_READABLE | > + RADEON_VM_PAGE_WRITEABLE | > + RADEON_VM_PAGE_SNOOPED; > + va.offset = bo->va; > + r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va)); > + if (r && va.operation == RADEON_VA_RESULT_ERROR) { > + fprintf(stderr, "radeon: Failed to allocate a buffer:\n"); > + fprintf(stderr, "radeon: size : %d bytes\n", size); > + fprintf(stderr, "radeon: alignment : %d bytes\n", > desc->alignment); > + fprintf(stderr, "radeon: domains : %d\n", > args.initial_domain); > + radeon_bo_destroy(&bo->base); > + return NULL; > + } > + if (va.operation == RADEON_VA_RESULT_VA_EXIST) { > + radeon_bomgr_free_va(mgr, bo->va, bo->va_size); > + bo->va = va.offset; > + radeon_bomgr_force_va(mgr, bo->va, bo->va_size); > + } > + } > + > return &bo->base; > } > > @@ -441,6 +564,11 @@ struct pb_manager *radeon_bomgr_create(struct > radeon_drm_winsys *rws) > mgr->rws = rws; > mgr->bo_handles = util_hash_table_create(handle_hash, handle_compare); > pipe_mutex_init(mgr->bo_handles_mutex); > + > + mgr->va = rws->info.r600_va; > + mgr->va_offset = rws->info.r600_va_start; > + list_inithead(&mgr->va_holes); > + > return &mgr->base; > } > > @@ -584,6 +712,7 @@ static struct pb_buffer > *radeon_winsys_bo_from_handle(struct radeon_winsys *rws, > struct radeon_bo *bo; > struct radeon_bomgr *mgr = radeon_bomgr(ws->kman); > struct drm_gem_open open_arg = {}; > + int r; > > memset(&open_arg, 0, sizeof(open_arg)); > > @@ -628,6 +757,7 @@ static struct pb_buffer > *radeon_winsys_bo_from_handle(struct radeon_winsys *rws, > bo->base.vtbl = &radeon_bo_vtbl; > bo->mgr = mgr; > bo->rws = mgr->rws; > + bo->va = 0; > pipe_mutex_init(bo->map_mutex); > > util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)whandle->handle, > bo); > @@ -638,6 +768,33 @@ done: > if (stride) > *stride = whandle->stride; > > + if (mgr->va) { > + struct drm_radeon_gem_va va; > + > + bo->va_size = ((bo->base.size + 4095) & ~4095); > + bo->va = radeon_bomgr_find_va(mgr, bo->va_size); > + > + va.handle = bo->handle; > + va.operation = RADEON_VA_MAP; > + va.vm_id = 0; > + va.offset = bo->va; > + va.flags = RADEON_VM_PAGE_READABLE | > + RADEON_VM_PAGE_WRITEABLE | > + RADEON_VM_PAGE_SNOOPED; > + va.offset = bo->va; > + r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va)); > + if (r && va.operation == RADEON_VA_RESULT_ERROR) { > + fprintf(stderr, "radeon: Failed to open a buffer:\n"); > + radeon_bo_destroy(&bo->base); > + return NULL; > + } > + if (va.operation == RADEON_VA_RESULT_VA_EXIST) { > + radeon_bomgr_free_va(mgr, bo->va, bo->va_size); > + bo->va = va.offset; > + radeon_bomgr_force_va(mgr, bo->va, bo->va_size); > + } > + } > + > return (struct pb_buffer*)bo; > > fail: > @@ -674,6 +831,13 @@ static boolean radeon_winsys_bo_get_handle(struct > pb_buffer *buffer, > return TRUE; > } > > +static uint64_t radeon_winsys_bo_va(struct pb_buffer *buffer) > +{ > + struct radeon_bo *bo = get_radeon_bo(buffer); > + > + return bo->va; > +} > + > void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws) > { > ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle; > @@ -686,4 +850,5 @@ void radeon_bomgr_init_functions(struct radeon_drm_winsys > *ws) > ws->base.buffer_create = radeon_winsys_bo_create; > ws->base.buffer_from_handle = radeon_winsys_bo_from_handle; > ws->base.buffer_get_handle = radeon_winsys_bo_get_handle; > + ws->base.buffer_va = radeon_winsys_bo_va; > } > diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h > b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h > index ba71cfb..0fc00ae 100644 > --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h > +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h > @@ -61,6 +61,8 @@ struct radeon_bo { > uint32_t reloc_domains; > uint32_t handle; > uint32_t name; > + uint64_t va; > + uint64_t va_size; > > /* how many command streams is this bo referenced in? */ > int num_cs_references; > diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c > b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c > index 8d5a6b3..a745937 100644 > --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c > +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c > @@ -73,9 +73,10 @@ > > #define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t)) > > -static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd) > +static boolean radeon_init_cs_context(struct radeon_cs_context *csc, > + struct radeon_drm_winsys *ws) > { > - csc->fd = fd; > + csc->fd = ws->fd; > csc->nrelocs = 512; > csc->relocs_bo = (struct radeon_bo**) > CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*)); > @@ -90,17 +91,34 @@ static boolean radeon_init_cs_context(struct > radeon_cs_context *csc, int fd) > return FALSE; > } > > + csc->cs_flags = (uint32_t*) > + CALLOC(1, 3 * sizeof(uint32_t)); I don't see a reason to allocate cs_flags. Wouldn't it be simpler to just declare it as an array? Also please rebase. winsys/radeon already uses RADEON_CHUNK_ID_FLAGS and it's incompatible with the way you use it. When I added that chunk, I made it such that it had only one uint32_t and new flags had to be or'd. It's already in kernel 3.2. > + if (!csc->cs_flags) { > + FREE(csc->relocs_bo); > + FREE(csc->relocs); > + return FALSE; > + } > + > csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB; > csc->chunks[0].length_dw = 0; > csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf; > csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; > csc->chunks[1].length_dw = 0; > csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs; > + csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS; > + csc->chunks[2].length_dw = 3; > + csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)csc->cs_flags; > + csc->cs_flags[0] = 0; > + csc->cs_flags[1] = RADEON_CS_RING_GFX; > + csc->cs_flags[2] = 0; > + if (ws->info.r600_va) > + csc->cs_flags[0] |= RADEON_CS_USE_VM; > > csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0]; > csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1]; > + csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2]; > > - csc->cs.num_chunks = 2; > + csc->cs.num_chunks = 3; This could be 2 for DRM 2.11.0 and older. Please see also Mesa master. We change num_chunks in radeon_drm_cs_flush. > csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array; > return TRUE; > } > @@ -128,6 +146,7 @@ static void radeon_destroy_cs_context(struct > radeon_cs_context *csc) > radeon_cs_context_cleanup(csc); > FREE(csc->relocs_bo); > FREE(csc->relocs); > + FREE(csc->cs_flags); > } > > DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE) > @@ -147,11 +166,11 @@ static struct radeon_winsys_cs > *radeon_drm_cs_create(struct radeon_winsys *rws) > > cs->ws = ws; > > - if (!radeon_init_cs_context(&cs->csc1, cs->ws->fd)) { > + if (!radeon_init_cs_context(&cs->csc1, cs->ws)) { > FREE(cs); > return NULL; > } > - if (!radeon_init_cs_context(&cs->csc2, cs->ws->fd)) { > + if (!radeon_init_cs_context(&cs->csc2, cs->ws)) { > radeon_destroy_cs_context(&cs->csc1); > FREE(cs); > return NULL; > diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h > b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h > index f316b5e..26e0c76 100644 > --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h > +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h > @@ -35,8 +35,8 @@ struct radeon_cs_context { > > int fd; > struct drm_radeon_cs cs; > - struct drm_radeon_cs_chunk chunks[2]; > - uint64_t chunk_array[2]; > + struct drm_radeon_cs_chunk chunks[3]; > + uint64_t chunk_array[3]; > > /* Relocs. */ > unsigned nrelocs; > @@ -44,6 +44,7 @@ struct radeon_cs_context { > unsigned validated_crelocs; > struct radeon_bo **relocs_bo; > struct drm_radeon_cs_reloc *relocs; > + uint32_t *cs_flags; > > /* 0 = BO not added, 1 = BO added */ > char is_handle_added[256]; > diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c > b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c > index 442bd2a..8124016 100644 > --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c > +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c > @@ -138,9 +138,11 @@ static boolean radeon_get_drm_value(int fd, unsigned > request, > info.request = request; > > retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); > - if (retval && errname) { > - fprintf(stderr, "radeon: Failed to get %s, error number %d\n", > - errname, retval); > + if (retval) { > + if (errname) { > + fprintf(stderr, "radeon: Failed to get %s, error number %d\n", > + errname, retval); > + } > return FALSE; > } > return TRUE; > @@ -263,6 +265,16 @@ static boolean do_winsys_init(struct radeon_drm_winsys > *ws) > &ws->info.r600_backend_map)) > ws->info.r600_backend_map_valid = TRUE; > } > + ws->info.r600_va = FALSE; > + if (ws->info.drm_minor >= 12) { Kernel 3.2 already reports 2.12.0. Did you mean 13? > + ws->info.r600_va = TRUE; > + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL, > + &ws->info.r600_va_start)) > + ws->info.r600_va = FALSE; > + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, > NULL, > + &ws->info.r600_ib_vm_max_size)) > + ws->info.r600_va = FALSE; > + } > } > > return TRUE; > diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h > b/src/gallium/winsys/radeon/drm/radeon_winsys.h > index c4ea655..69c42c2 100644 > --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h > +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h > @@ -96,6 +96,9 @@ struct radeon_info { > uint32_t r600_num_tile_pipes; > uint32_t r600_backend_map; > boolean r600_backend_map_valid; > + boolean r600_va; Could you please rename r600_va to r600_virtual_address_space. Let's make the code more approachable to potential new contributors by using whole words. Plus, you don't have to document it if you give it the right name. > + uint32_t r600_va_start; > + uint32_t r600_ib_vm_max_size; > }; > > enum radeon_feature_id { > @@ -242,6 +245,14 @@ struct radeon_winsys { > unsigned stride, > struct winsys_handle *whandle); > > + /** > + * Return the virtual address of a buffer. > + * > + * \param buf A winsys buffer object > + * \return virtual address > + */ > + uint64_t (*buffer_va)(struct pb_buffer *buf); Please rename this function to "buffer_get_virtual_address". Marek _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev