All GEN GPUs can bind to any piece of memory (thanks to UMA), and so through a special ioctl we can map a chunk of page-aligned client memory into the GPU address space. However, not all GENs are equal. Some have cache coherency between the CPU and the GPU, whilst the others are incoherent and rely on snooping on explicit flushes to push/pull dirty data. Whereas with LLC (cache coherency) we can use client buffers as a general replacement for kernel-allocated buffers, snooped buffers behave differently and so must be used with care.
AMD_pinned_memory supposes that the client memory buffer is suitable for any general usage (e.g. vertex data, texture data) and so only on LLC can we offer that extension. Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> --- src/mesa/drivers/dri/i965/brw_batch.c | 36 +++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_batch.h | 8 +++++ src/mesa/drivers/dri/i965/intel_buffer_objects.c | 40 +++++++++++++++++------- src/mesa/drivers/dri/i965/intel_extensions.c | 8 +++++ src/mesa/drivers/dri/i965/intel_screen.c | 14 +++++++++ src/mesa/drivers/dri/i965/intel_screen.h | 1 + 6 files changed, 96 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_batch.c b/src/mesa/drivers/dri/i965/brw_batch.c index 099da72..36a0890 100644 --- a/src/mesa/drivers/dri/i965/brw_batch.c +++ b/src/mesa/drivers/dri/i965/brw_batch.c @@ -1363,6 +1363,42 @@ struct brw_bo *brw_bo_create(struct brw_batch *batch, return bo; } +/* + * Wrap the chunk of client memory given by ptr+size inside a GPU + * buffer, and make it cache coherent (though on non-LLC architectures + * this requires snooping on explicit cache flushes). This allows the + * caller to write into the memory chunk and for those writes to be + * visible on the GPU (exactly as if they create the buffer and then + * persistently mapped it to obtain the pointer). 
+ */ +struct brw_bo *brw_bo_create_userptr(struct brw_batch *batch, + const char *name, + void *ptr, + uint64_t size, + uint64_t alignment) +{ + drm_intel_bo *base; + struct brw_bo *bo; + + base = drm_intel_bo_alloc_userptr(batch->bufmgr, name, + ptr, I915_TILING_NONE, 0, size, 0); + if (base == NULL) + return NULL; + + base->align = alignment; + bo = brw_bo_import(batch, base, false); + if (bo == NULL) { + drm_intel_bo_unreference(base); + return NULL; + } + + bo->cache_coherent = true; + bo->reusable = false; + list_move(&bo->link, &bo->batch->inactive); + + return bo; +} + static bool __brw_bo_set_caching(struct brw_bo *bo, int caching) { struct drm_i915_gem_caching arg; diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h index 1e80000..5de1209 100644 --- a/src/mesa/drivers/dri/i965/brw_batch.h +++ b/src/mesa/drivers/dri/i965/brw_batch.h @@ -249,6 +249,14 @@ brw_bo_create_tiled(struct brw_batch *batch, uint32_t *pitch, unsigned flags); +/* Create a local brw_bo for GPU access to client memory */ +struct brw_bo * +brw_bo_create_userptr(struct brw_batch *batch, + const char *name, + void *ptr, + uint64_t size, + uint64_t alignment); + /* Create a local brw_bo for a foreign buffer using its global flinked name */ struct brw_bo *brw_bo_create_from_name(struct brw_batch *batch, const char *name, diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c index 1d01d32..ef4d120 100644 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c @@ -58,14 +58,10 @@ mark_buffer_inactive(struct intel_buffer_object *intel_obj) intel_obj->gpu_active_end = 0; } -/** Allocates a new brw_bo to store the data for the buffer object. 
*/ static void -alloc_buffer_object(struct brw_context *brw, - struct intel_buffer_object *intel_obj) +mark_new_state(struct brw_context *brw, + struct intel_buffer_object *intel_obj) { - intel_obj->buffer = - brw_bo_create(&brw->batch, "bufferobj", intel_obj->Base.Size, 64, 0); - /* the buffer might be bound as a uniform buffer, need to update it */ if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER) @@ -74,8 +70,18 @@ alloc_buffer_object(struct brw_context *brw, brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER; if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER) brw->ctx.NewDriverState |= BRW_NEW_ATOMIC_BUFFER; +} + +/** Allocates a new brw_bo to store the data for the buffer object. */ +static void +alloc_buffer_object(struct brw_context *brw, + struct intel_buffer_object *intel_obj) +{ + intel_obj->buffer = + brw_bo_create(&brw->batch, "bufferobj", intel_obj->Base.Size, 64, 0); mark_buffer_inactive(intel_obj); + mark_new_state(brw, intel_obj); } static void @@ -170,12 +176,24 @@ brw_buffer_data(struct gl_context *ctx, release_buffer(intel_obj); if (size != 0) { - alloc_buffer_object(brw, intel_obj); - if (!intel_obj->buffer) - return false; + if (target == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD) { + intel_obj->buffer = + brw_bo_create_userptr(&brw->batch, "bufferobj(userptr)", + (void *)data, size, 0); + if (!intel_obj->buffer) + return false; + } else { + intel_obj->buffer = + brw_bo_create(&brw->batch, "bufferobj", size, 64, 0); + if (!intel_obj->buffer) + return false; + + if (data != NULL) + brw_bo_write(intel_obj->buffer, 0, data, size, 0, NULL); + } - if (data != NULL) - brw_bo_write(intel_obj->buffer, 0, data, size, 0, NULL); + mark_buffer_inactive(intel_obj); + mark_new_state(brw, intel_obj); } return true; diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index f10e69b..30d3e82 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ 
b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -172,6 +172,14 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.EXT_transform_feedback = true; ctx->Extensions.OES_depth_texture_cube_map = true; + ctx->Extensions.AMD_pinned_memory = + /* Flexibility of using client memory for any buffer (incl. vertex + * buffers) rules out the prospect of using snooped buffers, and + * using snooped buffers without cognizance is likely to be + * detrimental to performance anyway. + */ + brw->intelScreen->hw_has_userptr && brw->intelScreen->devinfo->has_llc; + ctx->Extensions.ARB_timer_query = brw->intelScreen->hw_has_timestamp; /* Only enable this in core profile because other parts of Mesa behave diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 2c9d362..41a4dc0 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1101,6 +1101,19 @@ intel_detect_swizzling(struct intel_screen *screen) return true; } +static bool +intel_detect_userptr(struct intel_screen *screen) +{ + struct drm_i915_gem_userptr arg; + + memset(&arg, 0, sizeof(arg)); + arg.user_ptr = -4096ULL; + arg.user_size = 8192; + errno = 0; + drmIoctl(intel_screen_to_fd(screen), DRM_IOCTL_I915_GEM_USERPTR, &arg); + return errno == EFAULT; +} + static int intel_detect_timestamp(struct intel_screen *screen) { @@ -1548,6 +1561,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen); intelScreen->hw_has_timestamp = intel_detect_timestamp(intelScreen); + intelScreen->hw_has_userptr = intel_detect_userptr(intelScreen); intel_detect_pipelined_register_access(intelScreen); diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h index 3356ebf..fcba8f2 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.h +++ b/src/mesa/drivers/dri/i965/intel_screen.h @@ -58,6 +58,7 @@ struct intel_screen bool
hw_must_use_separate_stencil : 1; bool hw_has_swizzling : 1; unsigned hw_has_timestamp : 2; + bool hw_has_userptr : 1; /** * Does the kernel support resource streamer? */ -- 2.5.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev