All GEN GPUs can bind to any piece of memory (thanks to UMA), and so through a special ioctl we can map a chunk of page-aligned client memory into the GPU address space. However, not all GEN are equal. Some have cache-coherency between the CPU and the GPU, whilst the others are incoherent and rely on snooping and explicit flushes to push/pull dirty data. Whereas we can use client buffers as a general replacement for kernel-allocated buffers with LLC (cache coherency), using snooped buffers behaves differently and so must be used with care.
AMD_pinned_memory supposes that the client memory buffer is suitable for any general usage (e.g. vertex data, texture data) and so only on LLC can we offer that extension. --- .../drivers/dri/i965/intel_buffer_objects.c | 68 +++++++++++++------ .../drivers/dri/i965/intel_buffer_objects.h | 6 ++ src/mesa/drivers/dri/i965/intel_extensions.c | 11 +++ 3 files changed, 65 insertions(+), 20 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c index 452e6d33c07..4b34b55793b 100644 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c @@ -72,6 +72,23 @@ mark_buffer_invalid(struct intel_buffer_object *intel_obj) intel_obj->valid_data_end = 0; } +/** Allocates a new brw_bo to store the data for the buffer object. */ +static void +mark_new_state(struct brw_context *brw, + struct intel_buffer_object *intel_obj) +{ + /* the buffer might be bound as a uniform buffer, need to update it + */ + if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER) + brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; + if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER) + brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; + if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER) + brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER; + if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER) + brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; +} + /** Allocates a new brw_bo to store the data for the buffer object. 
*/ static void alloc_buffer_object(struct brw_context *brw, @@ -96,20 +113,28 @@ alloc_buffer_object(struct brw_context *brw, */ size += 64 * 32; /* max read length of 64 256-bit units */ } + + assert(!intel_obj->pinned); intel_obj->buffer = brw_bo_alloc(brw->bufmgr, "bufferobj", size, BRW_MEMZONE_OTHER); - /* the buffer might be bound as a uniform buffer, need to update it - */ - if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; - if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; - if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER; - if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; + mark_new_state(brw, intel_obj); + mark_buffer_inactive(intel_obj); + mark_buffer_invalid(intel_obj); +} + +static void +alloc_userptr_object(struct brw_context *brw, + struct intel_buffer_object *intel_obj, + GLsizeiptrARB size, + const GLvoid *data) +{ + intel_obj->buffer = + brw_bo_alloc_userptr(brw->bufmgr, "bufferobj(userptr)", + (void *)data, size); + intel_obj->pinned = true; + mark_new_state(brw, intel_obj); mark_buffer_inactive(intel_obj); mark_buffer_invalid(intel_obj); } @@ -119,6 +144,7 @@ release_buffer(struct intel_buffer_object *intel_obj) { brw_bo_unreference(intel_obj->buffer); intel_obj->buffer = NULL; + intel_obj->pinned = false; } /** @@ -192,10 +218,6 @@ brw_buffer_data(struct gl_context *ctx, struct brw_context *brw = brw_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - /* Part of the ABI, but this function doesn't use it. 
- */ - (void) target; - intel_obj->Base.Size = size; intel_obj->Base.Usage = usage; intel_obj->Base.StorageFlags = storageFlags; @@ -207,12 +229,16 @@ brw_buffer_data(struct gl_context *ctx, release_buffer(intel_obj); if (size != 0) { - alloc_buffer_object(brw, intel_obj); + if (target != GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD) + alloc_buffer_object(brw, intel_obj); + else + alloc_userptr_object(brw, intel_obj, size, data); if (!intel_obj->buffer) return false; if (data != NULL) { - brw_bo_subdata(intel_obj->buffer, 0, size, data); + if (target != GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD) + brw_bo_subdata(intel_obj->buffer, 0, size, data); mark_buffer_valid_data(intel_obj, 0, size); } } @@ -275,9 +301,10 @@ brw_buffer_subdata(struct gl_context *ctx, brw_batch_references(&brw->batch, intel_obj->buffer); if (busy) { - if (size == intel_obj->Base.Size || + if (!intel_obj->pinned && + (size == intel_obj->Base.Size || (intel_obj->valid_data_start >= offset && - intel_obj->valid_data_end <= offset + size)) { + intel_obj->valid_data_end <= offset + size))) { /* Replace the current busy bo so the subdata doesn't stall. 
*/ brw_bo_unreference(intel_obj->buffer); alloc_buffer_object(brw, intel_obj); @@ -425,7 +452,7 @@ brw_map_buffer_range(struct gl_context *ctx, */ if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) { if (brw_batch_references(&brw->batch, intel_obj->buffer)) { - if (access & GL_MAP_INVALIDATE_BUFFER_BIT) { + if (!intel_obj->pinned && access & GL_MAP_INVALIDATE_BUFFER_BIT) { brw_bo_unreference(intel_obj->buffer); alloc_buffer_object(brw, intel_obj); } else { @@ -433,7 +460,8 @@ brw_map_buffer_range(struct gl_context *ctx, "object\n"); intel_batchbuffer_flush(brw); } - } else if (brw_bo_busy(intel_obj->buffer) && + } else if (!intel_obj->pinned && + brw_bo_busy(intel_obj->buffer) && (access & GL_MAP_INVALIDATE_BUFFER_BIT)) { brw_bo_unreference(intel_obj->buffer); alloc_buffer_object(brw, intel_obj); diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.h b/src/mesa/drivers/dri/i965/intel_buffer_objects.h index 849b231c8c0..072f71a0be1 100644 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.h +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.h @@ -87,6 +87,12 @@ struct intel_buffer_object * cycle of blitting on buffer wraparound. */ bool prefer_stall_to_blit; + + /** + * If this buffer wraps a chunk of client memory, we can not replace + * it with another buffer (of video memory) on a whim; it is pinned. + */ + bool pinned; /** @} */ }; diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index f1c3aeff135..f0a425e8981 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -210,6 +210,17 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.EXT_disjoint_timer_query = ctx->Extensions.ARB_timer_query; + /* AMD_pinned_memory assumes the flexibility of using client memory + * for any buffer (incl. 
vertex buffers) which rules out the prospect + * of using snooped buffers, as using snooped buffers without + * cognisance is likely to be detrimental to performance and to require + * extensive checking in the driver for correctness, e.g. to prevent + * illegal snoop <-> snoop transfers. + */ + ctx->Extensions.AMD_pinned_memory = + brw->screen->kernel_features & KERNEL_ALLOWS_USERPTR && + brw->screen->devinfo.has_llc; + /* Only enable this in core profile because other parts of Mesa behave * slightly differently when the extension is enabled. */ -- 2.18.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev