All GEN GPUs can bind to any piece of memory (thanks to UMA), and so through a special ioctl we can map a chunk of page-aligned client memory into the GPU address space. However, not all GEN are equal. Some have cache-coherency between the CPU and the GPU, whilst the others are incoherent and rely on snooping and explicit flushes to push/pull dirty data. Whereas we can use client buffers as a general replacement for kernel-allocated buffers with LLC (cache coherency), using snooped buffers behaves differently and so must be used with care.
AMD_pinned_memory supposes that the client memory buffer is suitable for any general usage (e.g. vertex data, texture data) and so only on LLC can we offer that extension. --- .../drivers/dri/i965/intel_buffer_objects.c | 68 +++++++++++++------ .../drivers/dri/i965/intel_buffer_objects.h | 6 ++ src/mesa/drivers/dri/i965/intel_extensions.c | 11 +++ 3 files changed, 65 insertions(+), 20 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c index 452e6d33c07..4b34b55793b 100644 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c @@ -72,6 +72,23 @@ mark_buffer_invalid(struct intel_buffer_object *intel_obj) intel_obj->valid_data_end = 0; } +/** Allocates a new brw_bo to store the data for the buffer object. */ +static void +mark_new_state(struct brw_context *brw, + struct intel_buffer_object *intel_obj) +{ + /* the buffer might be bound as a uniform buffer, need to update it + */ + if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER) + brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; + if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER) + brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; + if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER) + brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER; + if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER) + brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; +} + /** Allocates a new brw_bo to store the data for the buffer object. 
*/ static void alloc_buffer_object(struct brw_context *brw, @@ -96,20 +113,28 @@ alloc_buffer_object(struct brw_context *brw, */ size += 64 * 32; /* max read length of 64 256-bit units */ } + + assert(!intel_obj->pinned); intel_obj->buffer = brw_bo_alloc(brw->bufmgr, "bufferobj", size, BRW_MEMZONE_OTHER); - /* the buffer might be bound as a uniform buffer, need to update it - */ - if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; - if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; - if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER; - if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; + mark_new_state(brw, intel_obj); + mark_buffer_inactive(intel_obj); + mark_buffer_invalid(intel_obj); +} + +static void +alloc_userptr_object(struct brw_context *brw, + struct intel_buffer_object *intel_obj, + GLsizeiptrARB size, + const GLvoid *data) +{ + intel_obj->buffer = + brw_bo_alloc_userptr(brw->bufmgr, "bufferobj(userptr)", + (void *)data, size); + intel_obj->pinned = true; + mark_new_state(brw, intel_obj); mark_buffer_inactive(intel_obj); mark_buffer_invalid(intel_obj); } @@ -119,6 +144,7 @@ release_buffer(struct intel_buffer_object *intel_obj) { brw_bo_unreference(intel_obj->buffer); intel_obj->buffer = NULL; + intel_obj->pinned = false; } /** @@ -192,10 +218,6 @@ brw_buffer_data(struct gl_context *ctx, struct brw_context *brw = brw_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - /* Part of the ABI, but this function doesn't use it. 
- */ - (void) target; - intel_obj->Base.Size = size; intel_obj->Base.Usage = usage; intel_obj->Base.StorageFlags = storageFlags; @@ -207,12 +229,16 @@ brw_buffer_data(struct gl_context *ctx, release_buffer(intel_obj); if (size != 0) { - alloc_buffer_object(brw, intel_obj); + if (target != GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD) + alloc_buffer_object(brw, intel_obj); + else + alloc_userptr_object(brw, intel_obj, size, data); if (!intel_obj->buffer) return false; if (data != NULL) { - brw_bo_subdata(intel_obj->buffer, 0, size, data); + if (target != GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD) + brw_bo_subdata(intel_obj->buffer, 0, size, data); mark_buffer_valid_data(intel_obj, 0, size); } } @@ -275,9 +301,10 @@ brw_buffer_subdata(struct gl_context *ctx, brw_batch_references(&brw->batch, intel_obj->buffer); if (busy) { - if (size == intel_obj->Base.Size || + if (!intel_obj->pinned && + (size == intel_obj->Base.Size || (intel_obj->valid_data_start >= offset && - intel_obj->valid_data_end <= offset + size)) { + intel_obj->valid_data_end <= offset + size))) { /* Replace the current busy bo so the subdata doesn't stall. 
*/ brw_bo_unreference(intel_obj->buffer); alloc_buffer_object(brw, intel_obj); @@ -425,7 +452,7 @@ brw_map_buffer_range(struct gl_context *ctx, */ if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) { if (brw_batch_references(&brw->batch, intel_obj->buffer)) { - if (access & GL_MAP_INVALIDATE_BUFFER_BIT) { + if (!intel_obj->pinned && access & GL_MAP_INVALIDATE_BUFFER_BIT) { brw_bo_unreference(intel_obj->buffer); alloc_buffer_object(brw, intel_obj); } else { @@ -433,7 +460,8 @@ brw_map_buffer_range(struct gl_context *ctx, "object\n"); intel_batchbuffer_flush(brw); } - } else if (brw_bo_busy(intel_obj->buffer) && + } else if (!intel_obj->pinned && + brw_bo_busy(intel_obj->buffer) && (access & GL_MAP_INVALIDATE_BUFFER_BIT)) { brw_bo_unreference(intel_obj->buffer); alloc_buffer_object(brw, intel_obj); diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.h b/src/mesa/drivers/dri/i965/intel_buffer_objects.h index 849b231c8c0..072f71a0be1 100644 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.h +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.h @@ -87,6 +87,12 @@ struct intel_buffer_object * cycle of blitting on buffer wraparound. */ bool prefer_stall_to_blit; + + /** + * If this buffer wraps a chunk of client memory, we can not replace + * it with another buffer (of video memory) on a whim; it is pinned. + */ + bool pinned; /** @} */ }; diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index f1c3aeff135..f0a425e8981 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -210,6 +210,17 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.EXT_disjoint_timer_query = ctx->Extensions.ARB_timer_query; + /* AMD_pinned_memory assumes the flexibility of using client memory + * for any buffer (incl. 
vertex buffers) which rules out the prospect + * of using snooped buffers, as using snooped buffers without + * cognisance is likely to be detrimental to performance and to require + * extensive checking in the driver for correctness, e.g. to prevent + * illegal snoop <-> snoop transfers. + */ + ctx->Extensions.AMD_pinned_memory = + brw->screen->kernel_features & KERNEL_ALLOWS_USERPTR && + brw->screen->devinfo.has_llc; + /* Only enable this in core profile because other parts of Mesa behave * slightly differently when the extension is enabled. */ -- 2.18.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev