[Mesa-dev] [PATCH 63/70] i965: AMD_pinned_memory and userptr

Chris Wilson Fri, 07 Aug 2015 13:19:58 -0700

All GEN GPUs can bind to any piece of memory (thanks to UMA), and so through
a special ioctl we can map a chunk of page-aligned client memory into
the GPU address space. However, not all GENs are equal. Some have
cache coherency between the CPU and the GPU, whilst the others are
incoherent and rely on snooping on explicit flushes to push/pull dirty
data. Whereas we can use client buffers as a general replacement for kernel
allocated buffers with LLC (cache coherency), snooped buffers
behave differently and so must be used with care.


AMD_pinned_memory supposes that the client memory buffer is suitable
for any general usage (e.g. vertex data, texture data) and so only on
LLC can we offer that extension.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 src/mesa/drivers/dri/i965/brw_batch.c            | 36 +++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_batch.h            |  8 +++++
 src/mesa/drivers/dri/i965/intel_buffer_objects.c | 40 +++++++++++++++++-------
 src/mesa/drivers/dri/i965/intel_extensions.c     |  8 +++++
 src/mesa/drivers/dri/i965/intel_screen.c         | 14 +++++++++
 src/mesa/drivers/dri/i965/intel_screen.h         |  1 +
 6 files changed, 96 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_batch.c b/src/mesa/drivers/dri/i965/brw_batch.c
index 099da72..36a0890 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_batch.c
@@ -1363,6 +1363,42 @@ struct brw_bo *brw_bo_create(struct brw_batch *batch,
    return bo;
 }
 
+/*
+ * Wrap the chunk of client memory given by ptr+size inside a GPU
+ * buffer, and make it cache coherent (though on non-LLC architectures
+ * this requires snooping on explicit cache flushes). This allows the
+ * caller to write into the memory chunk and for those writes to be
+ * visible on the GPU (exactly as if they had created the buffer and
+ * persistently mapped it). Returns NULL if wrapping or import fails.
+ */
+struct brw_bo *brw_bo_create_userptr(struct brw_batch *batch,
+                                     const char *name,
+                                     void *ptr,
+                                     uint64_t size,
+                                     uint64_t alignment)
+{
+   drm_intel_bo *base;
+   struct brw_bo *bo;
+
+   base = drm_intel_bo_alloc_userptr(batch->bufmgr, name,
+                                     ptr, I915_TILING_NONE, 0, size, 0);
+   if (base == NULL)
+      return NULL;
+
+   base->align = alignment;
+   bo = brw_bo_import(batch, base, false);
+   if (bo == NULL) {
+      drm_intel_bo_unreference(base); /* import failed: drop the libdrm bo */
+      return NULL;
+   }
+
+   bo->cache_coherent = true;
+   bo->reusable = false;
+   list_move(&bo->link, &bo->batch->inactive); /* park on the inactive list */
+
+   return bo;
+}
+
 static bool __brw_bo_set_caching(struct brw_bo *bo, int caching)
 {
    struct drm_i915_gem_caching arg;
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h
index 1e80000..5de1209 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -249,6 +249,14 @@ brw_bo_create_tiled(struct brw_batch *batch,
                     uint32_t *pitch,
                     unsigned flags);
 
+/* Create a local brw_bo wrapping client memory ptr+size; NULL on failure */
+struct brw_bo *
+brw_bo_create_userptr(struct brw_batch *batch,
+                      const char *name,
+                      void *ptr,
+                      uint64_t size,
+                      uint64_t alignment);
+
 /* Create a local brw_bo for a foreign buffer using its global flinked name */
 struct brw_bo *brw_bo_create_from_name(struct brw_batch *batch,
                                        const char *name,
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
index 1d01d32..ef4d120 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -58,14 +58,10 @@ mark_buffer_inactive(struct intel_buffer_object *intel_obj)
    intel_obj->gpu_active_end = 0;
 }
 
-/** Allocates a new brw_bo to store the data for the buffer object. */
 static void
-alloc_buffer_object(struct brw_context *brw,
-                    struct intel_buffer_object *intel_obj)
+mark_new_state(struct brw_context *brw,
+               struct intel_buffer_object *intel_obj)
 {
-   intel_obj->buffer =
-      brw_bo_create(&brw->batch, "bufferobj", intel_obj->Base.Size, 64, 0);
-
    /* the buffer might be bound as a uniform buffer, need to update it
     */
    if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
@@ -74,8 +70,18 @@ alloc_buffer_object(struct brw_context *brw,
       brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER;
    if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
       brw->ctx.NewDriverState |= BRW_NEW_ATOMIC_BUFFER;
+}
+
+/** Allocate a new brw_bo for the object's data; dirty the dependent state. */
+static void
+alloc_buffer_object(struct brw_context *brw,
+                    struct intel_buffer_object *intel_obj)
+{
+   intel_obj->buffer =
+      brw_bo_create(&brw->batch, "bufferobj", intel_obj->Base.Size, 64, 0);
 
    mark_buffer_inactive(intel_obj);
+   mark_new_state(brw, intel_obj);
 }
 
 static void
@@ -170,12 +176,24 @@ brw_buffer_data(struct gl_context *ctx,
       release_buffer(intel_obj);
 
    if (size != 0) {
-      alloc_buffer_object(brw, intel_obj);
-      if (!intel_obj->buffer)
-         return false;
+      if (target == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD) {
+         intel_obj->buffer =
+            brw_bo_create_userptr(&brw->batch, "bufferobj(userptr)",
+                                  (void *)data, size, 0);
+         if (!intel_obj->buffer)
+            return false;
+      } else {
+         intel_obj->buffer =
+            brw_bo_create(&brw->batch, "bufferobj", size, 64, 0);
+         if (!intel_obj->buffer)
+            return false;
+
+         if (data != NULL)
+            brw_bo_write(intel_obj->buffer, 0, data, size, 0, NULL);
+      }
 
-      if (data != NULL)
-         brw_bo_write(intel_obj->buffer, 0, data, size, 0, NULL);
+      mark_buffer_inactive(intel_obj);
+      mark_new_state(brw, intel_obj);
    }
 
    return true;
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index f10e69b..30d3e82 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -172,6 +172,14 @@ intelInitExtensions(struct gl_context *ctx)
       ctx->Extensions.EXT_transform_feedback = true;
       ctx->Extensions.OES_depth_texture_cube_map = true;
 
+      ctx->Extensions.AMD_pinned_memory =
+         /* Flexibility of using client memory for any buffer (incl. vertex
+          * buffers) rules out the prospect of using snooped buffers, and
+          * using snooped buffers without cognisance is likely to be
+          * detrimental to performance anyway.
+          */
+         brw->intelScreen->hw_has_userptr && brw->intelScreen->devinfo->has_llc;
+
       ctx->Extensions.ARB_timer_query = brw->intelScreen->hw_has_timestamp;
 
       /* Only enable this in core profile because other parts of Mesa behave
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index 2c9d362..41a4dc0 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1101,6 +1101,19 @@ intel_detect_swizzling(struct intel_screen *screen)
       return true;
 }
 
+static bool
+intel_detect_userptr(struct intel_screen *screen)
+{
+   struct drm_i915_gem_userptr arg;
+   /* Probe the userptr ioctl with a range that wraps past the address limit. */
+   memset(&arg, 0, sizeof(arg));
+   arg.user_ptr = -4096ULL; /* start of the last 4096 bytes of 64-bit space */
+   arg.user_size = 8192; /* 2 * 4096, so ptr+size wraps past the top */
+   errno = 0; /* the result below is read from errno, not the return value */
+   drmIoctl(intel_screen_to_fd(screen), DRM_IOCTL_I915_GEM_USERPTR, &arg);
+   return errno == EFAULT; /* presumably: ioctl exists - TODO confirm */
+}
+
 static int
 intel_detect_timestamp(struct intel_screen *screen)
 {
@@ -1548,6 +1561,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
 
    intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen);
    intelScreen->hw_has_timestamp = intel_detect_timestamp(intelScreen);
+   intelScreen->hw_has_userptr = intel_detect_userptr(intelScreen);
 
    intel_detect_pipelined_register_access(intelScreen);
 
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
index 3356ebf..fcba8f2 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -58,6 +58,7 @@ struct intel_screen
    bool hw_must_use_separate_stencil : 1;
    bool hw_has_swizzling : 1;
    unsigned hw_has_timestamp : 2;
+   bool hw_has_userptr : 1;
    /**
     * Does the kernel support resource streamer?
     */
-- 
2.5.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 63/70] i965: AMD_pinned_memory and userptr

Reply via email to