On more recent HW, the indirect writes via the GGTT are internally
buffered presenting a nuisance to trying to use them for persistent
client maps. (We cannot guarantee that any write by the client will
then be visible to either the GPU or third parties in a timely manner,
leading to corruption.) Fortunately, we try very hard not to even use
the GGTT for anything and even less so for persistent mmaps, so their
loss is unlikely to be noticed.
No piglits harmed.
Cc: Kenneth Graunke <kenn...@whitecape.org>
Cc: Lionel Landwerlin <lionel.g.landwer...@intel.com>
Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com>
---
include/drm-uapi/i915_drm.h | 22 +++++++++++++++
src/mesa/drivers/dri/i965/brw_bufmgr.c | 36 ++++++++++++++++++++++++
src/mesa/drivers/dri/i965/intel_screen.c | 3 ++
src/mesa/drivers/dri/i965/intel_screen.h | 1 +
4 files changed, 62 insertions(+)
diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
index 16e452aa12d..268b585f8a4 100644
--- a/include/drm-uapi/i915_drm.h
+++ b/include/drm-uapi/i915_drm.h
@@ -529,6 +529,28 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
+/*
+ * Once upon a time we supposed that writes through the GGTT would be
+ * immediately in physical memory (once flushed out of the CPU
path). However,
+ * on a few different processors and chipsets, this is not
necessarily the case
+ * as the writes appear to be buffered internally. Thus a read of
the backing
+ * storage (physical memory) via a different path (with different
physical tags
+ * to the indirect write via the GGTT) will see stale values from
before
+ * the GGTT write. Inside the kernel, we can for the most part keep
track of
+ * the different read/write domains in use (e.g. set-domain), but
the assumption
+ * of coherency is baked into the ABI, hence reporting its true
state in this
+ * parameter.
+ *
+ * Reports true when writes via mmap_gtt are immediately visible
following an
+ * lfence to flush the WCB.
+ *
+ * Reports false when writes via mmap_gtt are indeterminately
delayed in an in
+ * internal buffer and are _not_ immediately visible to third
parties accessing
+ * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC
+ * communications channel when reporting false is strongly disadvised.
+ */
+#define I915_PARAM_MMAP_GTT_COHERENT 52
+
typedef struct drm_i915_getparam {
__s32 param;
/*
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index f1675b191c1..6955c5c890c 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -1068,6 +1068,19 @@ brw_bo_map_wc(struct brw_context *brw, struct
brw_bo *bo, unsigned flags)
return bo->map_wc;
}
+static void
+bo_set_domain(struct brw_bo *bo, unsigned int read, unsigned int write)
+{
+ struct brw_bufmgr *bufmgr = bo->bufmgr;
+
+ struct drm_i915_gem_set_domain arg = {
+ .handle = bo->gem_handle,
+ .read_domains = read,
+ .write_domain = write,
+ };
+ drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
+}
+
/**
* Perform an uncached mapping via the GTT.
*
@@ -1095,6 +1108,25 @@ brw_bo_map_gtt(struct brw_context *brw, struct
brw_bo *bo, unsigned flags)
{
struct brw_bufmgr *bufmgr = bo->bufmgr;
+ /* Once upon a time we believed that there was no internal
buffering of
+ * the indirect writes via the Global GTT; that is once flushed from
+ * the processor the write would be immediately visible to any one
+ * else reading that memory location be they the GPU, kernel or
another
+ * client. As it turns out, on modern hardware there is an
internal buffer
+ * that cannot be directly flushed (e.g. using the sfence one
would normally
+ * use to flush the WCB) and so far the w/a requires us to do an
uncached
+ * mmio read, quite expensive and requires user cooperation. That
is we
+ * cannot simply support persistent user access to the GTT mmap
buffers
+ * as we have no means of flushing their writes in a timely manner.
+ */
+ if (flags & MAP_PERSISTENT &&
+ flags & MAP_COHERENT &&
+ flags & MAP_WRITE &&
+ !(brw->screen->kernel_features &
KERNEL_ALLOWS_COHERENT_MMAP_GTT)) {
+ DBG("bo_map_gtt: rejected attempt to make a coherent,
persistent and writable GGTT mmap, %d (%s)\n", bo->gem_handle,
bo->name);
+ return NULL;
+ }
+
/* Get a mapping of the buffer if we haven't before. */
if (bo->map_gtt == NULL) {
DBG("bo_map_gtt: mmap %d (%s)\n", bo->gem_handle, bo->name);
@@ -1138,6 +1170,10 @@ brw_bo_map_gtt(struct brw_context *brw, struct
brw_bo *bo, unsigned flags)
if (!(flags & MAP_ASYNC)) {
bo_wait_with_stall_warning(brw, bo, "GTT mapping");
}
+ if (flags & MAP_WRITE &&
+ !(brw->screen->kernel_features &
KERNEL_ALLOWS_COHERENT_MMAP_GTT)) {
+ bo_set_domain(bo, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ }
return bo->map_gtt;
}
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c
b/src/mesa/drivers/dri/i965/intel_screen.c
index eaf5a3b9feb..2d2a7d64897 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -2677,6 +2677,9 @@ __DRIconfig **intelInitScreen2(__DRIscreen
*dri_screen)
if (intel_get_boolean(screen, I915_PARAM_HAS_CONTEXT_ISOLATION))
screen->kernel_features |= KERNEL_ALLOWS_CONTEXT_ISOLATION;
+ if (intel_get_boolean(screen, I915_PARAM_MMAP_GTT_COHERENT))
+ screen->kernel_features |= KERNEL_ALLOWS_COHERENT_MMAP_GTT;
+
const char *force_msaa = getenv("INTEL_FORCE_MSAA");
if (force_msaa) {
screen->winsys_msaa_samples_override =
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h
b/src/mesa/drivers/dri/i965/intel_screen.h
index 8d56fcd9e7a..1bc1bff64d7 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -82,6 +82,7 @@ struct intel_screen
#define KERNEL_ALLOWS_EXEC_CAPTURE (1<<5)
#define KERNEL_ALLOWS_EXEC_BATCH_FIRST (1<<6)
#define KERNEL_ALLOWS_CONTEXT_ISOLATION (1<<7)
+#define KERNEL_ALLOWS_COHERENT_MMAP_GTT (1<<8)
struct brw_bufmgr *bufmgr;