From: Jesse Barnes <jbar...@virtuousgeek.org>

Use David's new IOMMU layer functions to support SVM (Shared Virtual
Memory) in i915. An SVM context shares the CPU page tables of the
creating process through a PASID, giving a 1:1 GPU vs CPU mapping of
the VM.

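Example (not part of this patch): userspace opts into SVM at context
creation time. Below is a minimal sketch against the CREATE2 uAPI added
earlier in this series; the DRM_IOCTL_I915_GEM_CONTEXT_CREATE2 macro
name and the flags/ctx_id layout of struct drm_i915_gem_context_create2
are assumptions, and libdrm's drmIoctl() is used for the ioctl call:

    /* Hypothetical userspace sketch; see assumptions above. */
    #include <stdint.h>
    #include <string.h>
    #include <xf86drm.h>    /* drmIoctl() from libdrm */
    #include <i915_drm.h>   /* CREATE2 uAPI from this series */

    static int create_svm_context(int fd, uint32_t *ctx_id)
    {
            struct drm_i915_gem_context_create2 create;

            memset(&create, 0, sizeof(create));
            create.flags = I915_GEM_CONTEXT_ENABLE_SVM;

            /* fails with ENODEV when i915 reports SVM unavailable */
            if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE2, &create))
                    return -1;

            *ctx_id = create.ctx_id; /* assumed output field */
            return 0;
    }

The resulting context then runs with the process address space directly
(via the PASID this patch programs into PDP0).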
TODO:
  error record collection for failing SVM contexts
  callback handling for fatal faults
  scheduling

v2: integrate David's core IOMMU support
    make sure we don't clobber the PASID in the context reg state
v3: fixup for intel-svm.h changes (David)
v4: use fault & halt for now (Jesse)
    fix ring free in error path on context alloc (Julia)
v5: update with new callback struct (Jesse)
v6: fix init svm check per new IOMMU code (Jesse)
v7: drop debug code and obsolete i915_svm.c file (Jesse)
v8: fix !CONFIG_INTEL_IOMMU_SVM init stub (Jesse)
v9: update to new execlist and reg handling bits (Jesse)
    context teardown fix (lrc deferred alloc vs teardown race?) (Jesse)
    check for SVM availability at context create (Jesse)
v10: intel_context_svm_init/fini & rebase
v11: move context specific stuff to i915_gem_context
v12: move addressing to context descriptor
v13: strip out workqueue and mm notifiers

Cc: Daniel Vetter <daniel.vet...@ffwll.ch>
Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com>
Cc: David Woodhouse <dw...@infradead.org>
Signed-off-by: David Woodhouse <david.woodho...@intel.com> (v3)
Signed-off-by: Jesse Barnes <jbar...@virtuousgeek.org> (v9)
Signed-off-by: Mika Kuoppala <mika.kuopp...@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  32 ++++++++++
 drivers/gpu/drm/i915/i915_gem.c         |   7 +++
 drivers/gpu/drm/i915/i915_gem_context.c | 104 +++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_reg.h         |  18 ++++++
 drivers/gpu/drm/i915/intel_lrc.c        |  39 +++++-------
 5 files changed, 167 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 598e078418e3..64f3f0f18509 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -39,6 +39,7 @@
 #include <linux/backlight.h>
 #include <linux/hashtable.h>
 #include <linux/intel-iommu.h>
+#include <linux/intel-svm.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
 #include <linux/shmem_fs.h>
@@ -866,6 +867,8 @@ struct i915_ctx_hang_stats {
  * @remap_slice: l3 row remapping information.
  * @flags: context specific flags:
  *         CONTEXT_NO_ZEROMAP: do not allow mapping things to page 0.
+ *         CONTEXT_NO_ERROR_CAPTURE: do not capture gpu state on hang.
+ *         CONTEXT_SVM: context with 1:1 gpu vs cpu mapping of vm.
  * @file_priv: filp associated with this context (NULL for global default
  *            context).
  * @hang_stats: information about the role of this context in possible GPU
@@ -891,6 +894,8 @@ struct i915_gem_context {
        unsigned long flags;
 #define CONTEXT_NO_ZEROMAP             BIT(0)
 #define CONTEXT_NO_ERROR_CAPTURE       BIT(1)
+#define CONTEXT_SVM                    BIT(2)
+
        unsigned hw_id;
        u32 user_handle;
 
@@ -909,6 +914,9 @@ struct i915_gem_context {
        struct atomic_notifier_head status_notifier;
        bool execlists_force_single_submission;
 
+       u32 pasid; /* svm, 20 bits */
+       struct task_struct *task;
+
        struct list_head link;
 
        u8 remap_slice;
@@ -2001,6 +2009,8 @@ struct drm_i915_private {
 
        struct i915_runtime_pm pm;
 
+       bool svm_available;
+
        /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
        struct {
                void (*cleanup_engine)(struct intel_engine_cs *engine);
@@ -3628,6 +3638,28 @@ extern void intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
 int i915_reg_read_ioctl(struct drm_device *dev, void *data,
                        struct drm_file *file);
 
+/* svm */
+#ifdef CONFIG_INTEL_IOMMU_SVM
+static inline bool intel_init_svm(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = to_i915(dev);
+
+       dev_priv->svm_available = USES_FULL_48BIT_PPGTT(dev_priv) &&
+               intel_svm_available(&dev->pdev->dev);
+
+       return dev_priv->svm_available;
+}
+#else
+static inline bool intel_init_svm(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = to_i915(dev);
+
+       dev_priv->svm_available = false;
+
+       return dev_priv->svm_available;
+}
+#endif
+
 /* overlay */
 extern struct intel_overlay_error_state *
 intel_overlay_capture_error_state(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7e08c774a1aa..45d67b54c018 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4304,6 +4304,13 @@ i915_gem_init_hw(struct drm_device *dev)
                }
        }
 
+       if (INTEL_GEN(dev) >= 8) {
+               if (intel_init_svm(dev))
+                       DRM_DEBUG_DRIVER("Initialized Intel SVM support\n");
+               else
+                       DRM_ERROR("Failed to enable Intel SVM support\n");
+       }
+
        i915_gem_init_swizzling(dev);
 
        /*
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 189a6c018b72..9ab6332f296b 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -134,6 +134,47 @@ static int get_context_size(struct drm_i915_private *dev_priv)
        return ret;
 }
 
+static void i915_svm_fault_cb(struct device *dev, int pasid, u64 addr,
+                             u32 private, int rwxp, int response)
+{
+}
+
+static struct svm_dev_ops i915_svm_ops = {
+       .fault_cb = i915_svm_fault_cb,
+};
+
+static int i915_gem_context_svm_init(struct i915_gem_context *ctx)
+{
+       struct device *dev = &ctx->i915->drm.pdev->dev;
+       int ret;
+
+       if (WARN_ON_ONCE(!ctx->i915->svm_available))
+               return -ENODEV;
+
+       get_task_struct(current);
+
+       ret = intel_svm_bind_mm(dev, &ctx->pasid, 0, &i915_svm_ops);
+       if (ret) {
+               DRM_DEBUG_DRIVER("pasid alloc fail: %d\n", ret);
+               put_task_struct(current);
+               return ret;
+       }
+
+       ctx->task = current;
+
+       return 0;
+}
+
+static void i915_gem_context_svm_fini(struct i915_gem_context *ctx)
+{
+       struct device *dev = &ctx->i915->drm.pdev->dev;
+
+       if (ctx->task) {
+               intel_svm_unbind_mm(dev, ctx->pasid);
+               put_task_struct(ctx->task);
+       }
+}
+
 void i915_gem_context_free(struct kref *ctx_ref)
 {
        struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
@@ -143,6 +184,9 @@ void i915_gem_context_free(struct kref *ctx_ref)
        trace_i915_context_free(ctx);
        GEM_BUG_ON(!ctx->closed);
 
+       if (ctx->flags & CONTEXT_SVM)
+               i915_gem_context_svm_fini(ctx);
+
        i915_ppgtt_put(ctx->ppgtt);
 
        for (i = 0; i < I915_NUM_ENGINES; i++) {
@@ -257,9 +301,34 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
        return 0;
 }
 
+static u32 __create_ctx_desc(struct drm_i915_private *dev_priv, u32 flags)
+{
+       u32 desc = GEN8_CTX_VALID;
+
+       if (flags & I915_GEM_CONTEXT_ENABLE_SVM) {
+               desc |= INTEL_ADVANCED_CONTEXT <<
+                       GEN8_CTX_ADDRESSING_MODE_SHIFT;
+               /*
+                * Switch to stream once we have a scheduler and can
+                * re-submit contexts.
+                */
+               desc |= FAULT_AND_HALT << GEN8_CTX_FAULT_SHIFT;
+       } else {
+               if (IS_GEN8(dev_priv))
+                       desc |= GEN8_CTX_L3LLC_COHERENT;
+
+               desc |= GEN8_CTX_PRIVILEGE;
+
+               desc |= GEN8_CTX_ADDRESSING_MODE(dev_priv) <<
+                       GEN8_CTX_ADDRESSING_MODE_SHIFT;
+       }
+
+       return desc;
+}
+
 static struct i915_gem_context *
 __create_hw_context(struct drm_device *dev,
-                   struct drm_i915_file_private *file_priv)
+                   struct drm_i915_file_private *file_priv, u32 flags)
 {
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct i915_gem_context *ctx;
@@ -323,8 +392,8 @@ __create_hw_context(struct drm_device *dev,
 
        ctx->hang_stats.ban_period_seconds = DRM_I915_CTX_BAN_PERIOD;
        ctx->ring_size = 4 * PAGE_SIZE;
-       ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) <<
-                            GEN8_CTX_ADDRESSING_MODE_SHIFT;
+       ctx->desc_template = __create_ctx_desc(dev_priv, flags);
+
        ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier);
 
        return ctx;
@@ -345,13 +414,14 @@ i915_gem_create_context(struct drm_device *dev,
 {
        struct i915_gem_context *ctx;
        bool create_vm = false;
+       int ret;
 
        lockdep_assert_held(&dev->struct_mutex);
 
        if (flags & (I915_GEM_CONTEXT_FULL_PPGTT | I915_GEM_CONTEXT_ENABLE_SVM))
                create_vm = true;
 
-       ctx = __create_hw_context(dev, file_priv);
+       ctx = __create_hw_context(dev, file_priv, flags);
        if (IS_ERR(ctx))
                return ctx;
 
@@ -360,19 +430,31 @@ i915_gem_create_context(struct drm_device *dev,
        if (create_vm) {
                struct i915_hw_ppgtt *ppgtt =
                        i915_ppgtt_create(to_i915(dev), file_priv);
 
                if (IS_ERR(ppgtt)) {
-                       DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
-                                        PTR_ERR(ppgtt));
-                       idr_remove(&file_priv->context_idr, ctx->user_handle);
-                       context_close(ctx);
-                       return ERR_CAST(ppgtt);
+                       ret = PTR_ERR(ppgtt);
+                       DRM_DEBUG_DRIVER("PPGTT setup failed (%d)\n", ret);
+                       goto free_ctx;
                }
 
                ctx->ppgtt = ppgtt;
        }
 
+       if (flags & I915_GEM_CONTEXT_ENABLE_SVM) {
+               ret = i915_gem_context_svm_init(ctx);
+               if (ret)
+                       goto free_ctx;
+
+               ctx->flags |= CONTEXT_SVM;
+       }
+
        trace_i915_context_create(ctx);
 
        return ctx;
+
+free_ctx:
+       idr_remove(&file_priv->context_idr, ctx->user_handle);
+       context_close(ctx);
+
+       return ERR_PTR(ret);
 }
 
 /**
@@ -987,6 +1069,7 @@ static bool contexts_enabled(struct drm_device *dev)
 int i915_gem_context_create2_ioctl(struct drm_device *dev, void *data,
                                   struct drm_file *file)
 {
+       struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_i915_gem_context_create2 *args = data;
        struct drm_i915_file_private *file_priv = file->driver_priv;
        struct i915_gem_context *ctx;
@@ -1007,7 +1090,8 @@ int i915_gem_context_create2_ioctl(struct drm_device *dev, void *data,
        if (USES_FULL_PPGTT(dev))
                flags |= I915_GEM_CONTEXT_FULL_PPGTT;
 
-       if (flags & I915_GEM_CONTEXT_ENABLE_SVM) {
+       if ((flags & I915_GEM_CONTEXT_ENABLE_SVM) &&
+           !dev_priv->svm_available) {
                ret = -ENODEV;
                goto unlock;
        }
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index d4adf2806c50..8eebb038622b 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3062,6 +3062,24 @@ enum {
        INTEL_LEGACY_64B_CONTEXT
 };
 
+enum {
+       FAULT_AND_HANG = 0,
+       FAULT_AND_HALT, /* Debug only */
+       FAULT_AND_STREAM,
+       FAULT_AND_CONTINUE /* Unsupported */
+};
+
+#define GEN8_CTX_VALID (1<<0)
+#define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
+#define GEN8_CTX_FORCE_RESTORE (1<<2)
+#define GEN8_CTX_L3LLC_COHERENT (1<<5)
+#define GEN8_CTX_PRIVILEGE (1<<8)
+
+#define GEN8_CTX_FAULT_SHIFT 6
+#define GEN8_CTX_ID_SHIFT 32
+#define GEN8_CTX_ID_WIDTH 21
+#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT       0x17
+#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT       0x26
 #define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
 #define GEN8_CTX_ADDRESSING_MODE(dev_priv) (USES_FULL_48BIT_PPGTT(dev_priv) ?\
                                INTEL_LEGACY_64B_CONTEXT : \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6b49df4316f4..6e27cc83aa43 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -185,12 +185,6 @@
 #define CTX_R_PWR_CLK_STATE            0x42
 #define CTX_GPGPU_CSR_BASE_ADDRESS     0x44
 
-#define GEN8_CTX_VALID (1<<0)
-#define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
-#define GEN8_CTX_FORCE_RESTORE (1<<2)
-#define GEN8_CTX_L3LLC_COHERENT (1<<5)
-#define GEN8_CTX_PRIVILEGE (1<<8)
-
 #define ASSIGN_CTX_REG(reg_state, pos, reg, val) do { \
        (reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \
        (reg_state)[(pos)+1] = (val); \
@@ -207,16 +201,13 @@
        reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \
 } while (0)
 
-enum {
-       FAULT_AND_HANG = 0,
-       FAULT_AND_HALT, /* Debug only */
-       FAULT_AND_STREAM,
-       FAULT_AND_CONTINUE /* Unsupported */
-};
-#define GEN8_CTX_ID_SHIFT 32
-#define GEN8_CTX_ID_WIDTH 21
-#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT       0x17
-#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT       0x26
+#define ASSIGN_CTX_SVM(reg_state, ctx, engine) do { \
+               ASSIGN_CTX_REG(reg_state, CTX_PDP0_UDW, \
+                              GEN8_RING_PDP_UDW((engine), 0), 0); \
+               ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, \
+                              GEN8_RING_PDP_LDW((engine), 0), (ctx)->pasid); \
+} while (0)
+
 
 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
@@ -270,10 +261,6 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine)
                                        IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
                                        (engine->id == VCS || engine->id == VCS2);
 
-       engine->ctx_desc_template = GEN8_CTX_VALID;
-       if (IS_GEN8(dev_priv))
-               engine->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT;
-       engine->ctx_desc_template |= GEN8_CTX_PRIVILEGE;
 
        /* TODO: WaDisableLiteRestore when we start using semaphore
         * signalling between Command Streamers */
@@ -380,7 +367,9 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
         * PML4 is allocated during ppgtt init, so this is not needed
         * in 48-bit mode.
         */
-       if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
+       if (!(rq->ctx->flags & CONTEXT_SVM) &&
+           ppgtt &&
+           !USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
                execlists_update_context_pdps(ppgtt, reg_state);
 }
 
@@ -1399,7 +1388,9 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
         * it is unsafe in case of lite-restore (because the ctx is
         * not idle). PML4 is allocated during ppgtt init so this is
         * not needed in 48-bit.*/
-       if (req->ctx->ppgtt &&
+
+       if (!(req->ctx->flags & CONTEXT_SVM) &&
+           req->ctx->ppgtt &&
            (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings)) {
                if (!USES_FULL_48BIT_PPGTT(req->i915) &&
                    !intel_vgpu_active(req->i915)) {
@@ -2057,7 +2048,9 @@ populate_lr_context(struct i915_gem_context *ctx,
        ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0),
                       0);
 
-       if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
+       if (ctx->flags & CONTEXT_SVM) {
+               ASSIGN_CTX_SVM(reg_state, ctx, engine);
+       } else if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
                /* 64b PPGTT (48bit canonical)
                 * PDP0_DESCRIPTOR contains the base address to PML4 and
                 * other PDP Descriptors are ignored.
-- 
2.7.4
