Reviewed the patch & it looks fine. Reviewed-by: "Akash Goel <akash.go...@gmail.com>"
On Thu, Nov 13, 2014 at 3:58 PM, Thomas Daniel <thomas.dan...@intel.com> wrote: > From: Oscar Mateo <oscar.ma...@intel.com> > > Up until now, we have pinned every logical ring context backing object > during creation, and left it pinned until destruction. This made my life > easier, but it's a harmful thing to do, because we cause fragmentation > of the GGTT (and, eventually, we would run out of space). > > This patch makes the pinning on-demand: the backing objects of the two > contexts that are written to the ELSP are pinned right before submission > and unpinned once the hardware is done with them. The only context that > is still pinned regardless is the global default one, so that the HWS can > still be accessed in the same way (ring->status_page). > > v2: In the early version of this patch, we were pinning the context as > we put it into the ELSP: on the one hand, this is very efficient because > only a maximum two contexts are pinned at any given time, but on the other > hand, we cannot really pin in interrupt time :( > > v3: Use a mutex rather than atomic_t to protect pin count to avoid races. > Do not unpin default context in free_request. > > v4: Break out pin and unpin into functions. Fix style problems reported > by checkpatch > > v5: Remove unpin_lock as all pinning and unpinning is done with the struct > mutex already locked. Add WARN_ONs to make sure this is the case in > future. > > Issue: VIZ-4277 > Signed-off-by: Oscar Mateo <oscar.ma...@intel.com> > Signed-off-by: Thomas Daniel <thomas.dan...@intel.com> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 12 +++++- > drivers/gpu/drm/i915/i915_drv.h | 1 + > drivers/gpu/drm/i915/i915_gem.c | 39 +++++++++++++------- > drivers/gpu/drm/i915/intel_lrc.c | 69 > +++++++++++++++++++++++++++++------ > drivers/gpu/drm/i915/intel_lrc.h | 4 ++ > 5 files changed, 98 insertions(+), 27 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c > b/drivers/gpu/drm/i915/i915_debugfs.c > index e60d5c2..6eaf813 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -1799,10 +1799,16 @@ static int i915_dump_lrc(struct seq_file *m, void > *unused) > continue; > > if (ctx_obj) { > - struct page *page = > i915_gem_object_get_page(ctx_obj, 1); > - uint32_t *reg_state = kmap_atomic(page); > + struct page *page; > + uint32_t *reg_state; > int j; > > + i915_gem_obj_ggtt_pin(ctx_obj, > + GEN8_LR_CONTEXT_ALIGN, 0); > + > + page = i915_gem_object_get_page(ctx_obj, > 1); > + reg_state = kmap_atomic(page); > + > seq_printf(m, "CONTEXT: %s %u\n", > ring->name, > > intel_execlists_ctx_id(ctx_obj)); > > @@ -1814,6 +1820,8 @@ static int i915_dump_lrc(struct seq_file *m, void > *unused) > } > kunmap_atomic(reg_state); > > + i915_gem_object_ggtt_unpin(ctx_obj); > + > seq_putc(m, '\n'); > } > } > diff --git a/drivers/gpu/drm/i915/i915_drv.h > b/drivers/gpu/drm/i915/i915_drv.h > index 059330c..3c7299d 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -655,6 +655,7 @@ struct intel_context { > struct { > struct drm_i915_gem_object *state; > struct intel_ringbuffer *ringbuf; > + int unpin_count; > } engine[I915_NUM_RINGS]; > > struct list_head link; > diff --git a/drivers/gpu/drm/i915/i915_gem.c > b/drivers/gpu/drm/i915/i915_gem.c > index 408afe7..2ee6996 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -2494,12 +2494,18 @@ static void i915_set_reset_status(struct > drm_i915_private *dev_priv, > > static void i915_gem_free_request(struct drm_i915_gem_request *request) > { > + struct intel_context *ctx = request->ctx; > + > list_del(&request->list); > i915_gem_request_remove_from_client(request); > > - if (request->ctx) > - i915_gem_context_unreference(request->ctx); > + if (i915.enable_execlists && ctx) { > + struct intel_engine_cs *ring = request->ring; > > + if (ctx != ring->default_context) > + intel_lr_context_unpin(ring, ctx); > + i915_gem_context_unreference(ctx); > + } > kfree(request); > } > > @@ -2554,6 +2560,23 @@ static void i915_gem_reset_ring_cleanup(struct > drm_i915_private *dev_priv, > } > > /* > + * Clear the execlists queue up before freeing the requests, as > those > + * are the ones that keep the context and ringbuffer backing > objects > + * pinned in place. > + */ > + while (!list_empty(&ring->execlist_queue)) { > + struct intel_ctx_submit_request *submit_req; > + > + submit_req = list_first_entry(&ring->execlist_queue, > + struct intel_ctx_submit_request, > + execlist_link); > + list_del(&submit_req->execlist_link); > + intel_runtime_pm_put(dev_priv); > + i915_gem_context_unreference(submit_req->ctx); > + kfree(submit_req); > + } > + > + /* > * We must free the requests after all the corresponding objects > have > * been moved off active lists. Which is the same order as the > normal > * retire_requests function does. This is important if object hold > @@ -2570,18 +2593,6 @@ static void i915_gem_reset_ring_cleanup(struct > drm_i915_private *dev_priv, > i915_gem_free_request(request); > } > > - while (!list_empty(&ring->execlist_queue)) { > - struct intel_ctx_submit_request *submit_req; > - > - submit_req = list_first_entry(&ring->execlist_queue, > - struct intel_ctx_submit_request, > - execlist_link); > - list_del(&submit_req->execlist_link); > - intel_runtime_pm_put(dev_priv); > - i915_gem_context_unreference(submit_req->ctx); > - kfree(submit_req); > - } > - > /* These may not have been flush before the reset, do so now */ > kfree(ring->preallocated_lazy_request); > ring->preallocated_lazy_request = NULL; > diff --git a/drivers/gpu/drm/i915/intel_lrc.c > b/drivers/gpu/drm/i915/intel_lrc.c > index 906b985..f7fa0f7 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -139,8 +139,6 @@ > #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) > #define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE) > > -#define GEN8_LR_CONTEXT_ALIGN 4096 > - > #define RING_EXECLIST_QFULL (1 << 0x2) > #define RING_EXECLIST1_VALID (1 << 0x3) > #define RING_EXECLIST0_VALID (1 << 0x4) > @@ -801,9 +799,40 @@ void intel_logical_ring_advance_and_submit(struct > intel_ringbuffer *ringbuf) > execlists_context_queue(ring, ctx, ringbuf->tail); > } > > +static int intel_lr_context_pin(struct intel_engine_cs *ring, > + struct intel_context *ctx) > +{ > + struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state; > + int ret = 0; > + > + WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex)); > + if (ctx->engine[ring->id].unpin_count++ == 0) { > + ret = i915_gem_obj_ggtt_pin(ctx_obj, > + GEN8_LR_CONTEXT_ALIGN, 0); > + if (ret) > + ctx->engine[ring->id].unpin_count = 0; > + } > + > + return ret; > +} > + > +void intel_lr_context_unpin(struct intel_engine_cs *ring, > + struct intel_context *ctx) > +{ > + struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state; > + > + if (ctx_obj) { > + WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex)); > + if (--ctx->engine[ring->id].unpin_count == 0) > + i915_gem_object_ggtt_unpin(ctx_obj); > + } > +} > + > static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, > struct intel_context *ctx) > { > + int ret; > + > if (ring->outstanding_lazy_seqno) > return 0; > > @@ -814,6 +843,14 @@ static int logical_ring_alloc_seqno(struct > intel_engine_cs *ring, > if (request == NULL) > return -ENOMEM; > > + if (ctx != ring->default_context) { > + ret = intel_lr_context_pin(ring, ctx); > + if (ret) { > + kfree(request); > + return ret; > + } > + } > + > /* Hold a reference to the context this request belongs to > * (we will need it when the time comes to emit/retire the > * request). > @@ -1626,12 +1663,16 @@ void intel_lr_context_free(struct intel_context > *ctx) > > for (i = 0; i < I915_NUM_RINGS; i++) { > struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state; > - struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf; > > if (ctx_obj) { > + struct intel_ringbuffer *ringbuf = > + ctx->engine[i].ringbuf; > + struct intel_engine_cs *ring = ringbuf->ring; > + > intel_destroy_ringbuffer_obj(ringbuf); > kfree(ringbuf); > - i915_gem_object_ggtt_unpin(ctx_obj); > + if (ctx == ring->default_context) > + i915_gem_object_ggtt_unpin(ctx_obj); > drm_gem_object_unreference(&ctx_obj->base); > } > } > @@ -1695,6 +1736,7 @@ static int lrc_setup_hardware_status_page(struct > intel_engine_cs *ring, > int intel_lr_context_deferred_create(struct intel_context *ctx, > struct intel_engine_cs *ring) > { > + const bool is_global_default_ctx = (ctx == ring->default_context); > struct drm_device *dev = ring->dev; > struct drm_i915_gem_object *ctx_obj; > uint32_t context_size; > @@ -1714,18 +1756,22 @@ int intel_lr_context_deferred_create(struct > intel_context *ctx, > return ret; > } > > - ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0); > - if (ret) { > - DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret); > - drm_gem_object_unreference(&ctx_obj->base); > - return ret; > + if (is_global_default_ctx) { > + ret = i915_gem_obj_ggtt_pin(ctx_obj, > GEN8_LR_CONTEXT_ALIGN, 0); > + if (ret) { > + DRM_DEBUG_DRIVER("Pin LRC backing obj failed: > %d\n", > + ret); > + drm_gem_object_unreference(&ctx_obj->base); > + return ret; > + } > } > > ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL); > if (!ringbuf) { > DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n", > ring->name); > - i915_gem_object_ggtt_unpin(ctx_obj); > + if (is_global_default_ctx) > + i915_gem_object_ggtt_unpin(ctx_obj); > drm_gem_object_unreference(&ctx_obj->base); > ret = -ENOMEM; > return ret; > @@ -1787,7 +1833,8 @@ int intel_lr_context_deferred_create(struct > intel_context *ctx, > > error: > kfree(ringbuf); > - i915_gem_object_ggtt_unpin(ctx_obj); > + if (is_global_default_ctx) > + i915_gem_object_ggtt_unpin(ctx_obj); > drm_gem_object_unreference(&ctx_obj->base); > return ret; > } > diff --git a/drivers/gpu/drm/i915/intel_lrc.h > b/drivers/gpu/drm/i915/intel_lrc.h > index 84bbf19..14b216b 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.h > +++ b/drivers/gpu/drm/i915/intel_lrc.h > @@ -24,6 +24,8 @@ > #ifndef _INTEL_LRC_H_ > #define _INTEL_LRC_H_ > > +#define GEN8_LR_CONTEXT_ALIGN 4096 > + > /* Execlists regs */ > #define RING_ELSP(ring) ((ring)->mmio_base+0x230) > #define RING_EXECLIST_STATUS(ring) ((ring)->mmio_base+0x234) > @@ -67,6 +69,8 @@ int intel_lr_context_render_state_init(struct > intel_engine_cs *ring, > void intel_lr_context_free(struct intel_context *ctx); > int intel_lr_context_deferred_create(struct intel_context *ctx, > struct intel_engine_cs *ring); > +void intel_lr_context_unpin(struct intel_engine_cs *ring, > + struct intel_context *ctx); > > /* Execlists */ > int intel_sanitize_enable_execlists(struct drm_device *dev, int > enable_execlists); > -- > 1.7.9.5 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx >
_______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx