Reviewed the patch & it looks fine.

Reviewed-by: Akash Goel <akash.go...@gmail.com>
On Thu, Nov 13, 2014 at 3:58 PM, Thomas Daniel <thomas.dan...@intel.com> wrote:
> Same as with the context, pinning to GGTT regardless is harmful (it
> badly fragments the GGTT and can even exhaust it).
>
> Unfortunately, this case is also more complex than the previous one
> because we need to map and access the ringbuffer in several places
> along the execbuffer path (and we cannot make do by leaving the
> default ringbuffer pinned, as before). Also, the context object
> itself contains a pointer to the ringbuffer address that we have to
> keep updated if we are going to allow the ringbuffer to move around.
>
> v2: Same as with the context pinning, we cannot really do it during
> an interrupt. Also, pin the default ringbuffers objects regardless
> (makes error capture a lot easier).
>
> v3: Rebased. Take a pin reference of the ringbuffer for each item
> in the execlist request queue because the hardware may still be using
> the ringbuffer after the MI_USER_INTERRUPT to notify the seqno update
> is executed. The ringbuffer must remain pinned until the context save
> is complete. No longer pin and unpin ringbuffer in
> populate_lr_context() - this transient address is meaningless and the
> pinning can cause a sleep while atomic.
>
> v4: Moved ringbuffer pin and unpin into the lr_context_pin functions.
> Downgraded pinning check BUG_ONs to WARN_ONs.
>
> v5: Reinstated WARN_ONs for unexpected execlist states. Removed unused
> variable.
>
> Issue: VIZ-4277
> Signed-off-by: Oscar Mateo <oscar.ma...@intel.com>
> Signed-off-by: Thomas Daniel <thomas.dan...@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c        | 102 +++++++++++++++++++++++--------
>  drivers/gpu/drm/i915/intel_ringbuffer.c |  85 +++++++++++++++-----------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |   3 +
>  3 files changed, 128 insertions(+), 62 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index f7fa0f7..ca20f91 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -202,6 +202,9 @@ enum {
>  };
>  #define GEN8_CTX_ID_SHIFT 32
>
> +static int intel_lr_context_pin(struct intel_engine_cs *ring,
> +		struct intel_context *ctx);
> +
>  /**
>   * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
>   * @dev: DRM device.
> @@ -339,7 +342,9 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
>  	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
>  }
>
> -static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tail)
> +static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
> +				    struct drm_i915_gem_object *ring_obj,
> +				    u32 tail)
>  {
>  	struct page *page;
>  	uint32_t *reg_state;
> @@ -348,6 +353,7 @@ static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tai
>  	reg_state = kmap_atomic(page);
>
>  	reg_state[CTX_RING_TAIL+1] = tail;
> +	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
>
>  	kunmap_atomic(reg_state);
>
> @@ -358,21 +364,25 @@ static int execlists_submit_context(struct intel_engine_cs *ring,
>  				    struct intel_context *to0, u32 tail0,
>  				    struct intel_context *to1, u32 tail1)
>  {
> -	struct drm_i915_gem_object *ctx_obj0;
> +	struct drm_i915_gem_object *ctx_obj0 = to0->engine[ring->id].state;
> +	struct intel_ringbuffer *ringbuf0 = to0->engine[ring->id].ringbuf;
>  	struct drm_i915_gem_object *ctx_obj1 = NULL;
> +	struct intel_ringbuffer *ringbuf1 = NULL;
>
> -	ctx_obj0 = to0->engine[ring->id].state;
>  	BUG_ON(!ctx_obj0);
>  	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
> +	WARN_ON(!i915_gem_obj_is_pinned(ringbuf0->obj));
>
> -	execlists_ctx_write_tail(ctx_obj0, tail0);
> +	execlists_update_context(ctx_obj0, ringbuf0->obj, tail0);
>
>  	if (to1) {
> +		ringbuf1 = to1->engine[ring->id].ringbuf;
>  		ctx_obj1 = to1->engine[ring->id].state;
>  		BUG_ON(!ctx_obj1);
>  		WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1));
> +		WARN_ON(!i915_gem_obj_is_pinned(ringbuf1->obj));
>
> -		execlists_ctx_write_tail(ctx_obj1, tail1);
> +		execlists_update_context(ctx_obj1, ringbuf1->obj, tail1);
>  	}
>
>  	execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
> @@ -524,6 +534,10 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
>  		return -ENOMEM;
>  	req->ctx = to;
>  	i915_gem_context_reference(req->ctx);
> +
> +	if (to != ring->default_context)
> +		intel_lr_context_pin(ring, to);
> +
>  	req->ring = ring;
>  	req->tail = tail;
>
> @@ -544,7 +558,7 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
>
>  		if (to == tail_req->ctx) {
>  			WARN(tail_req->elsp_submitted != 0,
> -			     "More than 2 already-submitted reqs queued\n");
> +				"More than 2 already-submitted reqs queued\n");
>  			list_del(&tail_req->execlist_link);
>  			list_add_tail(&tail_req->execlist_link,
>  				&ring->execlist_retired_req_list);
> @@ -732,6 +746,12 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
>  	spin_unlock_irqrestore(&ring->execlist_lock, flags);
>
>  	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> +		struct intel_context *ctx = req->ctx;
> +		struct drm_i915_gem_object *ctx_obj =
> +				ctx->engine[ring->id].state;
> +
> +		if (ctx_obj && (ctx != ring->default_context))
> +			intel_lr_context_unpin(ring, ctx);
>  		intel_runtime_pm_put(dev_priv);
>  		i915_gem_context_unreference(req->ctx);
>  		list_del(&req->execlist_link);
> @@ -803,6 +823,7 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring,
>  		struct intel_context *ctx)
>  {
>  	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
>  	int ret = 0;
>
>  	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> @@ -810,21 +831,35 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring,
>  		ret = i915_gem_obj_ggtt_pin(ctx_obj,
>  				GEN8_LR_CONTEXT_ALIGN, 0);
>  		if (ret)
> -			ctx->engine[ring->id].unpin_count = 0;
> +			goto reset_unpin_count;
> +
> +		ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
> +		if (ret)
> +			goto unpin_ctx_obj;
>  	}
>
>  	return ret;
> +
> +unpin_ctx_obj:
> +	i915_gem_object_ggtt_unpin(ctx_obj);
> +reset_unpin_count:
> +	ctx->engine[ring->id].unpin_count = 0;
> +
> +	return ret;
>  }
>
>  void intel_lr_context_unpin(struct intel_engine_cs *ring,
>  		struct intel_context *ctx)
>  {
>  	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
>
>  	if (ctx_obj) {
>  		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> -		if (--ctx->engine[ring->id].unpin_count == 0)
> +		if (--ctx->engine[ring->id].unpin_count == 0) {
> +			intel_unpin_ringbuffer_obj(ringbuf);
>  			i915_gem_object_ggtt_unpin(ctx_obj);
> +		}
>  	}
>  }
>
> @@ -1541,7 +1576,6 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
>  {
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct drm_i915_gem_object *ring_obj = ringbuf->obj;
>  	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
>  	struct page *page;
>  	uint32_t *reg_state;
> @@ -1587,7 +1621,9 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
>  	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
>  	reg_state[CTX_RING_TAIL+1] = 0;
>  	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
> -	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
> +	/* Ring buffer start address is not known until the buffer is pinned.
> +	 * It is written to the context image in execlists_update_context()
> +	 */
>  	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
>  	reg_state[CTX_RING_BUFFER_CONTROL+1] =
>  			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID;
> @@ -1669,10 +1705,12 @@ void intel_lr_context_free(struct intel_context *ctx)
>  					ctx->engine[i].ringbuf;
>  			struct intel_engine_cs *ring = ringbuf->ring;
>
> +			if (ctx == ring->default_context) {
> +				intel_unpin_ringbuffer_obj(ringbuf);
> +				i915_gem_object_ggtt_unpin(ctx_obj);
> +			}
>  			intel_destroy_ringbuffer_obj(ringbuf);
>  			kfree(ringbuf);
> -			if (ctx == ring->default_context)
> -				i915_gem_object_ggtt_unpin(ctx_obj);
>  			drm_gem_object_unreference(&ctx_obj->base);
>  		}
>  	}
> @@ -1770,11 +1808,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>  	if (!ringbuf) {
>  		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
>  				ring->name);
> -		if (is_global_default_ctx)
> -			i915_gem_object_ggtt_unpin(ctx_obj);
> -		drm_gem_object_unreference(&ctx_obj->base);
>  		ret = -ENOMEM;
> -		return ret;
> +		goto error_unpin_ctx;
>  	}
>
>  	ringbuf->ring = ring;
> @@ -1787,22 +1822,30 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>  	ringbuf->space = ringbuf->size;
>  	ringbuf->last_retired_head = -1;
>
> -	/* TODO: For now we put this in the mappable region so that we can reuse
> -	 * the existing ringbuffer code which ioremaps it. When we start
> -	 * creating many contexts, this will no longer work and we must switch
> -	 * to a kmapish interface.
> -	 */
> -	ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
> -	if (ret) {
> -		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n",
> +	if (ringbuf->obj == NULL) {
> +		ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
> +		if (ret) {
> +			DRM_DEBUG_DRIVER(
> +				"Failed to allocate ringbuffer obj %s: %d\n",
>  				ring->name, ret);
> -		goto error;
> +			goto error_free_rbuf;
> +		}
> +
> +		if (is_global_default_ctx) {
> +			ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
> +			if (ret) {
> +				DRM_ERROR(
> +					"Failed to pin and map ringbuffer %s: %d\n",
> +					ring->name, ret);
> +				goto error_destroy_rbuf;
> +			}
> +		}
> +
>  	}
>
>  	ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
>  	if (ret) {
>  		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
> -		intel_destroy_ringbuffer_obj(ringbuf);
>  		goto error;
>  	}
>
> @@ -1823,7 +1866,6 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>  			DRM_ERROR("Init render state failed: %d\n", ret);
>  			ctx->engine[ring->id].ringbuf = NULL;
>  			ctx->engine[ring->id].state = NULL;
> -			intel_destroy_ringbuffer_obj(ringbuf);
>  			goto error;
>  		}
>  		ctx->rcs_initialized = true;
> @@ -1832,7 +1874,13 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>  	return 0;
>
>  error:
> +	if (is_global_default_ctx)
> +		intel_unpin_ringbuffer_obj(ringbuf);
> +error_destroy_rbuf:
> +	intel_destroy_ringbuffer_obj(ringbuf);
> +error_free_rbuf:
>  	kfree(ringbuf);
> +error_unpin_ctx:
>  	if (is_global_default_ctx)
>  		i915_gem_object_ggtt_unpin(ctx_obj);
>  	drm_gem_object_unreference(&ctx_obj->base);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index a8f72e8..0c4aab1 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1721,13 +1721,42 @@ static int init_phys_status_page(struct intel_engine_cs *ring)
>  	return 0;
>  }
>
> -void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
> +void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
>  {
> -	if (!ringbuf->obj)
> -		return;
> -
>  	iounmap(ringbuf->virtual_start);
> +	ringbuf->virtual_start = NULL;
>  	i915_gem_object_ggtt_unpin(ringbuf->obj);
> +}
> +
> +int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
> +				     struct intel_ringbuffer *ringbuf)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct drm_i915_gem_object *obj = ringbuf->obj;
> +	int ret;
> +
> +	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
> +	if (ret)
> +		return ret;
> +
> +	ret = i915_gem_object_set_to_gtt_domain(obj, true);
> +	if (ret) {
> +		i915_gem_object_ggtt_unpin(obj);
> +		return ret;
> +	}
> +
> +	ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
> +			i915_gem_obj_ggtt_offset(obj), ringbuf->size);
> +	if (ringbuf->virtual_start == NULL) {
> +		i915_gem_object_ggtt_unpin(obj);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
> +{
>  	drm_gem_object_unreference(&ringbuf->obj->base);
>  	ringbuf->obj = NULL;
>  }
> @@ -1735,12 +1764,7 @@ void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
>  int intel_alloc_ringbuffer_obj(struct drm_device *dev,
>  			       struct intel_ringbuffer *ringbuf)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_i915_gem_object *obj;
> -	int ret;
> -
> -	if (ringbuf->obj)
> -		return 0;
>
>  	obj = NULL;
>  	if (!HAS_LLC(dev))
> @@ -1753,30 +1777,9 @@ int intel_alloc_ringbuffer_obj(struct drm_device *dev,
>  	/* mark ring buffers as read-only from GPU side by default */
>  	obj->gt_ro = 1;
>
> -	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
> -	if (ret)
> -		goto err_unref;
> -
> -	ret = i915_gem_object_set_to_gtt_domain(obj, true);
> -	if (ret)
> -		goto err_unpin;
> -
> -	ringbuf->virtual_start =
> -		ioremap_wc(dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj),
> -				ringbuf->size);
> -	if (ringbuf->virtual_start == NULL) {
> -		ret = -EINVAL;
> -		goto err_unpin;
> -	}
> -
>  	ringbuf->obj = obj;
> -	return 0;
>
> -err_unpin:
> -	i915_gem_object_ggtt_unpin(obj);
> -err_unref:
> -	drm_gem_object_unreference(&obj->base);
> -	return ret;
> +	return 0;
>  }
>
>  static int intel_init_ring_buffer(struct drm_device *dev,
> @@ -1813,10 +1816,21 @@ static int intel_init_ring_buffer(struct drm_device *dev,
>  		goto error;
>  	}
>
> -	ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
> -	if (ret) {
> -		DRM_ERROR("Failed to allocate ringbuffer %s: %d\n", ring->name, ret);
> -		goto error;
> +	if (ringbuf->obj == NULL) {
> +		ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
> +		if (ret) {
> +			DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
> +					ring->name, ret);
> +			goto error;
> +		}
> +
> +		ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
> +		if (ret) {
> +			DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
> +					ring->name, ret);
> +			intel_destroy_ringbuffer_obj(ringbuf);
> +			goto error;
> +		}
>  	}
>
>  	/* Workaround an erratum on the i830 which causes a hang if
> @@ -1854,6 +1868,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
>  	intel_stop_ring_buffer(ring);
>  	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
>
> +	intel_unpin_ringbuffer_obj(ringbuf);
>  	intel_destroy_ringbuffer_obj(ringbuf);
>  	ring->preallocated_lazy_request = NULL;
>  	ring->outstanding_lazy_seqno = 0;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 8c002d2..365854ad 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -382,6 +382,9 @@ intel_write_status_page(struct intel_engine_cs *ring,
>  #define I915_GEM_HWS_SCRATCH_INDEX	0x30
>  #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
>
> +void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
> +int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
> +				     struct intel_ringbuffer *ringbuf);
>  void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
>  int intel_alloc_ringbuffer_obj(struct drm_device *dev,
>  			       struct intel_ringbuffer *ringbuf);
> --
> 1.7.9.5
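One note for anyone following the pin logic: what the patch establishes is
essentially a per-engine reference count on the context. The first pin grabs
GGTT space for both the context object and the ringbuffer (and ioremaps the
latter); only the last unpin, at retire time once the context save has
completed, releases both. A self-contained toy model of just that counting
behaviour (illustrative stand-in types and names, not the real driver API):

#include <assert.h>
#include <stdio.h>

/* Toy model of the pin lifecycle added by this patch. The field names
 * mirror the patch, but the types are stand-ins, not i915 code. */
struct toy_lrc {
        int unpin_count;        /* ctx->engine[id].unpin_count */
        int ctx_pinned;         /* context object resident in GGTT */
        int ring_mapped;        /* ringbuffer pinned and ioremapped */
};

static void toy_lr_context_pin(struct toy_lrc *c)
{
        /* Only the first reference does the real work, analogous to
         * i915_gem_obj_ggtt_pin() + intel_pin_and_map_ringbuffer_obj(). */
        if (c->unpin_count++ == 0) {
                c->ctx_pinned = 1;
                c->ring_mapped = 1;
        }
}

static void toy_lr_context_unpin(struct toy_lrc *c)
{
        assert(c->unpin_count > 0);
        /* Only the last reference unmaps and unpins, analogous to
         * intel_unpin_ringbuffer_obj() + i915_gem_object_ggtt_unpin(). */
        if (--c->unpin_count == 0) {
                c->ring_mapped = 0;
                c->ctx_pinned = 0;
        }
}

int main(void)
{
        struct toy_lrc c = { 0, 0, 0 };

        /* Two requests for the same context sit in the execlist queue,
         * so the context holds two pin references. */
        toy_lr_context_pin(&c);
        toy_lr_context_pin(&c);

        /* Retiring the first request must NOT unmap the ringbuffer:
         * the hardware may still be reading it until the context save
         * of the second request completes. */
        toy_lr_context_unpin(&c);
        assert(c.ring_mapped && c.ctx_pinned);

        /* Only the final retire frees up the GGTT space. */
        toy_lr_context_unpin(&c);
        assert(!c.ring_mapped && !c.ctx_pinned);

        printf("pin lifecycle behaves as expected\n");
        return 0;
}

The property exercised in the middle of main() is exactly why v3 takes a pin
reference per execlist queue item instead of pinning only around submission.
And since the ringbuffer can now move between pins, execlists_update_context()
rewrites CTX_RING_BUFFER_START alongside the tail on every submission.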
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx