On 20/11/2019 15:21, Chris Wilson wrote:
Since retirement may be running in a worker on another CPU, it may be
skipped in the local intel_gt_wait_for_idle(). To ensure the state is
consistent for our sanity checks upon load, serialise with the remote
retirer by waiting on the timeline->mutex.

What may get skipped? Timeline might have been removed from the active list before wait_for_idle? But why would that be bad for this code? It just splits part of the retirement to two paths - wait_for_idle still... oops wait.. indeed.. wait_for_idle used to guarantee retirement and now it can be ongoing. Does wait_for_idle needs to always wait or you think case-by-case basis is better?

Regards,

Tvrtko

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
---
  drivers/gpu/drm/i915/i915_gem.c | 26 +++++++++++++++++++++++---
  1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1ba5f26700b0..61395b03443e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -45,6 +45,7 @@
  #include "gem/i915_gem_context.h"
  #include "gem/i915_gem_ioctls.h"
  #include "gem/i915_gem_pm.h"
+#include "gt/intel_context.h"
  #include "gt/intel_engine_user.h"
  #include "gt/intel_gt.h"
  #include "gt/intel_gt_pm.h"
@@ -1041,6 +1042,18 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void 
*data,
        return err;
  }
+static int __intel_context_flush_retire(struct intel_context *ce)
+{
+       struct intel_timeline *tl;
+
+       tl = intel_context_timeline_lock(ce);
+       if (IS_ERR(tl))
+               return PTR_ERR(tl);
+
+       intel_context_timeline_unlock(tl);
+       return 0;
+}
+
  static int __intel_engines_record_defaults(struct intel_gt *gt)
  {
        struct i915_request *requests[I915_NUM_ENGINES] = {};
@@ -1109,13 +1122,20 @@ static int __intel_engines_record_defaults(struct 
intel_gt *gt)
                if (!rq)
                        continue;
- /* We want to be able to unbind the state from the GGTT */
-               GEM_BUG_ON(intel_context_is_pinned(rq->hw_context));
-
+               GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT,
+                                    &rq->hw_context->flags));
                state = rq->hw_context->state;
                if (!state)
                        continue;
+ /* Serialise with retirement on another CPU */
+               err = __intel_context_flush_retire(rq->hw_context);
+               if (err)
+                       goto out;
+
+               /* We want to be able to unbind the state from the GGTT */
+               GEM_BUG_ON(intel_context_is_pinned(rq->hw_context));
+
                /*
                 * As we will hold a reference to the logical state, it will
                 * not be torn down with the context, and importantly the

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to