On 12-08-2020 at 21:32, Thomas Hellström (Intel) wrote:
>
> On 8/10/20 12:30 PM, Maarten Lankhorst wrote:
>> As a preparation step for full object locking and wait/wound handling
>> during pin and object mapping, ensure that we always pass the ww context
>> in i915_gem_execbuffer.c to i915_vma_pin, and use lockdep to ensure this
>> happens.
>>
>> This also requires changing the order of eb_parse slightly, to ensure
>> we pass ww at a point where we could still handle -EDEADLK safely.
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankho...@linux.intel.com>
>
> I'm a bit curious how we handle the lifetime of the contending locks, since 
> we often return through the call tree before doing the ww transaction 
> relaxation (the slow lock). Has that been a problem?
>
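As far as I can tell, the -EDEADLK unwinds all the way back to the single
top-level loop that owns the i915_gem_ww_ctx, and the ww context keeps a
reference to the contended object until the backoff, so it stays alive while
we return through the call tree. A rough sketch of that pattern, mirroring
the retry loops visible in the diff below (error handling trimmed):

	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true /* interruptible */);
retry:
	err = intel_context_pin_ww(ce, &ww);
	/* ... further locking/pinning that may return -EDEADLK ... */
	if (err == -EDEADLK) {
		/*
		 * Relaxation: slow-lock the contended object, then restart
		 * the whole transaction with that lock already held.
		 */
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);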
>
>> ---
>>   drivers/gpu/drm/i915/display/intel_display.c  |   2 +-
>>   drivers/gpu/drm/i915/gem/i915_gem_context.c   |   4 +-
>>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 140 ++++++++++--------
>>   .../i915/gem/selftests/i915_gem_execbuffer.c  |   4 +-
>>   drivers/gpu/drm/i915/gt/gen6_ppgtt.c          |   4 +-
>>   drivers/gpu/drm/i915/gt/gen6_ppgtt.h          |   4 +-
>>   drivers/gpu/drm/i915/gt/intel_context.c       |  65 +++++---
>>   drivers/gpu/drm/i915/gt/intel_context.h       |  13 ++
>>   drivers/gpu/drm/i915/gt/intel_context_types.h |   3 +-
>>   drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 +-
>>   drivers/gpu/drm/i915/gt/intel_gt.c            |   2 +-
>>   drivers/gpu/drm/i915/gt/intel_lrc.c           |   5 +-
>>   drivers/gpu/drm/i915/gt/intel_renderstate.c   |   2 +-
>>   drivers/gpu/drm/i915/gt/intel_ring.c          |  10 +-
>>   drivers/gpu/drm/i915/gt/intel_ring.h          |   3 +-
>>   .../gpu/drm/i915/gt/intel_ring_submission.c   |  15 +-
>>   drivers/gpu/drm/i915/gt/intel_timeline.c      |  12 +-
>>   drivers/gpu/drm/i915/gt/intel_timeline.h      |   3 +-
>>   drivers/gpu/drm/i915/gt/mock_engine.c         |   3 +-
>>   drivers/gpu/drm/i915/gt/selftest_lrc.c        |   2 +-
>>   drivers/gpu/drm/i915/gt/selftest_timeline.c   |   4 +-
>>   drivers/gpu/drm/i915/gt/uc/intel_guc.c        |   2 +-
>>   drivers/gpu/drm/i915/i915_drv.h               |  13 +-
>>   drivers/gpu/drm/i915/i915_gem.c               |  11 +-
>>   drivers/gpu/drm/i915/i915_vma.c               |  13 +-
>>   drivers/gpu/drm/i915/i915_vma.h               |  13 +-
>>   26 files changed, 217 insertions(+), 137 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
>> index 5b4434289117..aa5a88340d10 100644
>> --- a/drivers/gpu/drm/i915/display/intel_display.c
>> +++ b/drivers/gpu/drm/i915/display/intel_display.c
>> @@ -3451,7 +3451,7 @@ initial_plane_vma(struct drm_i915_private *i915,
>>       if (IS_ERR(vma))
>>           goto err_obj;
>>   -    if (i915_ggtt_pin(vma, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
>> +    if (i915_ggtt_pin(vma, NULL, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
>>           goto err_obj;
>>         if (i915_gem_object_is_tiled(obj) &&
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> index 34c8b0dd85e0..cf5ecbde9e06 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> @@ -1154,7 +1154,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
>>             i915_gem_ww_ctx_init(&ww, true);
>>   retry:
>> -        err = intel_context_pin(ce);
>> +        err = intel_context_pin_ww(ce, &ww);
>>           if (err)
>>               goto err;
>>   @@ -1247,7 +1247,7 @@ static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx *ww
>>         if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915))
>>           /* ppGTT is not part of the legacy context image */
>> -        return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm));
>> +        return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww);
>>         return 0;
>>   }
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> index 604e26adea23..94bfdc54f035 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> @@ -437,16 +437,17 @@ eb_pin_vma(struct i915_execbuffer *eb,
>>           pin_flags |= PIN_GLOBAL;
>>         /* Attempt to reuse the current location if available */
>> -    if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) {
>> +    /* TODO: Add -EDEADLK handling here */
>> +    if (unlikely(i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags))) {
>>           if (entry->flags & EXEC_OBJECT_PINNED)
>>               return false;
>>             /* Failing that pick any _free_ space if suitable */
>> -        if (unlikely(i915_vma_pin(vma,
>> -                      entry->pad_to_size,
>> -                      entry->alignment,
>> -                      eb_pin_flags(entry, ev->flags) |
>> -                      PIN_USER | PIN_NOEVICT)))
>> +        if (unlikely(i915_vma_pin_ww(vma, &eb->ww,
>> +                         entry->pad_to_size,
>> +                         entry->alignment,
>> +                         eb_pin_flags(entry, ev->flags) |
>> +                         PIN_USER | PIN_NOEVICT)))
>>               return false;
>>       }
>>   @@ -587,7 +588,7 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
>>           obj->cache_level != I915_CACHE_NONE);
>>   }
>>   -static int eb_reserve_vma(const struct i915_execbuffer *eb,
>> +static int eb_reserve_vma(struct i915_execbuffer *eb,
>>                 struct eb_vma *ev,
>>                 u64 pin_flags)
>>   {
>> @@ -602,7 +603,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
>>               return err;
>>       }
>>   -    err = i915_vma_pin(vma,
>> +    err = i915_vma_pin_ww(vma, &eb->ww,
>>                  entry->pad_to_size, entry->alignment,
>>                  eb_pin_flags(entry, ev->flags) | pin_flags);
>>       if (err)
>> @@ -1133,9 +1134,10 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
>>   }
>>     static void *reloc_iomap(struct drm_i915_gem_object *obj,
>> -             struct reloc_cache *cache,
>> +             struct i915_execbuffer *eb,
>>                unsigned long page)
>>   {
>> +    struct reloc_cache *cache = &eb->reloc_cache;
>>       struct i915_ggtt *ggtt = cache_to_ggtt(cache);
>>       unsigned long offset;
>>       void *vaddr;
>> @@ -1157,10 +1159,13 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
>>           if (err)
>>               return ERR_PTR(err);
>>   -        vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
>> -                           PIN_MAPPABLE |
>> -                           PIN_NONBLOCK /* NOWARN */ |
>> -                           PIN_NOEVICT);
>> +        vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
>> +                          PIN_MAPPABLE |
>> +                          PIN_NONBLOCK /* NOWARN */ |
>> +                          PIN_NOEVICT);
>> +        if (vma == ERR_PTR(-EDEADLK))
>> +            return vma;
>> +
>>           if (IS_ERR(vma)) {
>>               memset(&cache->node, 0, sizeof(cache->node));
>>               mutex_lock(&ggtt->vm.mutex);
>> @@ -1196,9 +1201,10 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
>>   }
>>     static void *reloc_vaddr(struct drm_i915_gem_object *obj,
>> -             struct reloc_cache *cache,
>> +             struct i915_execbuffer *eb,
>>                unsigned long page)
>>   {
>> +    struct reloc_cache *cache = &eb->reloc_cache;
>>       void *vaddr;
>>         if (cache->page == page) {
>> @@ -1206,7 +1212,7 @@ static void *reloc_vaddr(struct drm_i915_gem_object *obj,
>>       } else {
>>           vaddr = NULL;
>>           if ((cache->vaddr & KMAP) == 0)
>> -            vaddr = reloc_iomap(obj, cache, page);
>> +            vaddr = reloc_iomap(obj, eb, page);
>>           if (!vaddr)
>>               vaddr = reloc_kmap(obj, cache, page);
>>       }
>> @@ -1293,7 +1299,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>>           goto err_unmap;
>>       }
>>   -    err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
>> +    err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER | PIN_NONBLOCK);
>>       if (err)
>>           goto err_unmap;
>>   @@ -1314,7 +1320,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>>               eb->reloc_context = ce;
>>           }
>>   -        err = intel_context_pin(ce);
>> +        err = intel_context_pin_ww(ce, &eb->ww);
>>           if (err)
>>               goto err_unpin;
>>   @@ -1537,8 +1543,7 @@ relocate_entry(struct i915_vma *vma,
>>           void *vaddr;
>>     repeat:
>> -        vaddr = reloc_vaddr(vma->obj,
>> -                    &eb->reloc_cache,
>> +        vaddr = reloc_vaddr(vma->obj, eb,
>>                       offset >> PAGE_SHIFT);
>>           if (IS_ERR(vaddr))
>>               return PTR_ERR(vaddr);
>> @@ -1954,6 +1959,7 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
>>       rq = eb_pin_engine(eb, false);
>>       if (IS_ERR(rq)) {
>>           err = PTR_ERR(rq);
>> +        rq = NULL;
>>           goto err;
>>       }
>>   @@ -2238,7 +2244,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
>>   }
>>     static struct i915_vma *
>> -shadow_batch_pin(struct drm_i915_gem_object *obj,
>> +shadow_batch_pin(struct i915_execbuffer *eb,
>> +         struct drm_i915_gem_object *obj,
>>            struct i915_address_space *vm,
>>            unsigned int flags)
>>   {
>> @@ -2249,7 +2256,7 @@ shadow_batch_pin(struct drm_i915_gem_object *obj,
>>       if (IS_ERR(vma))
>>           return vma;
>>   -    err = i915_vma_pin(vma, 0, 0, flags);
>> +    err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags);
>>       if (err)
>>           return ERR_PTR(err);
>>   @@ -2403,16 +2410,33 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
>>       return err;
>>   }
>>   +static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
>> +{
>> +    /*
>> +     * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
>> +     * batch" bit. Hence we need to pin secure batches into the global gtt.
>> +     * hsw should have this fixed, but bdw mucks it up again. */
>> +    if (eb->batch_flags & I915_DISPATCH_SECURE)
>> +        return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0);
>> +
>> +    return NULL;
>> +}
>> +
>>   static int eb_parse(struct i915_execbuffer *eb)
>>   {
>>       struct drm_i915_private *i915 = eb->i915;
>>       struct intel_gt_buffer_pool_node *pool = eb->batch_pool;
>> -    struct i915_vma *shadow, *trampoline;
>> +    struct i915_vma *shadow, *trampoline, *batch;
>>       unsigned int len;
>>       int err;
>>   -    if (!eb_use_cmdparser(eb))
>> -        return 0;
>> +    if (!eb_use_cmdparser(eb)) {
>> +        batch = eb_dispatch_secure(eb, eb->batch->vma);
>> +        if (IS_ERR(batch))
>> +            return PTR_ERR(batch);
>> +
>> +        goto secure_batch;
>> +    }
>>         len = eb->batch_len;
>>       if (!CMDPARSER_USES_GGTT(eb->i915)) {
>> @@ -2440,7 +2464,7 @@ static int eb_parse(struct i915_execbuffer *eb)
>>       if (err)
>>           goto err;
>>   -    shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER);
>> +    shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER);
>>       if (IS_ERR(shadow)) {
>>           err = PTR_ERR(shadow);
>>           goto err;
>> @@ -2452,7 +2476,7 @@ static int eb_parse(struct i915_execbuffer *eb)
>>       if (CMDPARSER_USES_GGTT(eb->i915)) {
>>           trampoline = shadow;
>>   -        shadow = shadow_batch_pin(pool->obj,
>> +        shadow = shadow_batch_pin(eb, pool->obj,
>>                         &eb->engine->gt->ggtt->vm,
>>                         PIN_GLOBAL);
>>           if (IS_ERR(shadow)) {
>> @@ -2465,19 +2489,34 @@ static int eb_parse(struct i915_execbuffer *eb)
>>           eb->batch_flags |= I915_DISPATCH_SECURE;
>>       }
>>   +    batch = eb_dispatch_secure(eb, shadow);
>> +    if (IS_ERR(batch)) {
>> +        err = PTR_ERR(batch);
>> +        goto err_trampoline;
>> +    }
>> +
>>       err = eb_parse_pipeline(eb, shadow, trampoline);
>>       if (err)
>> -        goto err_trampoline;
>> +        goto err_unpin_batch;
>>   -    eb->vma[eb->buffer_count].vma = i915_vma_get(shadow);
>> -    eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN;
>>       eb->batch = &eb->vma[eb->buffer_count++];
>> +    eb->batch->vma = i915_vma_get(shadow);
>> +    eb->batch->flags = __EXEC_OBJECT_HAS_PIN;
>>         eb->trampoline = trampoline;
>>       eb->batch_start_offset = 0;
>>   +secure_batch:
>> +    if (batch) {
>> +        eb->batch = &eb->vma[eb->buffer_count++];
>> +        eb->batch->flags = __EXEC_OBJECT_HAS_PIN;
>> +        eb->batch->vma = i915_vma_get(batch);
>> +    }
>>       return 0;
>>   +err_unpin_batch:
>> +    if (batch)
>> +        i915_vma_unpin(batch);
>>   err_trampoline:
>>       if (trampoline)
>>           i915_vma_unpin(trampoline);
>> @@ -2619,7 +2658,7 @@ static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throt
>>        * GGTT space, so do this first before we reserve a seqno for
>>        * ourselves.
>>        */
>> -    err = intel_context_pin(ce);
>> +    err = intel_context_pin_ww(ce, &eb->ww);
>>       if (err)
>>           return ERR_PTR(err);
>>   @@ -3237,33 +3276,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>         ww_acquire_done(&eb.ww.ctx);
>>   -    /*
>> -     * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
>> -     * batch" bit. Hence we need to pin secure batches into the global gtt.
>> -     * hsw should have this fixed, but bdw mucks it up again. */
>> -    if (eb.batch_flags & I915_DISPATCH_SECURE) {
>> -        struct i915_vma *vma;
>> -
>> -        /*
>> -         * So on first glance it looks freaky that we pin the batch here
>> -         * outside of the reservation loop. But:
>> -         * - The batch is already pinned into the relevant ppgtt, so we
>> -         *   already have the backing storage fully allocated.
>> -         * - No other BO uses the global gtt (well contexts, but meh),
>> -         *   so we don't really have issues with multiple objects not
>> -         *   fitting due to fragmentation.
>> -         * So this is actually safe.
>> -         */
>> -        vma = i915_gem_object_ggtt_pin(eb.batch->vma->obj, NULL, 0, 0, 0);
>> -        if (IS_ERR(vma)) {
>> -            err = PTR_ERR(vma);
>> -            goto err_vma;
>> -        }
>> -
>> -        batch = vma;
>> -    } else {
>> -        batch = eb.batch->vma;
>> -    }
>> +    batch = eb.batch->vma;
>>         /* All GPU relocation batches must be submitted prior to the user rq */
>>       GEM_BUG_ON(eb.reloc_cache.rq);
>> @@ -3272,7 +3285,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>       eb.request = i915_request_create(eb.context);
>>       if (IS_ERR(eb.request)) {
>>           err = PTR_ERR(eb.request);
>> -        goto err_batch_unpin;
>> +        goto err_vma;
>>       }
>>         if (in_fence) {
>> @@ -3333,9 +3346,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>       }
>>       i915_request_put(eb.request);
>>   -err_batch_unpin:
>> -    if (eb.batch_flags & I915_DISPATCH_SECURE)
>> -        i915_vma_unpin(batch);
>>   err_vma:
>>       eb_release_vmas(&eb, true);
>>       if (eb.trampoline)
>> @@ -3423,7 +3433,9 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
>>       /* Copy in the exec list from userland */
>>       exec_list = kvmalloc_array(count, sizeof(*exec_list),
>>                      __GFP_NOWARN | GFP_KERNEL);
>> -    exec2_list = kvmalloc_array(count + 1, eb_element_size(),
>> +
>> +    /* Allocate extra slots for use by the command parser */
>> +    exec2_list = kvmalloc_array(count + 2, eb_element_size(),
>>                       __GFP_NOWARN | GFP_KERNEL);
>>       if (exec_list == NULL || exec2_list == NULL) {
>>           drm_dbg(&i915->drm,
>> @@ -3500,8 +3512,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
>>       if (err)
>>           return err;
>>   -    /* Allocate an extra slot for use by the command parser */
>> -    exec2_list = kvmalloc_array(count + 1, eb_element_size(),
>> +    /* Allocate extra slots for use by the command parser */
>> +    exec2_list = kvmalloc_array(count + 2, eb_element_size(),
>>                       __GFP_NOWARN | GFP_KERNEL);
>>       if (exec2_list == NULL) {
>>         drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
>> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
>> index 563839cbaf1c..e1d50a5a1477 100644
>> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
>> @@ -36,7 +36,7 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
>>       if (err)
>>           return err;
>>   -    err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
>> +    err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, PIN_USER | PIN_HIGH);
>>       if (err)
>>           return err;
>>   @@ -139,7 +139,7 @@ static int igt_gpu_reloc(void *arg)
>>             i915_gem_ww_ctx_init(&eb.ww, false);
>>   retry:
>> -        err = intel_context_pin(eb.context);
>> +        err = intel_context_pin_ww(eb.context, &eb.ww);
>>           if (!err) {
>>               err = __igt_gpu_reloc(&eb, scratch);
>>   diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
>> index 7e5a86b774a7..fd0d24d28763 100644
>> --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
>> +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
>> @@ -368,7 +368,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
>>       return vma;
>>   }
>>   -int gen6_ppgtt_pin(struct i915_ppgtt *base)
>> +int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
>>   {
>>       struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
>>       int err;
>> @@ -394,7 +394,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base)
>>        */
>>       err = 0;
>>       if (!atomic_read(&ppgtt->pin_count))
>> -        err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH);
>> +        err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);
>>       if (!err)
>>           atomic_inc(&ppgtt->pin_count);
>>       mutex_unlock(&ppgtt->pin_mutex);
>> diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
>> index 7249672e5802..3357228f3304 100644
>> --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
>> +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
>> @@ -8,6 +8,8 @@
>>     #include "intel_gtt.h"
>>   +struct i915_gem_ww_ctx;
>> +
>>   struct gen6_ppgtt {
>>       struct i915_ppgtt base;
>>   @@ -67,7 +69,7 @@ static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base)
>>                (pt = i915_pt_entry(pd, iter), true);        \
>>            ++iter)
>>   -int gen6_ppgtt_pin(struct i915_ppgtt *base);
>> +int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww);
>>   void gen6_ppgtt_unpin(struct i915_ppgtt *base);
>>   void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
>>   void gen6_ppgtt_enable(struct intel_gt *gt);
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
>> index efe9a7a89ede..c05ef213bdc2 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
>> @@ -93,12 +93,12 @@ static void intel_context_active_release(struct intel_context *ce)
>>       i915_active_release(&ce->active);
>>   }
>>   -static int __context_pin_state(struct i915_vma *vma)
>> -static int __context_pin_state(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
>>   {
>>       unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
>>       int err;
>>   -    err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH);
>> +    err = i915_ggtt_pin(vma, ww, 0, bias | PIN_HIGH);
>>       if (err)
>>           return err;
>>   @@ -127,11 +127,12 @@ static void __context_unpin_state(struct i915_vma *vma)
>>       __i915_vma_unpin(vma);
>>   }
>>   -static int __ring_active(struct intel_ring *ring)
>> +static int __ring_active(struct intel_ring *ring,
>> +             struct i915_gem_ww_ctx *ww)
>>   {
>>       int err;
>>   -    err = intel_ring_pin(ring);
>> +    err = intel_ring_pin(ring, ww);
>>       if (err)
>>           return err;
>>   @@ -152,24 +153,25 @@ static void __ring_retire(struct intel_ring *ring)
>>       intel_ring_unpin(ring);
>>   }
>>   -static int intel_context_pre_pin(struct intel_context *ce)
>> +static int intel_context_pre_pin(struct intel_context *ce,
>> +                 struct i915_gem_ww_ctx *ww)
>>   {
>>       int err;
>>         CE_TRACE(ce, "active\n");
>>   -    err = __ring_active(ce->ring);
>> +    err = __ring_active(ce->ring, ww);
>>       if (err)
>>           return err;
>>   -    err = intel_timeline_pin(ce->timeline);
>> +    err = intel_timeline_pin(ce->timeline, ww);
>>       if (err)
>>           goto err_ring;
>>         if (!ce->state)
>>           return 0;
>>   -    err = __context_pin_state(ce->state);
>> +    err = __context_pin_state(ce->state, ww);
>>       if (err)
>>           goto err_timeline;
>>   @@ -192,7 +194,8 @@ static void intel_context_post_unpin(struct intel_context *ce)
>>       __ring_retire(ce->ring);
>>   }
>>   -int __intel_context_do_pin(struct intel_context *ce)
>> +int __intel_context_do_pin_ww(struct intel_context *ce,
>> +                  struct i915_gem_ww_ctx *ww)
>>   {
>>       bool handoff = false;
>>       void *vaddr;
>> @@ -209,7 +212,14 @@ int __intel_context_do_pin(struct intel_context *ce)
>>        * refcount for __intel_context_active(), which prevent a lock
>>        * inversion of ce->pin_mutex vs dma_resv_lock().
>>        */
>> -    err = intel_context_pre_pin(ce);
>> +
>> +    err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
>
> Since hwsp_ggtt->obj is a shared gem object due to sub-allocation, holding 
> this lock across execbuf unnecessarily stalls submission by other clients 
> that share the same suballocation slab. Since it's pinned using a pin-count 
> rather than a dma-fence, it should be completely safe to drop this lock 
> before returning zero from this function. However, if we later move to 
> protecting residency with the request's dma-fence, we can no longer drop it 
> here, since at this point we don't yet have that dma-fence.
>
> An alternative brought up by Daniel would be to revert the commit that 
> introduces the hwsp cacheline suballocation.
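(For illustration, the early drop suggested above might look roughly like the
sketch below; this assumes a helper along the lines of
i915_gem_ww_unlock_single() that takes the object back off the ww list and
drops its dma_resv lock:)

	/*
	 * Sketch: once pinning has succeeded, the hwsp is kept alive by its
	 * pin-count, so the shared suballocation object's lock would not
	 * need to stay part of the ww transaction for the rest of execbuf.
	 */
	if (!err)
		i915_gem_ww_unlock_single(ce->timeline->hwsp_ggtt->obj);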
I'm removing the cacheline at the end of the next series, but I still need to 
figure out why booting fails on legacy platforms; otherwise I'm happy with the 
solution. Even rollover works as expected, without requiring i915_vma_pin.
>
>> +    if (!err && ce->ring->vma->obj)
>> +        err = i915_gem_object_lock(ce->ring->vma->obj, ww);
>> +    if (!err && ce->state)
>> +        err = i915_gem_object_lock(ce->state->obj, ww);
>
> Could these three locks be made interruptible?
They already are, when ww->interruptible is set. :-)
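(Roughly, with field and helper names as I remember them from the series:)

	/*
	 * Sketch: i915_gem_object_lock() picks the interruptible dma_resv
	 * lock variant whenever the ww context was initialised as
	 * interruptible, so all three locks above honour it.
	 */
	static inline int i915_gem_object_lock(struct drm_i915_gem_object *obj,
					       struct i915_gem_ww_ctx *ww)
	{
		return __i915_gem_object_lock(obj, ww, ww && ww->intr);
	}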
>
>> +int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>> +          u32 align, unsigned int flags);
>>     static inline int i915_vma_pin_count(const struct i915_vma *vma)
>>   {
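(For completeness, a sketch of the wrapper shape this hunk implies: the old
entry points become thin wrappers that pass ww == NULL, which is what lets
the lockdep annotation flag callers that should be supplying a ww context:)

	static inline int __must_check
	i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
	{
		/* Callers inside a ww transaction use i915_vma_pin_ww(). */
		return i915_vma_pin_ww(vma, NULL, size, alignment, flags);
	}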
>
> /Thomas
>
>
