On 31-10-2022 23:16, Matt Roper wrote:
> On Mon, Oct 31, 2022 at 06:01:11PM +0530, Aravind Iddamsetty wrote:
>> On XE_LPM+ platforms the media engines are carved out into a separate
>> GT but have a common GGTMMADR address range which essentially makes
>> the GGTT address space to be shared between media and render GT.
> 
> While this is all true, I feel like this description is lacking a bit of
> explanation for why/how that translates into the code changes below.
> For example you should elaborate on the areas this impacts, such as the
> need to invalidate both GTs' TLBs, retire requests for both GTs, etc.
> 
> Also, the movement of the PAT setup should be noted and explained as
> well since it differs from how you approached the other changes here.
> 
>>
>> BSPEC: 63834
>>
>> Cc: Matt Roper <matthew.d.ro...@intel.com>
>> Signed-off-by: Aravind Iddamsetty <aravind.iddamse...@intel.com>
>> ---
>>  drivers/gpu/drm/i915/gt/intel_ggtt.c      | 49 +++++++++++-------
>>  drivers/gpu/drm/i915/gt/intel_gt.c        | 15 +++++-
>>  drivers/gpu/drm/i915/gt/intel_gt_types.h  |  3 ++
>>  drivers/gpu/drm/i915/gt/intel_gtt.h       |  3 ++
>>  drivers/gpu/drm/i915/i915_driver.c        | 19 +++++--
>>  drivers/gpu/drm/i915/i915_gem_evict.c     | 63 +++++++++++++++++------
>>  drivers/gpu/drm/i915/i915_vma.c           |  5 +-
>>  drivers/gpu/drm/i915/selftests/i915_gem.c |  2 +
>>  drivers/gpu/drm/i915/selftests/mock_gtt.c |  1 +
>>  9 files changed, 115 insertions(+), 45 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
>> b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> index 2518cebbf931..f5c2f3c58627 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> @@ -196,10 +196,13 @@ void i915_ggtt_suspend_vm(struct i915_address_space 
>> *vm)
>>  
>>  void i915_ggtt_suspend(struct i915_ggtt *ggtt)
>>  {
>> +    struct intel_gt *gt;
>> +
>>      i915_ggtt_suspend_vm(&ggtt->vm);
>>      ggtt->invalidate(ggtt);
>>  
>> -    intel_gt_check_and_clear_faults(ggtt->vm.gt);
>> +    list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
>> +            intel_gt_check_and_clear_faults(gt);
>>  }
>>  
>>  void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
>> @@ -214,27 +217,36 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
>>  
>>  static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
>>  {
>> -    struct intel_uncore *uncore = ggtt->vm.gt->uncore;
>> +    struct intel_uncore *uncore;
>> +    struct intel_gt *gt;
>>  
>> -    /*
>> -     * Note that as an uncached mmio write, this will flush the
>> -     * WCB of the writes into the GGTT before it triggers the invalidate.
>> -     */
>> -    intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
>> +    list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
>> +            uncore = gt->uncore;
>> +            /*
>> +             * Note that as an uncached mmio write, this will flush the
>> +             * WCB of the writes into the GGTT before it triggers the 
>> invalidate.
>> +             */
>> +            intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, 
>> GFX_FLSH_CNTL_EN);
> 
> This isn't a GT register, so writing it for each GT doesn't do anything
> different than just writing it once.  But actually it doesn't look like
> this is even a register we should be writing to anymore since Xe_HP.
> The GFX_FLSH_CNTL register no longer lives here.

OK, I'll remove the iteration over the GTs. I also do not see an equivalent
register for Xe_HP, so I'll leave this write as it is for now.
> 
>> +    }
>>  }
>>  
>>  static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
>>  {
>> -    struct intel_uncore *uncore = ggtt->vm.gt->uncore;
>>      struct drm_i915_private *i915 = ggtt->vm.i915;
>>  
>>      gen8_ggtt_invalidate(ggtt);
>>  
>> -    if (GRAPHICS_VER(i915) >= 12)
>> -            intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
>> -                                  GEN12_GUC_TLB_INV_CR_INVALIDATE);
>> -    else
>> -            intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
>> +    if (GRAPHICS_VER(i915) >= 12) {
>> +            struct intel_gt *gt;
>> +
>> +            list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
>> +                    intel_uncore_write_fw(gt->uncore,
>> +                                          GEN12_GUC_TLB_INV_CR,
>> +                                          GEN12_GUC_TLB_INV_CR_INVALIDATE);
>> +    } else {
>> +            intel_uncore_write_fw(ggtt->vm.gt->uncore,
>> +                                  GEN8_GTCR, GEN8_GTCR_INVALIDATE);
>> +    }
>>  }
>>  
>>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>> @@ -986,8 +998,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>>  
>>      ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>>  
>> -    setup_private_pat(ggtt->vm.gt);
>> -
>>      return ggtt_probe_common(ggtt, size);
>>  }
>>  
>> @@ -1186,7 +1196,7 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, 
>> struct intel_gt *gt)
>>              (u64)ggtt->mappable_end >> 20);
>>      drm_dbg(&i915->drm, "DSM size = %lluM\n",
>>              (u64)resource_size(&intel_graphics_stolen_res) >> 20);
>> -
>> +    INIT_LIST_HEAD(&ggtt->gt_list);
>>      return 0;
>>  }
>>  
>> @@ -1296,9 +1306,11 @@ bool i915_ggtt_resume_vm(struct i915_address_space 
>> *vm)
>>  
>>  void i915_ggtt_resume(struct i915_ggtt *ggtt)
>>  {
>> +    struct intel_gt *gt;
>>      bool flush;
>>  
>> -    intel_gt_check_and_clear_faults(ggtt->vm.gt);
>> +    list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
>> +            intel_gt_check_and_clear_faults(gt);
>>  
>>      flush = i915_ggtt_resume_vm(&ggtt->vm);
>>  
>> @@ -1307,9 +1319,6 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
>>      if (flush)
>>              wbinvd_on_all_cpus();
>>  
>> -    if (GRAPHICS_VER(ggtt->vm.i915) >= 8)
>> -            setup_private_pat(ggtt->vm.gt);
>> -
>>      intel_ggtt_restore_fences(ggtt);
>>  }
>>  
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
>> b/drivers/gpu/drm/i915/gt/intel_gt.c
>> index 2e796ffad911..d72efb74563a 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
>> @@ -110,9 +110,17 @@ static int intel_gt_probe_lmem(struct intel_gt *gt)
>>  
>>  int intel_gt_assign_ggtt(struct intel_gt *gt)
>>  {
>> -    gt->ggtt = drmm_kzalloc(&gt->i915->drm, sizeof(*gt->ggtt), GFP_KERNEL);
>> +    /* Media GT shares primary GT's GGTT */
>> +    if (gt->type == GT_MEDIA) {
>> +            gt->ggtt = to_gt(gt->i915)->ggtt;
>> +    } else {
>> +            gt->ggtt = drmm_kzalloc(&gt->i915->drm, sizeof(*gt->ggtt), 
>> GFP_KERNEL);
>> +            if (!gt->ggtt)
>> +                    return -ENOMEM;
>> +    }
>>  
>> -    return gt->ggtt ? 0 : -ENOMEM;
>> +    list_add_tail(&gt->ggtt_link, &gt->ggtt->gt_list);
>> +    return 0;
>>  }
>>  
>>  int intel_gt_init_mmio(struct intel_gt *gt)
>> @@ -965,6 +973,9 @@ int intel_gt_tiles_init(struct drm_i915_private *i915)
>>      int ret;
>>  
>>      for_each_gt(gt, i915, id) {
>> +            if (GRAPHICS_VER(i915) >= 8)
>> +                    setup_private_pat(gt);
>> +
> 
> Since the term "tile" is used for PVC-style remote tiles (which we have
> some framework for, but haven't enabled yet), it seems confusing to have
> the PAT setup for all GTs (including the standalone media GT) in a
> function called intel_gt_tiles_init().  Maybe we should also have a prep
> patch that renames this function if we're going to start doing non-tile
> things in here too?

I'll move this to i915_driver_hw_probe().
> 
>>              ret = intel_gt_probe_lmem(gt);
>>              if (ret)
>>                      return ret;
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
>> b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>> index 6f686a4244f0..aee37e9e79b0 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>> @@ -292,6 +292,9 @@ struct intel_gt {
>>      struct kobject *sysfs_defaults;
>>  
>>      struct i915_perf_gt perf;
>> +
>> +    /** link: &ggtt.gt_list */
>> +    struct list_head ggtt_link;
>>  };
>>  
>>  struct intel_gt_definition {
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h 
>> b/drivers/gpu/drm/i915/gt/intel_gtt.h
>> index 4d75ba4bb41d..cb1272702a1a 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
>> @@ -390,6 +390,9 @@ struct i915_ggtt {
>>      struct mutex error_mutex;
>>      struct drm_mm_node error_capture;
>>      struct drm_mm_node uc_fw;
>> +
>> +    /** List of GTs mapping this GGTT */
>> +    struct list_head gt_list;
>>  };
>>  
>>  struct i915_ppgtt {
>> diff --git a/drivers/gpu/drm/i915/i915_driver.c 
>> b/drivers/gpu/drm/i915/i915_driver.c
>> index c3d43f9b1e45..6b973182ddcc 100644
>> --- a/drivers/gpu/drm/i915/i915_driver.c
>> +++ b/drivers/gpu/drm/i915/i915_driver.c
>> @@ -577,7 +577,8 @@ static int i915_driver_hw_probe(struct drm_i915_private 
>> *dev_priv)
>>  {
>>      struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
>>      struct pci_dev *root_pdev;
>> -    int ret;
>> +    struct intel_gt *gt;
>> +    int ret, i;
>>  
>>      if (i915_inject_probe_failure(dev_priv))
>>              return -ENODEV;
>> @@ -614,9 +615,11 @@ static int i915_driver_hw_probe(struct drm_i915_private 
>> *dev_priv)
>>  
>>      i915_perf_init(dev_priv);
>>  
>> -    ret = intel_gt_assign_ggtt(to_gt(dev_priv));
>> -    if (ret)
>> -            goto err_perf;
>> +    for_each_gt(gt, dev_priv, i) {
>> +            ret = intel_gt_assign_ggtt(gt);
>> +            if (ret)
>> +                    goto err_perf;
>> +    }
>>  
>>      ret = i915_ggtt_probe_hw(dev_priv);
>>      if (ret)
>> @@ -1318,7 +1321,8 @@ int i915_driver_suspend_switcheroo(struct 
>> drm_i915_private *i915,
>>  static int i915_drm_resume(struct drm_device *dev)
>>  {
>>      struct drm_i915_private *dev_priv = to_i915(dev);
>> -    int ret;
>> +    struct intel_gt *gt;
>> +    int ret, i;
>>  
>>      disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>>  
>> @@ -1333,6 +1337,11 @@ static int i915_drm_resume(struct drm_device *dev)
>>              drm_err(&dev_priv->drm, "failed to re-enable GGTT\n");
>>  
>>      i915_ggtt_resume(to_gt(dev_priv)->ggtt);
>> +
>> +    for_each_gt(gt, dev_priv, i)
>> +            if (GRAPHICS_VER(gt->i915) >= 8)
>> +                    setup_private_pat(gt);
>> +
>>      /* Must be called after GGTT is resumed. */
>>      intel_dpt_resume(dev_priv);
>>  
>> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c 
>> b/drivers/gpu/drm/i915/i915_gem_evict.c
>> index f025ee4fa526..e9d4352ebfb8 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
>> @@ -43,16 +43,30 @@ static bool dying_vma(struct i915_vma *vma)
>>      return !kref_read(&vma->obj->base.refcount);
>>  }
>>  
>> -static int ggtt_flush(struct intel_gt *gt)
>> +static int ggtt_flush(struct i915_address_space *vm)
>>  {
>> -    /*
>> -     * Not everything in the GGTT is tracked via vma (otherwise we
>> -     * could evict as required with minimal stalling) so we are forced
>> -     * to idle the GPU and explicitly retire outstanding requests in
>> -     * the hopes that we can then remove contexts and the like only
>> -     * bound by their active reference.
>> -     */
>> -    return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
>> +    int ret = 0;
>> +
>> +    if (i915_is_ggtt(vm)) {
>> +            struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>> +            struct intel_gt *gt;
>> +
>> +            list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
>> +                    /*
>> +                     * Not everything in the GGTT is tracked via vma 
>> (otherwise we
>> +                     * could evict as required with minimal stalling) so we 
>> are forced
>> +                     * to idle the GPU and explicitly retire outstanding 
>> requests in
>> +                     * the hopes that we can then remove contexts and the 
>> like only
>> +                     * bound by their active reference.
>> +                     */
>> +                    ret = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
>> +                    if (ret)
>> +                            return ret;
>> +            }
>> +    } else {
>> +            ret = intel_gt_wait_for_idle(vm->gt, MAX_SCHEDULE_TIMEOUT);
> 
> This function only ever gets called on the GGTT as far as I can see
> (which makes sense given that its name starts with 'ggtt').  It's not
> clear to me why we want to handle PPGTT too now?
> 
Thanks for catching this. It is indeed only used for GGTT, so this
i915_is_ggtt() check is not needed.
> Even if this is intentional and correct, it might be best to move this
> up to a small if statement at the top of the function with a return so
> that we can eliminate a level of nesting from most of the function.
> 
>         if (!i915_is_ggtt(vm)) {
>                 wait for idle;
>                 return;
>         }
> 
>         ...
> 
> 
>> +    }
>> +    return ret;
>>  }
>>  
>>  static bool grab_vma(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
>> @@ -149,6 +163,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
>>      struct drm_mm_node *node;
>>      enum drm_mm_insert_mode mode;
>>      struct i915_vma *active;
>> +    struct intel_gt *gt;
>>      int ret;
>>  
>>      lockdep_assert_held(&vm->mutex);
>> @@ -174,7 +189,14 @@ i915_gem_evict_something(struct i915_address_space *vm,
>>                                  min_size, alignment, color,
>>                                  start, end, mode);
>>  
>> -    intel_gt_retire_requests(vm->gt);
>> +    if (i915_is_ggtt(vm)) {
>> +            struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>> +
>> +            list_for_each_entry_rcu(gt, &ggtt->gt_list, ggtt_link)
>> +                    intel_gt_retire_requests(gt);
>> +    } else {
>> +            intel_gt_retire_requests(vm->gt);
>> +    }
>>  
>>  search_again:
>>      active = NULL;
>> @@ -246,7 +268,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
>>      if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy))
>>              return -EBUSY;
>>  
>> -    ret = ggtt_flush(vm->gt);
>> +    ret = ggtt_flush(vm);
> 
> As noted above, this call is always done on GGTT (since we checked and
> returned if it wasn't GGTT just before this point in the function).
> 
>>      if (ret)
>>              return ret;
>>  
>> @@ -332,7 +354,15 @@ int i915_gem_evict_for_node(struct i915_address_space 
>> *vm,
>>       * a stray pin (preventing eviction) that can only be resolved by
>>       * retiring.
>>       */
>> -    intel_gt_retire_requests(vm->gt);
>> +    if (i915_is_ggtt(vm)) {
>> +            struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>> +            struct intel_gt *gt;
>> +
>> +            list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
>> +                    intel_gt_retire_requests(gt);
>> +    } else {
>> +            intel_gt_retire_requests(vm->gt);
>> +    }
>>  
>>      if (i915_vm_has_cache_coloring(vm)) {
>>              /* Expand search to cover neighbouring guard pages (or lack!) */
>> @@ -437,11 +467,10 @@ int i915_gem_evict_vm(struct i915_address_space *vm, 
>> struct i915_gem_ww_ctx *ww)
>>       * pin themselves inside the global GTT and performing the
>>       * switch otherwise is ineffective.
>>       */
>> -    if (i915_is_ggtt(vm)) {
>> -            ret = ggtt_flush(vm->gt);
>> -            if (ret)
>> -                    return ret;
>> -    }
>> +
>> +    ret = ggtt_flush(vm);
>> +    if (ret)
>> +            return ret;
> 
> It's not clear to me why we're removing the GGTT check here?

With the above change done, this i915_is_ggtt() check will be restored.

Thanks,
Aravind.
> 
> 
> Matt
> 
>>  
>>      do {
>>              struct i915_vma *vma, *vn;
>> diff --git a/drivers/gpu/drm/i915/i915_vma.c 
>> b/drivers/gpu/drm/i915/i915_vma.c
>> index c39488eb9eeb..24cbee3c1ce5 100644
>> --- a/drivers/gpu/drm/i915/i915_vma.c
>> +++ b/drivers/gpu/drm/i915/i915_vma.c
>> @@ -1542,6 +1542,8 @@ static int __i915_ggtt_pin(struct i915_vma *vma, 
>> struct i915_gem_ww_ctx *ww,
>>                         u32 align, unsigned int flags)
>>  {
>>      struct i915_address_space *vm = vma->vm;
>> +    struct intel_gt *gt;
>> +    struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>>      int err;
>>  
>>      do {
>> @@ -1557,7 +1559,8 @@ static int __i915_ggtt_pin(struct i915_vma *vma, 
>> struct i915_gem_ww_ctx *ww,
>>              }
>>  
>>              /* Unlike i915_vma_pin, we don't take no for an answer! */
>> -            flush_idle_contexts(vm->gt);
>> +            list_for_each_entry_rcu(gt, &ggtt->gt_list, ggtt_link)
>> +                    flush_idle_contexts(gt);
>>              if (mutex_lock_interruptible(&vm->mutex) == 0) {
>>                      /*
>>                       * We pass NULL ww here, as we don't want to unbind
>> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c 
>> b/drivers/gpu/drm/i915/selftests/i915_gem.c
>> index e5dd82e7e480..2535b9684bd1 100644
>> --- a/drivers/gpu/drm/i915/selftests/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
>> @@ -127,6 +127,8 @@ static void igt_pm_resume(struct drm_i915_private *i915)
>>       */
>>      with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
>>              i915_ggtt_resume(to_gt(i915)->ggtt);
>> +            if (GRAPHICS_VER(i915) >= 8)
>> +                    setup_private_pat(to_gt(i915));
>>              i915_gem_resume(i915);
>>      }
>>  }
>> diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c 
>> b/drivers/gpu/drm/i915/selftests/mock_gtt.c
>> index 568840e7ca66..b519d271f4fe 100644
>> --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
>> +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
>> @@ -127,6 +127,7 @@ void mock_init_ggtt(struct intel_gt *gt)
>>      ggtt->vm.vma_ops.bind_vma    = mock_bind_ggtt;
>>      ggtt->vm.vma_ops.unbind_vma  = mock_unbind_ggtt;
>>  
>> +    INIT_LIST_HEAD(&ggtt->gt_list);
>>      i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
>>  }
>>  
>> -- 
>> 2.25.1
>>
> 

Reply via email to