On Tue, 2025-04-22 at 10:04 -0700, Matthew Brost wrote:
> Mixing GPU and CPU atomics does not work unless a strict migration
> policy is enforced: ranges accessed by GPU atomics must be in device
> memory. Enforce a must-be-in-VRAM policy with a retry loop of 2
> attempts; if the retry loop fails, abort the fault.
> 
> v2:
>  - Only retry migration on atomics
>  - Drop always_migrate_to_vram modparam
> v3:
>  - Only set vram_only on DGFX (Himal)
>  - Bail on get_pages failure if vram_only and retry count exceeded (Himal)
>  - s/vram_only/devmem_only
>  - Update xe_svm_range_is_valid to accept devmem_only argument
> v4:
>  - Fix logic bug on get_pages failure
> 
> Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimi...@intel.com>
> Signed-off-by: Matthew Brost <matthew.br...@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_module.c |  3 --
>  drivers/gpu/drm/xe/xe_module.h |  1 -
>  drivers/gpu/drm/xe/xe_svm.c    | 89 +++++++++++++++++++++++++---------
>  drivers/gpu/drm/xe/xe_svm.h    |  5 --
>  4 files changed, 65 insertions(+), 33 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
> index 05c7d0ae6d83..1c4dfafbcd0b 100644
> --- a/drivers/gpu/drm/xe/xe_module.c
> +++ b/drivers/gpu/drm/xe/xe_module.c
> @@ -33,9 +33,6 @@ struct xe_modparam xe_modparam = {
>  module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600);
>  MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2");
>  
> -module_param_named(always_migrate_to_vram, xe_modparam.always_migrate_to_vram, bool, 0444);
> -MODULE_PARM_DESC(always_migrate_to_vram, "Always migrate to VRAM on GPU fault");
> -
>  module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444);
>  MODULE_PARM_DESC(force_execlist, "Force Execlist submission");
>  
> diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
> index 84339e509c80..5a3bfea8b7b4 100644
> --- a/drivers/gpu/drm/xe/xe_module.h
> +++ b/drivers/gpu/drm/xe/xe_module.h
> @@ -12,7 +12,6 @@
>  struct xe_modparam {
>       bool force_execlist;
>       bool probe_display;
> -     bool always_migrate_to_vram;
>       u32 force_vram_bar_size;
>       int guc_log_level;
>       char *guc_firmware_path;
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index 890f6b2f40e9..f749ae367a8f 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -650,9 +650,11 @@ void xe_svm_fini(struct xe_vm *vm)
>  }
>  
>  static bool xe_svm_range_is_valid(struct xe_svm_range *range,
> -                               struct xe_tile *tile)
> +                               struct xe_tile *tile,
> +                               bool devmem_only)
>  {
> -     return (range->tile_present & ~range->tile_invalidated) & BIT(tile->id);
> +     return ((range->tile_present & ~range->tile_invalidated) & BIT(tile->id))
> +             && (!devmem_only || range->base.flags.migrate_devmem);
>  }

So let's say devmem_only is true here, and range->base.flags.migrate_devmem
is false. Wouldn't that mean the range is unusable and needs to be freed
and re-allocated?

Also, another thing going back to older code: it seems like
range->tile_invalidated is protected by the notifier lock, so shouldn't we
assert that it is held in this function? It seems not to be held further
below:

>  
>  #if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
> @@ -726,6 +728,35 @@ static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
>  }
>  #endif
>  
> +static bool supports_4K_migration(struct xe_device *xe)
> +{
> +     if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
> +             return false;
> +
> +     return true;
> +}

Do we have any hardware that supports pagefaults but not 4K VRAM pages?

> +
> +static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range,
> +                                            struct xe_vma *vma)
> +{
> +     struct xe_vm *vm = range_to_vm(&range->base);
> +     u64 range_size = xe_svm_range_size(range);
> +
> +     if (!range->base.flags.migrate_devmem)
> +             return false;
> +
> +     if (xe_svm_range_in_vram(range)) {
> +             drm_dbg(&vm->xe->drm, "Range is already in VRAM\n");
> +             return false;
> +     }
> +
> +     if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) {
> +             drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
> +             return false;
> +     }
> +
> +     return true;
> +}
>  
>  /**
>   * xe_svm_handle_pagefault() - SVM handle page fault
> @@ -750,12 +781,15 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
>                       IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
>               .check_pages_threshold = IS_DGFX(vm->xe) &&
>                       IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
> +             .devmem_only = atomic && IS_DGFX(vm->xe) &&
> +                     IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
>       };
>       struct xe_svm_range *range;
>       struct drm_gpusvm_range *r;
>       struct drm_exec exec;
>       struct dma_fence *fence;
>       struct xe_tile *tile = gt_to_tile(gt);
> +     int migrate_try_count = ctx.devmem_only ? 3 : 1;
>       ktime_t end = 0;
>       int err;
>  
> @@ -777,23 +811,26 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
>               return PTR_ERR(r);
>  
>       range = to_xe_range(r);
> -     if (xe_svm_range_is_valid(range, tile))
> +     if (xe_svm_range_is_valid(range, tile, ctx.devmem_only))

Requires notifier lock. Also see comment on re-allocating the range
above.
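
For illustration only, a rough sketch of what asserting that in
xe_svm_range_is_valid() could look like (this assumes the gpusvm notifier
lock is an rwsem reachable as vm->svm.gpusvm.notifier_lock; that name is an
unverified assumption on my part, not something from the patch):

static bool xe_svm_range_is_valid(struct xe_svm_range *range,
				  struct xe_tile *tile,
				  bool devmem_only)
{
	struct xe_vm *vm = range_to_vm(&range->base);

	/* Document the locking rule for tile_present/tile_invalidated */
	lockdep_assert_held(&vm->svm.gpusvm.notifier_lock);

	return ((range->tile_present & ~range->tile_invalidated) & BIT(tile->id))
		&& (!devmem_only || range->base.flags.migrate_devmem);
}

The caller would of course then have to take the notifier lock around the
check, which is the point being raised here.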

>               return 0;
>  
>       range_debug(range, "PAGE FAULT");
>  
> -     /* XXX: Add migration policy, for now migrate range once */
> -     if (!range->skip_migrate && range->base.flags.migrate_devmem &&
> -         xe_svm_range_size(range) >= SZ_64K) {
> -             range->skip_migrate = true;
> -
> +     if (--migrate_try_count >= 0 &&
> +         xe_svm_range_needs_migrate_to_vram(range, vma)) {

Requires notifier lock.

Should we have some sort of timeout instead of a try-count? Perhaps as
a last resort fall back to a 4K range?
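
Purely as a sketch of the timeout idea (the helper name and the 5 ms budget
below are made up for illustration; it would need its own ktime_t rather
than reusing the existing 'end' used for the validation retry backoff, and
relies on linux/ktime.h):

/* Illustrative only: retry migration until a small time budget expires. */
static bool xe_svm_migrate_should_retry(ktime_t *end)
{
	if (!*end)
		*end = ktime_add_ms(ktime_get(), 5);	/* arbitrary budget */

	return ktime_before(ktime_get(), *end);
}

The fault handler would then test xe_svm_migrate_should_retry(&migrate_end)
where it currently decrements migrate_try_count.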

/Thomas



>               err = xe_svm_alloc_vram(vm, tile, range, &ctx);
>               if (err) {
> -                     drm_dbg(&vm->xe->drm,
> -                             "VRAM allocation failed, falling back to "
> -                             "retrying fault, asid=%u, errno=%pe\n",
> -                             vm->usm.asid, ERR_PTR(err));
> -                     goto retry;
> +                     if (migrate_try_count || !ctx.devmem_only) {
> +                             drm_dbg(&vm->xe->drm,
> +                                     "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
> +                                     vm->usm.asid, ERR_PTR(err));
> +                             goto retry;
> +                     } else {
> +                             drm_err(&vm->xe->drm,
> +                                     "VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
> +                                     vm->usm.asid, ERR_PTR(err));
> +                             return err;
> +                     }
>               }
>       }
>  
> @@ -801,15 +838,22 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
>       err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx);
>       /* Corner where CPU mappings have changed */
>       if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
> -             if (err == -EOPNOTSUPP) {
> -                     range_debug(range, "PAGE FAULT - EVICT PAGES");
> -                     drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base);
> +             if (migrate_try_count > 0 || !ctx.devmem_only) {
> +                     if (err == -EOPNOTSUPP) {
> +                             range_debug(range, "PAGE FAULT - EVICT PAGES");
> +                             drm_gpusvm_range_evict(&vm->svm.gpusvm,
> +                                                    &range->base);
> +                     }
> +                     drm_dbg(&vm->xe->drm,
> +                             "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
> +                             vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
> +                     range_debug(range, "PAGE FAULT - RETRY PAGES");
> +                     goto retry;
> +             } else {
> +                     drm_err(&vm->xe->drm,
> +                             "Get pages failed, retry count exceeded, asid=%u, gpusvm=%p, errno=%pe\n",
> +                             vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
>               }
> -             drm_dbg(&vm->xe->drm,
> -                     "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
> -                     vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
> -             range_debug(range, "PAGE FAULT - RETRY PAGES");
> -             goto retry;
>       }
>       if (err) {
>               range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
> @@ -843,9 +887,6 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
>       }
>       drm_exec_fini(&exec);
>  
> -     if (xe_modparam.always_migrate_to_vram)
> -             range->skip_migrate = false;
> -
>       dma_fence_wait(fence, false);
>       dma_fence_put(fence);
>  
> diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
> index 3d441eb1f7ea..0e1f376a7471 100644
> --- a/drivers/gpu/drm/xe/xe_svm.h
> +++ b/drivers/gpu/drm/xe/xe_svm.h
> @@ -39,11 +39,6 @@ struct xe_svm_range {
>        * range. Protected by GPU SVM notifier lock.
>        */
>       u8 tile_invalidated;
> -     /**
> -      * @skip_migrate: Skip migration to VRAM, protected by GPU fault handler
> -      * locking.
> -      */
> -     u8 skip_migrate :1;
>  };
>  
>  /**
