On Mon, 2025-08-11 at 13:46 -0700, Matthew Brost wrote:
> On Sat, Aug 09, 2025 at 03:51:37PM +0200, Thomas Hellström wrote:
> > From: Matthew Brost <matthew.br...@intel.com>
> > 
> > Implement two-pass MMU notifiers for SVM, enabling multiple VMs or
> > devices with GPU mappings to pipeline costly TLB invalidations by
> > issuing them in the first pass and waiting for completion in the
> > second.
> > 
> > Signed-off-by: Matthew Brost <matthew.br...@intel.com>
> > ---
> >  drivers/gpu/drm/drm_gpusvm.c |  2 +-
> >  drivers/gpu/drm/xe/xe_svm.c  | 74 ++++++++++++++++++++++++++++++------
> >  2 files changed, 63 insertions(+), 13 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c
> > index 92dc7d2bd6cf..f153df1bc862 100644
> > --- a/drivers/gpu/drm/drm_gpusvm.c
> > +++ b/drivers/gpu/drm/drm_gpusvm.c
> > @@ -413,7 +413,7 @@ drm_gpusvm_notifier_invalidate_twopass(struct mmu_interval_notifier *mni,
> >   * drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM
> >   */
> >  static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = {
> > -	.invalidate_twopass = drm_gpusvm_notifier_invalidate_twopass,
> > +	.invalidate_multipass = drm_gpusvm_notifier_invalidate_twopass,
> 
> This should be in patch #2.
Yup. My bad, that's from fixing up for the interface change in patch 1.
Sorry for that.
/Thomas

> 
> Matt
> 
> >  };
> >  
> >  /**
> > diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> > index 82a598c8d56e..5728394806ca 100644
> > --- a/drivers/gpu/drm/xe/xe_svm.c
> > +++ b/drivers/gpu/drm/xe/xe_svm.c
> > @@ -144,15 +144,8 @@ xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
> >  	 * invalidations spanning multiple ranges.
> >  	 */
> >  	for_each_tile(tile, xe, id)
> > -		if (xe_pt_zap_ptes_range(tile, vm, range)) {
> > +		if (xe_pt_zap_ptes_range(tile, vm, range))
> >  			tile_mask |= BIT(id);
> > -			/*
> > -			 * WRITE_ONCE pairs with READ_ONCE in
> > -			 * xe_vm_has_valid_gpu_mapping()
> > -			 */
> > -			WRITE_ONCE(range->tile_invalidated,
> > -				   range->tile_invalidated | BIT(id));
> > -		}
> >  
> >  	return tile_mask;
> >  }
> > @@ -161,16 +154,60 @@ static void
> >  xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
> >  				const struct mmu_notifier_range *mmu_range)
> >  {
> > +	struct xe_svm_range *range = to_xe_range(r);
> >  	struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
> >  
> >  	xe_svm_assert_in_notifier(vm);
> >  
> > +	/*
> > +	 * WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping()
> > +	 */
> > +	WRITE_ONCE(range->tile_invalidated, range->tile_present);
> > +
> >  	drm_gpusvm_range_unmap_pages(&vm->svm.gpusvm, r, &ctx);
> >  	if (!xe_vm_is_closed(vm) && mmu_range->event == MMU_NOTIFY_UNMAP)
> >  		xe_svm_garbage_collector_add_range(vm, to_xe_range(r),
> >  						   mmu_range);
> >  }
> >  
> > +struct xe_svm_invalidate_pass {
> > +	struct drm_gpusvm *gpusvm;
> > +	struct drm_gpusvm_notifier *notifier;
> > +#define XE_SVM_INVALIDATE_FENCE_COUNT	\
> > +	(XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE)
> > +	struct xe_gt_tlb_invalidation_fence fences[XE_SVM_INVALIDATE_FENCE_COUNT];
> > +	struct mmu_interval_notifier_pass p;
> > +};
> > +
> > +static struct mmu_interval_notifier_pass *
> > +xe_svm_invalidate_second(struct mmu_interval_notifier_pass *p,
> > +			 const struct mmu_notifier_range *mmu_range,
> > +			 unsigned long cur_seq)
> > +{
> > +	struct xe_svm_invalidate_pass *pass = container_of(p, typeof(*pass), p);
> > +	struct drm_gpusvm *gpusvm = pass->gpusvm;
> > +	struct drm_gpusvm_notifier *notifier = pass->notifier;
> > +	struct drm_gpusvm_range *r = NULL;
> > +	struct xe_vm *vm = gpusvm_to_vm(gpusvm);
> > +	u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
> > +	int id;
> > +
> > +	/* Adjust invalidation to notifier boundaries */
> > +	adj_start = max(drm_gpusvm_notifier_start(notifier), adj_start);
> > +	adj_end = min(drm_gpusvm_notifier_end(notifier), adj_end);
> > +
> > +	for (id = 0; id < XE_SVM_INVALIDATE_FENCE_COUNT; ++id)
> > +		xe_gt_tlb_invalidation_fence_wait(&pass->fences[id]);
> > +
> > +	drm_gpusvm_in_notifier_lock(gpusvm);
> > +	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
> > +		xe_svm_range_notifier_event_end(vm, r, mmu_range);
> > +	drm_gpusvm_in_notifier_unlock(gpusvm);
> > +
> > +	kfree(pass);
> > +	return NULL;
> > +}
> > +
> >  static void xe_svm_invalidate_twopass(struct drm_gpusvm *gpusvm,
> >  				      struct drm_gpusvm_notifier *notifier,
> >  				      const struct mmu_notifier_range *mmu_range,
> > @@ -179,6 +216,8 @@ static void xe_svm_invalidate_twopass(struct drm_gpusvm *gpusvm,
> >  	struct xe_vm *vm = gpusvm_to_vm(gpusvm);
> >  	struct xe_device *xe = vm->xe;
> >  	struct drm_gpusvm_range *r, *first;
> > +	struct xe_svm_invalidate_pass *pass = NULL;
> > +	struct xe_gt_tlb_invalidation_fence *fences = NULL;
> >  	u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
> >  	u8 tile_mask = 0;
> >  	long err;
> > @@ -226,14 +265,25 @@ static void xe_svm_invalidate_twopass(struct drm_gpusvm *gpusvm,
> >  
> >  	xe_device_wmb(xe);
> >  
> > -	err = xe_vm_range_tilemask_tlb_invalidation(vm, NULL, adj_start,
> > +	pass = kzalloc(sizeof(*pass), GFP_NOWAIT);
> > +	if (pass) {
> > +		pass->gpusvm = gpusvm;
> > +		pass->notifier = notifier;
> > +		pass->p.pass = xe_svm_invalidate_second;
> > +		fences = pass->fences;
> > +		*p = &pass->p;
> > +	}
> > +
> > +	err = xe_vm_range_tilemask_tlb_invalidation(vm, fences, adj_start,
> >  						    adj_end, tile_mask);
> >  	WARN_ON_ONCE(err);
> >  
> > range_notifier_event_end:
> > -	r = first;
> > -	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
> > -		xe_svm_range_notifier_event_end(vm, r, mmu_range);
> > +	if (!pass) {
> > +		r = first;
> > +		drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
> > +			xe_svm_range_notifier_event_end(vm, r, mmu_range);
> > +	}
> >  }
> >  
> >  static int __xe_svm_garbage_collector(struct xe_vm *vm,
> > -- 
> > 2.50.1
> > 
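
For anyone skimming the thread, the intended flow of the patch is: the first
pass (xe_svm_invalidate_twopass) zaps the PTEs, issues the TLB invalidations
and stashes the fences plus a continuation in xe_svm_invalidate_pass, and the
core MM later runs the second pass (xe_svm_invalidate_second), which waits on
those fences and does the per-range end-of-event handling. Below is a rough,
self-contained userspace sketch of that issue-then-wait pattern only; the
names (invalidate_first, invalidate_second, fake_fence) are made up for
illustration and are not the xe/GPUSVM API.

/*
 * Userspace model of the two-pass idea: issue all invalidations first,
 * then wait for all of them, so the waits overlap instead of serializing.
 * Hypothetical names throughout; not kernel code.
 */
#include <stdio.h>
#include <stdlib.h>

struct fake_fence {
	int issued;	/* stands in for an asynchronous TLB invalidation */
};

struct second_pass {
	struct fake_fence fence;
	int notifier_id;
};

/* Pass one: issue the invalidation and hand back state for pass two. */
static struct second_pass *invalidate_first(int notifier_id)
{
	struct second_pass *p = calloc(1, sizeof(*p));

	if (!p)
		return NULL;	/* real code falls back to a synchronous wait */
	p->notifier_id = notifier_id;
	p->fence.issued = 1;
	printf("notifier %d: invalidation issued\n", notifier_id);
	return p;
}

/* Pass two: wait for completion and release the per-pass state. */
static void invalidate_second(struct second_pass *p)
{
	if (p->fence.issued)	/* stand-in for a fence wait */
		printf("notifier %d: invalidation complete\n", p->notifier_id);
	free(p);
}

int main(void)
{
	struct second_pass *passes[3];
	int i;

	/* First pass over all notifiers: issue everything up front... */
	for (i = 0; i < 3; i++)
		passes[i] = invalidate_first(i);

	/* ...second pass: wait, so the invalidations pipeline across notifiers. */
	for (i = 0; i < 3; i++)
		if (passes[i])
			invalidate_second(passes[i]);

	return 0;
}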