On Sat, Aug 09, 2025 at 03:51:37PM +0200, Thomas Hellström wrote:
> From: Matthew Brost <matthew.br...@intel.com>
>
> Implement two-pass MMU notifiers for SVM, enabling multiple VMs or
> devices with GPU mappings to pipeline costly TLB invalidations by
> issuing them in the first pass and waiting for completion in the second.
>
> Signed-off-by: Matthew Brost <matthew.br...@intel.com>
> ---
>  drivers/gpu/drm/drm_gpusvm.c |  2 +-
>  drivers/gpu/drm/xe/xe_svm.c  | 74 ++++++++++++++++++++++++++++++------
>  2 files changed, 63 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c
> index 92dc7d2bd6cf..f153df1bc862 100644
> --- a/drivers/gpu/drm/drm_gpusvm.c
> +++ b/drivers/gpu/drm/drm_gpusvm.c
> @@ -413,7 +413,7 @@ drm_gpusvm_notifier_invalidate_twopass(struct mmu_interval_notifier *mni,
>   * drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM
>   */
>  static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = {
> -	.invalidate_twopass = drm_gpusvm_notifier_invalidate_twopass,
> +	.invalidate_multipass = drm_gpusvm_notifier_invalidate_twopass,

This should be in patch #2.

Matt

>  };
>
>  /**
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index 82a598c8d56e..5728394806ca 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -144,15 +144,8 @@ xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
>  	 * invalidations spanning multiple ranges.
>  	 */
>  	for_each_tile(tile, xe, id)
> -		if (xe_pt_zap_ptes_range(tile, vm, range)) {
> +		if (xe_pt_zap_ptes_range(tile, vm, range))
>  			tile_mask |= BIT(id);
> -			/*
> -			 * WRITE_ONCE pairs with READ_ONCE in
> -			 * xe_vm_has_valid_gpu_mapping()
> -			 */
> -			WRITE_ONCE(range->tile_invalidated,
> -				   range->tile_invalidated | BIT(id));
> -		}
>
>  	return tile_mask;
>  }
> @@ -161,16 +154,60 @@ static void
>  xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
>  				const struct mmu_notifier_range *mmu_range)
>  {
> +	struct xe_svm_range *range = to_xe_range(r);
>  	struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
>
>  	xe_svm_assert_in_notifier(vm);
>
> +	/*
> +	 * WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping()
> +	 */
> +	WRITE_ONCE(range->tile_invalidated, range->tile_present);
> +
>  	drm_gpusvm_range_unmap_pages(&vm->svm.gpusvm, r, &ctx);
>  	if (!xe_vm_is_closed(vm) && mmu_range->event == MMU_NOTIFY_UNMAP)
>  		xe_svm_garbage_collector_add_range(vm, to_xe_range(r),
>  						   mmu_range);
>  }
>
> +struct xe_svm_invalidate_pass {
> +	struct drm_gpusvm *gpusvm;
> +	struct drm_gpusvm_notifier *notifier;
> +#define XE_SVM_INVALIDATE_FENCE_COUNT \
> +	(XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE)
> +	struct xe_gt_tlb_invalidation_fence fences[XE_SVM_INVALIDATE_FENCE_COUNT];
> +	struct mmu_interval_notifier_pass p;
> +};
> +
> +static struct mmu_interval_notifier_pass *
> +xe_svm_invalidate_second(struct mmu_interval_notifier_pass *p,
> +			 const struct mmu_notifier_range *mmu_range,
> +			 unsigned long cur_seq)
> +{
> +	struct xe_svm_invalidate_pass *pass = container_of(p, typeof(*pass), p);
> +	struct drm_gpusvm *gpusvm = pass->gpusvm;
> +	struct drm_gpusvm_notifier *notifier = pass->notifier;
> +	struct drm_gpusvm_range *r = NULL;
> +	struct xe_vm *vm = gpusvm_to_vm(gpusvm);
> +	u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
> +	int id;
> +
> +	/* Adjust invalidation to notifier boundaries */
> +	adj_start = max(drm_gpusvm_notifier_start(notifier), adj_start);
> +	adj_end = min(drm_gpusvm_notifier_end(notifier), adj_end);
> +
> +	for (id = 0; id < XE_SVM_INVALIDATE_FENCE_COUNT; ++id)
> +		xe_gt_tlb_invalidation_fence_wait(&pass->fences[id]);
> +
> +	drm_gpusvm_in_notifier_lock(gpusvm);
> +	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
> +		xe_svm_range_notifier_event_end(vm, r, mmu_range);
> +	drm_gpusvm_in_notifier_unlock(gpusvm);
> +
> +	kfree(pass);
> +	return NULL;
> +}
> +
>  static void xe_svm_invalidate_twopass(struct drm_gpusvm *gpusvm,
>  				      struct drm_gpusvm_notifier *notifier,
>  				      const struct mmu_notifier_range *mmu_range,
> @@ -179,6 +216,8 @@ static void xe_svm_invalidate_twopass(struct drm_gpusvm *gpusvm,
>  	struct xe_vm *vm = gpusvm_to_vm(gpusvm);
>  	struct xe_device *xe = vm->xe;
>  	struct drm_gpusvm_range *r, *first;
> +	struct xe_svm_invalidate_pass *pass = NULL;
> +	struct xe_gt_tlb_invalidation_fence *fences = NULL;
>  	u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
>  	u8 tile_mask = 0;
>  	long err;
> @@ -226,14 +265,25 @@ static void xe_svm_invalidate_twopass(struct drm_gpusvm *gpusvm,
>
>  	xe_device_wmb(xe);
>
> -	err = xe_vm_range_tilemask_tlb_invalidation(vm, NULL, adj_start,
> +	pass = kzalloc(sizeof(*pass), GFP_NOWAIT);
> +	if (pass) {
> +		pass->gpusvm = gpusvm;
> +		pass->notifier = notifier;
> +		pass->p.pass = xe_svm_invalidate_second;
> +		fences = pass->fences;
> +		*p = &pass->p;
> +	}
> +
> +	err = xe_vm_range_tilemask_tlb_invalidation(vm, fences, adj_start,
>  						    adj_end, tile_mask);
>  	WARN_ON_ONCE(err);
>
>  range_notifier_event_end:
> -	r = first;
> -	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
> -		xe_svm_range_notifier_event_end(vm, r, mmu_range);
> +	if (!pass) {
> +		r = first;
> +		drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
> +			xe_svm_range_notifier_event_end(vm, r, mmu_range);
> +	}
>  }
>
>  static int __xe_svm_garbage_collector(struct xe_vm *vm,
> --
> 2.50.1
>
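For anyone skimming the thread, a rough standalone sketch of the shape the
commit message describes (plain userspace C, all names below are made up for
illustration; they are not kernel or driver APIs): the first pass issues the
costly invalidation and hands back a continuation, and the wait is only paid
in the second pass, so several notifiers' invalidations can overlap. In the
patch itself the first pass is xe_svm_invalidate_twopass(), which issues the
TLB invalidations via xe_vm_range_tilemask_tlb_invalidation() and publishes
the continuation through *p, and the second pass is xe_svm_invalidate_second(),
which waits on the fences and then runs xe_svm_range_notifier_event_end().

/*
 * Illustrative two-pass sketch only. struct pass stands in for
 * struct mmu_interval_notifier_pass; issue_invalidation() and
 * wait_invalidation() stand in for issuing and waiting on the
 * TLB invalidation fences.
 */
#include <stdio.h>
#include <stdlib.h>

struct pass;
typedef struct pass *(*pass_fn)(struct pass *p);

struct pass {
	pass_fn run;	/* second pass: wait and finish */
	int fence;	/* stands in for the invalidation fence(s) */
};

static void issue_invalidation(int fence)
{
	printf("pass 1: issued invalidation, fence %d\n", fence);
}

static void wait_invalidation(int fence)
{
	printf("pass 2: waited on fence %d\n", fence);
}

static struct pass *second_pass(struct pass *p)
{
	wait_invalidation(p->fence);	/* the only blocking step */
	free(p);
	return NULL;			/* no further passes */
}

/* First pass: start the work, return a continuation (or NULL on OOM). */
static struct pass *first_pass(int fence)
{
	struct pass *p = malloc(sizeof(*p));

	issue_invalidation(fence);
	if (!p) {
		/* No continuation: complete synchronously instead. */
		wait_invalidation(fence);
		return NULL;
	}
	p->run = second_pass;
	p->fence = fence;
	return p;
}

int main(void)
{
	struct pass *cont[3] = { NULL };
	int i;

	/* Walk 1: every notifier issues its invalidation up front. */
	for (i = 0; i < 3; i++)
		cont[i] = first_pass(i);

	/* Walk 2: the waits now overlap instead of serializing. */
	for (i = 0; i < 3; i++)
		if (cont[i])
			cont[i]->run(cont[i]);

	return 0;
}

The synchronous branch above mirrors the patch's GFP_NOWAIT failure path: when
the pass object cannot be allocated, fences stays NULL and the notifier-end
processing runs inline in the first callback instead of being deferred.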