A large number of (unsorted or separate) small (<2MB) mappings can cause a lot of, probably unnecessary, prealloc pages. For example, a single 4k mapping will pre-allocate 3 pagetable pages (one for each of levels 2-4), which can chew up a large amount of unneeded memory. So add a mechanism to put an upper bound on the # of pre-allocated pages in flight.
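
To make the arithmetic above concrete, here is a small standalone sketch (illustration only, not part of the patch; the 4-level/4K-granule layout and the mapping count are assumptions) showing how quickly scattered small mappings can pile up prealloc pages in the worst case:

  /*
   * Illustration only (not part of this patch): worst-case pagetable
   * prealloc for scattered small mappings, assuming a 4-level, 4K-granule
   * pagetable as described above.  Build with: cc -o prealloc prealloc.c
   */
  #include <stdio.h>

  int main(void)
  {
          /* levels 2-4 may each need a fresh table page per mapping: */
          const unsigned intermediate_levels = 3;
          /* hypothetical number of scattered 4k mappings: */
          const unsigned nr_small_maps = 4096;

          /*
           * If no two mappings share an intermediate table, each one
           * pre-allocates one page per intermediate level.
           */
          unsigned worst_case = nr_small_maps * intermediate_levels;

          printf("1 mapping:    %u prealloc pages\n", intermediate_levels);
          printf("%u mappings: %u prealloc pages (%u KiB)\n",
                 nr_small_maps, worst_case, worst_case * 4);

          return 0;
  }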
Signed-off-by: Rob Clark <robin.cl...@oss.qualcomm.com>
---
 drivers/gpu/drm/msm/msm_gem_vma.c | 23 +++++++++++++++++++++--
 drivers/gpu/drm/msm/msm_gpu.h     |  3 +++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c
index b6de87e5c3f7..83f6f95b4865 100644
--- a/drivers/gpu/drm/msm/msm_gem_vma.c
+++ b/drivers/gpu/drm/msm/msm_gem_vma.c
@@ -705,6 +705,8 @@ msm_vma_job_free(struct drm_sched_job *_job)
 
 	mmu->funcs->prealloc_cleanup(mmu, &job->prealloc);
 
+	atomic_sub(job->prealloc.count, &job->queue->in_flight_prealloc);
+
 	drm_sched_job_cleanup(_job);
 
 	job_foreach_bo (obj, job)
@@ -1087,10 +1089,11 @@ ops_are_same_pte(struct msm_vm_bind_op *first, struct msm_vm_bind_op *next)
  * them as a single mapping.  Otherwise the prealloc_count() will not realize
  * they can share pagetable pages and vastly overcount.
  */
-static void
+static int
 vm_bind_prealloc_count(struct msm_vm_bind_job *job)
 {
 	struct msm_vm_bind_op *first = NULL, *last = NULL;
+	int ret;
 
 	for (int i = 0; i < job->nr_ops; i++) {
 		struct msm_vm_bind_op *op = &job->ops[i];
@@ -1119,6 +1122,20 @@ vm_bind_prealloc_count(struct msm_vm_bind_job *job)
 
 	/* Flush the remaining range: */
 	prealloc_count(job, first, last);
+
+	/*
+	 * Now that we know the needed amount to pre-alloc, throttle on pending
+	 * VM_BIND jobs if we already have too much pre-alloc memory in flight
+	 */
+	ret = wait_event_interruptible(
+			to_msm_vm(job->vm)->sched.job_scheduled,
+			atomic_read(&job->queue->in_flight_prealloc) <= 1024);
+	if (ret)
+		return ret;
+
+	atomic_add(job->prealloc.count, &job->queue->in_flight_prealloc);
+
+	return 0;
 }
 
 /*
@@ -1389,7 +1406,9 @@ msm_ioctl_vm_bind(struct drm_device *dev, void *data, struct drm_file *file)
 	if (ret)
 		goto out_unlock;
 
-	vm_bind_prealloc_count(job);
+	ret = vm_bind_prealloc_count(job);
+	if (ret)
+		goto out_unlock;
 
 	struct drm_exec exec;
 	unsigned flags = DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT;
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 31b83e9e3673..5508885d865f 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -555,6 +555,8 @@ static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
  *             seqno, protected by submitqueue lock
  * @idr_lock:  for serializing access to fence_idr
  * @lock:      submitqueue lock for serializing submits on a queue
+ * @in_flight_prealloc: for VM_BIND queue, # of preallocated pgtable pages for
+ *             queued VM_BIND jobs
  * @ref:       reference count
  * @entity:    the submit job-queue
  */
@@ -569,6 +571,7 @@ struct msm_gpu_submitqueue {
 	struct idr fence_idr;
 	struct spinlock idr_lock;
 	struct mutex lock;
+	atomic_t in_flight_prealloc;
 	struct kref ref;
 	struct drm_sched_entity *entity;
 
-- 
2.49.0
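
For reviewers who want to poke at the throttling behaviour in isolation, here is a userspace analogue of the in_flight_prealloc scheme (illustration only, not part of the patch; pthread primitives stand in for wait_event_interruptible() on sched.job_scheduled, and only the 1024-page cap is taken from the patch):

  /*
   * Illustration only: a userspace analogue of the in_flight_prealloc
   * throttle, with a pthread condition variable standing in for
   * wait_event_interruptible() on sched.job_scheduled.  Only the
   * 1024-page cap mirrors the patch; everything else is made up for
   * demonstration.  Build with: cc -o throttle throttle.c -lpthread
   */
  #include <pthread.h>
  #include <stdio.h>

  #define PREALLOC_CAP 1024       /* same threshold the patch waits on */

  static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
  static pthread_cond_t  job_retired_cond = PTHREAD_COND_INITIALIZER;
  static unsigned in_flight_prealloc;

  /* Roughly where vm_bind_prealloc_count() throttles: block while too
   * much prealloc memory is already queued, then account for this job. */
  static void throttle_and_account(unsigned prealloc_count)
  {
          pthread_mutex_lock(&lock);
          while (in_flight_prealloc > PREALLOC_CAP)
                  pthread_cond_wait(&job_retired_cond, &lock);
          in_flight_prealloc += prealloc_count;
          pthread_mutex_unlock(&lock);
  }

  /* Roughly where msm_vma_job_free() subtracts: drop this job's share
   * and wake anything blocked in throttle_and_account(). */
  static void job_retired(unsigned prealloc_count)
  {
          pthread_mutex_lock(&lock);
          in_flight_prealloc -= prealloc_count;
          pthread_cond_broadcast(&job_retired_cond);
          pthread_mutex_unlock(&lock);
  }

  int main(void)
  {
          throttle_and_account(3);        /* e.g. one small 4k mapping */
          printf("in flight after submit: %u pages\n", in_flight_prealloc);
          job_retired(3);
          printf("in flight after retire: %u pages\n", in_flight_prealloc);
          return 0;
  }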