[Public]
> From: Alex Deucher <alexdeuc...@gmail.com> > Sent: Saturday, May 31, 2025 5:48 AM > To: Liang, Prike <prike.li...@amd.com> > Cc: amd-gfx@lists.freedesktop.org; Deucher, Alexander > <alexander.deuc...@amd.com>; Koenig, Christian <christian.koe...@amd.com>; > Lazar, Lijo <lijo.la...@amd.com> > Subject: Re: [PATCH 4/9] drm/amdgpu: validate userq buffer virtual address > and size > > On Fri, May 30, 2025 at 3:56 AM Prike Liang > <prike.li...@amd.com<mailto:prike.li...@amd.com>> wrote: > > > > It needs to validate the userq object virtual address whether it is > > validated in vm mapping. > > > > Signed-off-by: Prike Liang <prike.li...@amd.com<mailto:prike.li...@amd.com>> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 56 > > +++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | > > 2 + drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 25 +++++++++- > > 3 files changed, 80 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > > index f45585bd5872..32e88064bdea 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > > @@ -44,6 +44,41 @@ u32 amdgpu_userq_get_supported_ip_mask(struct > amdgpu_device *adev) > > return userq_ip_mask; > > } > > > > +int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr, > > + u64 expected_size) { > > + struct amdgpu_bo_va_mapping *mapping; > > + u64 user_addr; > > + u64 size; > > + int r; > > + > > + addr &= AMDGPU_GMC_HOLE_MASK; > > + user_addr = ALIGN_DOWN(addr >> AMDGPU_GPU_PAGE_SHIFT, > 0x100); > > + size = expected_size >> AMDGPU_GPU_PAGE_SHIFT; > > + > > + r = amdgpu_bo_reserve(vm->root.bo, false); > > + if (r) > > + return r; > > + > > + mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr); > > + if (!mapping) > > + goto out_err; > > + > > + if (user_addr != mapping->start || > > + (size != 0 && user_addr + size - 1 != mapping->last)) { > > + pr_debug("expected 
size 0x%llx not equal to mapping addr > > 0x%llx size > 0x%llx\n", > > + expected_size, mapping->start << > > AMDGPU_GPU_PAGE_SHIFT, > > + (mapping->last - mapping->start + 1) << > AMDGPU_GPU_PAGE_SHIFT); > > + goto out_err; > > + } > > + amdgpu_bo_unreserve(vm->root.bo); > > + return 0; > > + > > +out_err: > > + amdgpu_bo_unreserve(vm->root.bo); > > + return -EINVAL; > > +} > > + > > static int > > amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr, > > struct amdgpu_usermode_queue *queue) @@ > > -394,6 +429,17 @@ amdgpu_userq_create(struct drm_file *filp, union > drm_amdgpu_userq *args) > > r = -EINVAL; > > goto unlock; > > } > > + /* Validate the userq virtual address. > > + * TODO: Need to firgure out why the > > queue_size(args->in.queue_size)) > doesn't match > > + * with mesa request size. As a workaround there fix the queue_size > > to the > 0x200000. > > + */ > > We just want to validate that the queue is fully within a valid virtual > address range for > a single buffer. It doesn't matter if the queue address is at the start. > Mesa could > allocate a single buffer for all of its user queues and then suballocate ring > buffers > and other metadata out of the memory. [Prike] Thanks, I will update the GEM BO validation method accordingly. 
> Alex > > > + if (amdgpu_userq_input_va_validate(&fpriv->vm, args->in.queue_va, > 0x200000) || > > + amdgpu_userq_input_va_validate(&fpriv->vm, > > args->in.rptr_va, > PAGE_SIZE) || > > + amdgpu_userq_input_va_validate(&fpriv->vm, > > args->in.wptr_va, > PAGE_SIZE)) { > > + drm_file_err(uq_mgr->file, "Usermode queue input virt > > address is > invalidated\n"); > > + r = -EINVAL; > > + goto unlock; > > + } > > > > queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL); > > if (!queue) { > > @@ -504,11 +550,17 @@ static int amdgpu_userq_input_args_validate(struct > drm_device *dev, > > } > > > > if (args->in.queue_va == AMDGPU_BO_INVALID_OFFSET || > > - args->in.queue_size == 0) { > > + args->in.queue_size == 0 || > > + > > !access_ok(u64_to_user_ptr(args->in.queue_va & > AMDGPU_GMC_HOLE_MASK), > > + args->in.queue_size)) { > > drm_file_err(filp, "invalidate userq queue va or > > size\n"); > > return -EINVAL; > > } > > - if (!args->in.wptr_va || !args->in.rptr_va) { > > + if (!args->in.wptr_va || !args->in.rptr_va || > > + !access_ok(u64_to_user_ptr(args->in.wptr_va & > AMDGPU_GMC_HOLE_MASK), > > + sizeof(uint64_t)) || > > + !access_ok(u64_to_user_ptr(args->in.rptr_va & > AMDGPU_GMC_HOLE_MASK), > > + sizeof(uint64_t))) { > > drm_file_err(filp, "invalidate userq queue rptr or > > wptr\n"); > > return -EINVAL; > > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h > > index ec040c2fd6c9..375fba639c94 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h > > @@ -132,4 +132,6 @@ int > > amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, > int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, > > u32 idx); > > > > +int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr, > > + u64 expected_size); > > #endif > > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c > > 
b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c > > index b46e67b179fc..4ba064e6dda0 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c > > +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c > > @@ -28,6 +28,7 @@ > > > > #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE #define > > AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE > > +#define MQD_SHADOW_BASE_SIZE 73728 > > > > static int > > mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo) @@ -246,6 +247,19 > > @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, > > r = -ENOMEM; > > goto free_mqd; > > } > > + /* > > + * EOP VA size = MEC_HPD_SIZE * adev->gfx.num_compute_rings. > > + * Since the userq support newer than gfx10, so the keep > MEC_HPD_SIZE as 2048. > > + * TODO: It requires figuring out why the EOP VA not match > > with > MEC_HPD_SIZE * > > + * adev->gfx.num_compute_rings. > > + */ > > The EOP size is MEC_HPD_SIZE. We only multiply it by > adev->gfx.num_compute_rings for kernel queues because we use one > allocation for the EOP buffer for all of the kernel queues. [Prike] Yes, the mapping sizes used here for the EOP and for the shadow BOs below are only meant to match the sizes Mesa uses when it creates and maps those BOs. 
Thanks, Prike > > + if (amdgpu_userq_input_va_validate(queue->vm, compute_mqd- > >eop_va, > > + /*ALIGN(2048 * > > adev->gfx.num_compute_rings, > PAGE_SIZE)*/ > > + 0x200000)) { > > + drm_file_err(uq_mgr->file, "EOP VA is > > invalidated\n"); > > + r = -EINVAL; > > + goto free_mqd; > > + } > > > > userq_props->eop_gpu_addr = compute_mqd->eop_va; > > userq_props->hqd_pipe_priority = > > AMDGPU_GFX_PIPE_PRIO_NORMAL; @@ -274,6 +288,15 @@ static int > mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, > > userq_props->csa_addr = mqd_gfx_v11->csa_va; > > userq_props->tmz_queue = > > mqd_user->flags & > > AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE; > > + > > + /* TODO: Need to check the shadow WA size doesn't match > > with mesa > request.*/ > > + if (amdgpu_userq_input_va_validate(queue->vm, mqd_gfx_v11- > >shadow_va, > > + 0x200000)) { > > The shadow size should >= the shadow size defined in struct > amdgpu_gfx_shadow_info and returned by > adev->gfx.funcs->get_gfx_shadow_info(). > > > + drm_file_err(uq_mgr->file, "shadow VA is > > invalidated\n"); > > + r = -EINVAL; > > + goto free_mqd; > > + } > > + > > kfree(mqd_gfx_v11); > > } else if (queue->queue_type == AMDGPU_HW_IP_DMA) { > > struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11; > > @@ -290,7 +313,7 @@ static int mes_userq_mqd_create(struct > amdgpu_userq_mgr *uq_mgr, > > r = -ENOMEM; > > goto free_mqd; > > } > > - > > + /* TODO: validate the csa and check the va size as > > + well.*/ > > See adev->gfx.funcs->get_gfx_shadow_info() here as well. > > Alex > > > userq_props->csa_addr = mqd_sdma_v11->csa_va; > > kfree(mqd_sdma_v11); > > } > > -- > > 2.34.1 > >