[PATCH v4 2/2] drm/virtio: Use vmalloc for command buffer allocations.
Userspace requested command buffer allocations could be too large to
make as a contiguous allocation.  Use vmalloc if necessary to satisfy
those allocations.

Signed-off-by: David Riley
---
 drivers/gpu/drm/virtio/virtgpu_ioctl.c |  4 +-
 drivers/gpu/drm/virtio/virtgpu_vq.c    | 78 +++---
 2 files changed, 72 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index f5083c538f9c..9af1ec62434f 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -143,7 +143,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 		goto out_unused_fd;
 	}
 
-	buf = memdup_user(u64_to_user_ptr(exbuf->command), exbuf->size);
+	buf = vmemdup_user(u64_to_user_ptr(exbuf->command), exbuf->size);
 	if (IS_ERR(buf)) {
 		ret = PTR_ERR(buf);
 		goto out_unresv;
@@ -172,7 +172,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	return 0;
 
 out_memdup:
-	kfree(buf);
+	kvfree(buf);
 out_unresv:
 	if (buflist)
 		virtio_gpu_array_unlock_resv(buflist);
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 5a64c776138d..9f9b782dd332 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -155,7 +155,7 @@ static void free_vbuf(struct virtio_gpu_device *vgdev,
 {
 	if (vbuf->resp_size > MAX_INLINE_RESP_SIZE)
 		kfree(vbuf->resp_buf);
-	kfree(vbuf->data_buf);
+	kvfree(vbuf->data_buf);
 	kmem_cache_free(vgdev->vbufs, vbuf);
 }
 
@@ -256,13 +256,54 @@ void virtio_gpu_dequeue_cursor_func(struct work_struct *work)
 	wake_up(&vgdev->cursorq.ack_queue);
 }
 
+/* Create sg_table from a vmalloc'd buffer. */
+static struct sg_table *vmalloc_to_sgt(char *data, uint32_t size, int *sg_ents)
+{
+	int ret, s, i;
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	struct page *pg;
+
+	if (WARN_ON(!PAGE_ALIGNED(data)))
+		return NULL;
+
+	sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
+	if (!sgt)
+		return NULL;
+
+	*sg_ents = DIV_ROUND_UP(size, PAGE_SIZE);
+	ret = sg_alloc_table(sgt, *sg_ents, GFP_KERNEL);
+	if (ret) {
+		kfree(sgt);
+		return NULL;
+	}
+
+	for_each_sg(sgt->sgl, sg, *sg_ents, i) {
+		pg = vmalloc_to_page(data);
+		if (!pg) {
+			sg_free_table(sgt);
+			kfree(sgt);
+			return NULL;
+		}
+
+		s = min_t(int, PAGE_SIZE, size);
+		sg_set_page(sg, pg, s, 0);
+
+		size -= s;
+		data += s;
+	}
+
+	return sgt;
+}
+
 static bool virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
-						struct virtio_gpu_vbuffer *vbuf)
+						struct virtio_gpu_vbuffer *vbuf,
+						struct scatterlist *vout)
 		__releases(&vgdev->ctrlq.qlock)
 		__acquires(&vgdev->ctrlq.qlock)
 {
 	struct virtqueue *vq = vgdev->ctrlq.vq;
-	struct scatterlist *sgs[3], vcmd, vout, vresp;
+	struct scatterlist *sgs[3], vcmd, vresp;
 	int outcnt = 0, incnt = 0;
 	bool notify = false;
 	int ret;
@@ -274,9 +315,8 @@ static bool virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
 	sgs[outcnt + incnt] = &vcmd;
 	outcnt++;
 
-	if (vbuf->data_size) {
-		sg_init_one(&vout, vbuf->data_buf, vbuf->data_size);
-		sgs[outcnt + incnt] = &vout;
+	if (vout) {
+		sgs[outcnt + incnt] = vout;
 		outcnt++;
 	}
 
@@ -308,7 +348,24 @@ static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
 						struct virtio_gpu_fence *fence)
 {
 	struct virtqueue *vq = vgdev->ctrlq.vq;
+	struct scatterlist *vout = NULL, sg;
+	struct sg_table *sgt = NULL;
 	bool notify;
+	int outcnt = 0;
+
+	if (vbuf->data_size) {
+		if (is_vmalloc_addr(vbuf->data_buf)) {
+			sgt = vmalloc_to_sgt(vbuf->data_buf, vbuf->data_size,
+					     &outcnt);
+			if (!sgt)
+				return -ENOMEM;
+			vout = sgt->sgl;
+		} else {
+			sg_init_one(&sg, vbuf->data_buf, vbuf->data_size);
+			vout = &sg;
+			outcnt = 1;
[PATCH] drm/virtio: Fix warning in virtio_gpu_queue_fenced_ctrl_buffer.
Fix warning introduced with commit e1218b8c0cc1 ("drm/virtio: Use vmalloc
for command buffer allocations.") from drm-misc-next.

Signed-off-by: David Riley
---
 drivers/gpu/drm/virtio/virtgpu_vq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 9f9b782dd332..80176f379ad5 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -358,7 +358,7 @@ static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
 			sgt = vmalloc_to_sgt(vbuf->data_buf, vbuf->data_size,
 					     &outcnt);
 			if (!sgt)
-				return -ENOMEM;
+				return;
 			vout = sgt->sgl;
 		} else {
 			sg_init_one(&sg, vbuf->data_buf, vbuf->data_size);
-- 
2.23.0.162.g0b9fbb3734-goog
[PATCH] drm/virtio: Use vmalloc for command buffer allocations.
Userspace requested command buffer allocations could be too large to
make as a contiguous allocation.  Use vmalloc if necessary to satisfy
those allocations.

Signed-off-by: David Riley
---
 drivers/gpu/drm/virtio/virtgpu_ioctl.c |  4 +-
 drivers/gpu/drm/virtio/virtgpu_vq.c    | 74 --
 2 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index ac60be9b5c19..a8732a8af766 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -195,7 +195,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto out_free;
 
-	buf = memdup_user(u64_to_user_ptr(exbuf->command), exbuf->size);
+	buf = vmemdup_user(u64_to_user_ptr(exbuf->command), exbuf->size);
 	if (IS_ERR(buf)) {
 		ret = PTR_ERR(buf);
 		goto out_unresv;
@@ -230,7 +230,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	return 0;
 
 out_memdup:
-	kfree(buf);
+	kvfree(buf);
 out_unresv:
 	ttm_eu_backoff_reservation(&ticket, &validate_list);
 out_free:
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 981ee16e3ee9..bcbc48b7284f 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -154,7 +154,7 @@ static void free_vbuf(struct virtio_gpu_device *vgdev,
 {
 	if (vbuf->resp_size > MAX_INLINE_RESP_SIZE)
 		kfree(vbuf->resp_buf);
-	kfree(vbuf->data_buf);
+	kvfree(vbuf->data_buf);
 	kmem_cache_free(vgdev->vbufs, vbuf);
 }
 
@@ -251,6 +251,59 @@ void virtio_gpu_dequeue_cursor_func(struct work_struct *work)
 	wake_up(&vgdev->cursorq.ack_queue);
 }
 
+/* How many bytes left in this page. */
+static unsigned int rest_of_page(void *data)
+{
+	return PAGE_SIZE - offset_in_page(data);
+}
+
+/* Create sg_table from a vmalloc'd buffer. */
+static struct sg_table *vmalloc_to_sgt(char *data, uint32_t size)
+{
+	int nents, ret, s, i;
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	struct page *pg;
+
+	sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
+	if (!sgt)
+		return NULL;
+
+	nents = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
+	ret = sg_alloc_table(sgt, nents, GFP_KERNEL);
+	if (ret) {
+		kfree(sgt);
+		return NULL;
+	}
+
+	for_each_sg(sgt->sgl, sg, nents, i) {
+		pg = vmalloc_to_page(data);
+		if (!pg) {
+			sg_free_table(sgt);
+			kfree(sgt);
+			return NULL;
+		}
+
+		s = rest_of_page(data);
+		if (s > size)
+			s = size;
+
+		sg_set_page(sg, pg, s, offset_in_page(data));
+
+		size -= s;
+		data += s;
+
+		if (size) {
+			sg_unmark_end(sg);
+		} else {
+			sg_mark_end(sg);
+			break;
+		}
+	}
+
+	return sgt;
+}
+
 static int virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
 					       struct virtio_gpu_vbuffer *vbuf)
 	__releases(&vgdev->ctrlq.qlock)
@@ -260,6 +313,7 @@ static int virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
 	struct scatterlist *sgs[3], vcmd, vout, vresp;
 	int outcnt = 0, incnt = 0;
 	int ret;
+	struct sg_table *sgt = NULL;
 
 	if (!vgdev->vqs_ready)
 		return -ENODEV;
@@ -269,8 +323,17 @@ static int virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
 	outcnt++;
 
 	if (vbuf->data_size) {
-		sg_init_one(&vout, vbuf->data_buf, vbuf->data_size);
-		sgs[outcnt + incnt] = &vout;
+		if (is_vmalloc_addr(vbuf->data_buf)) {
+			spin_unlock(&vgdev->ctrlq.qlock);
+			sgt = vmalloc_to_sgt(vbuf->data_buf, vbuf->data_size);
+			spin_lock(&vgdev->ctrlq.qlock);
+			if (!sgt)
+				return -ENOMEM;
+			sgs[outcnt + incnt] = sgt->sgl;
+		} else {
+			sg_init_one(&vout, vbuf->data_buf, vbuf->data_size);
+			sgs[outcnt + incnt] = &vout;
+		}
 		outcnt++;
 	}
 
@@ -294,6 +357,11 @@ static int virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
 		virtqueue_kick(vq);
 	}
 
+	if (sgt) {
+		sg_free_table(sgt);
+		kfree(sgt);
Re: [PATCH] drm/virtio: Use vmalloc for command buffer allocations.
On Thu, Aug 29, 2019 at 11:09 PM Gerd Hoffmann wrote:
>   Hi,
>
> >  {
> >  	if (vbuf->resp_size > MAX_INLINE_RESP_SIZE)
> >  		kfree(vbuf->resp_buf);
> > -	kfree(vbuf->data_buf);
> > +	kvfree(vbuf->data_buf);
>
> if (is_vmalloc_addr(vbuf->data_buf)) ...
>
> needed here I gues?

kvfree() handles vmalloc/kmalloc/kvmalloc internally by doing that check.

> > +/* Create sg_table from a vmalloc'd buffer. */
> > +static struct sg_table *vmalloc_to_sgt(char *data, uint32_t size)
>
> Hmm, isn't there an existing function for that?
> I'd be surprised if virtio-gpu is the first driver needing this ...
>
> And it case there really isn't one this should probably added to the
> vmalloc or scatterlist code, not the virtio-gpu driver.

There's a few other similar ones around:
- pack_sg_list in net/9p/trans_virtio.c, assumes contiguous array of
  scatterlist and non-vmalloc
- videobuf_vmalloc_to_sg in drivers/media/v4l2-core/videobuf-dma-sg.c,
  assumes contiguous array of scatterlist and that the buffer being
  converted is page aligned
- vmalloc_to_sg() in drivers/media/common/saa7146/saa7146_core.c,
  duplicate of videobuf_vmalloc_to_sg

None of the existing ones seemed to do what was needed, and the
convention seemed to be to pack an sg table as needed for the usage and
just keep it in the module, so that's what I followed.  I'm not very
familiar with these APIs so maybe there's something I missed, but I did
look through the helpers in lib/scatterlist.c and didn't see anything.
If you think it is better suited to live in scatterlist I can prepare
another change for that.

Dave

> cheers,
>   Gerd
Re: [PATCH] drm/virtio: Use vmalloc for command buffer allocations.
Hi Gerd,

On Fri, Aug 30, 2019 at 4:16 AM Gerd Hoffmann wrote:
>
>   Hi,
>
> > > > -	kfree(vbuf->data_buf);
> > > > +	kvfree(vbuf->data_buf);
> > >
> > > if (is_vmalloc_addr(vbuf->data_buf)) ...
> > >
> > > needed here I gues?
> >
> > kvfree() handles vmalloc/kmalloc/kvmalloc internally by doing that check.
>
> Ok.
>
> > - videobuf_vmalloc_to_sg in drivers/media/v4l2-core/videobuf-dma-sg.c,
> > assumes contiguous array of scatterlist and that the buffer being
> > converted is page aligned
>
> Well, vmalloc memory _is_ page aligned.

True, but this function gets called for all potential enqueuings (eg
resource_create_3d, resource_attach_backing) and I was concerned that
some other usage in the future might not have that guarantee.  But if
that's being overly paranoid, I'm willing to backtrack on that.

> sg_alloc_table_from_pages() does alot of what you need, you just need a
> small loop around vmalloc_to_page() create a struct page array
> beforehand.

That feels like an extra allocation when under memory pressure and more
work, to not gain much -- there still needs to be a function that
iterates through all the pages.  But I don't feel super strongly about
it and can change it if you think that it will be less maintenance
overhead.

> Completely different approach: use get_user_pages() and don't copy the
> execbuffer at all.

This one I'd rather not do unless we can show that the copy is actually
a problem.  As it stands I expect this to be a fallback instead of the
regular case, and if it's not, then the VMs need to revisit how long
the control queue is.

Right now most execbuffers end up being copied into a single contiguous
buffer, which results in 3 descriptors of the virtio queue.  If vmalloc
ends up being used (which is only a fallback because vmemdup_user tries
kmalloc first still), then there'll be 66 descriptors for a 256KB
buffer.  I think that's fine for exceptional cases, but not ideal if it
was commonplace.  Chia-I suggested that we could have a flag for the
ioctl execbuffer indicating that the buffer is a BO to solve that, but
again I'd prefer to see that it's actually a problem.

I'll also be moving the sg table construction out of the critical
section and properly accounting for the required number of descriptors
before entering it, in response to his comments.

Thanks,
Dave
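(For reference, the 3 and 66 descriptor counts above follow from the
patch's scatterlist layout of one command descriptor, the data entries
and one response descriptor, assuming 4 KiB pages:

    contiguous buffer: 1 (cmd) +  1 (data)                  + 1 (resp) =  3 descriptors
    vmalloc'd 256 KB:  1 (cmd) + 256 KB / 4 KB = 64 (data)  + 1 (resp) = 66 descriptors)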
Re: [PATCH] drm/virtio: Use vmalloc for command buffer allocations.
On Sun, Sep 1, 2019 at 10:28 PM Gerd Hoffmann wrote:
>
> On Fri, Aug 30, 2019 at 10:49:25AM -0700, David Riley wrote:
> > Hi Gerd,
> >
> > On Fri, Aug 30, 2019 at 4:16 AM Gerd Hoffmann wrote:
> > >
> > >   Hi,
> > >
> > > > > > -	kfree(vbuf->data_buf);
> > > > > > +	kvfree(vbuf->data_buf);
> > > > >
> > > > > if (is_vmalloc_addr(vbuf->data_buf)) ...
> > > > >
> > > > > needed here I gues?
> > > >
> > > > kvfree() handles vmalloc/kmalloc/kvmalloc internally by doing that
> > > > check.
> > >
> > > Ok.
> > >
> > > > - videobuf_vmalloc_to_sg in drivers/media/v4l2-core/videobuf-dma-sg.c,
> > > > assumes contiguous array of scatterlist and that the buffer being
> > > > converted is page aligned
> > >
> > > Well, vmalloc memory _is_ page aligned.
> >
> > True, but this function gets called for all potential enqueuings (eg
> > resource_create_3d, resource_attach_backing) and I was concerned that
> > some other usage in the future might not have that guarantee.
>
> The vmalloc_to_sg call is wrapped into "if (is_vmalloc())", so this
> should not be a problem.
>
> > > sg_alloc_table_from_pages() does alot of what you need, you just need a
> > > small loop around vmalloc_to_page() create a struct page array
> > > beforehand.
> >
> > That feels like an extra allocation when under memory pressure and
> > more work, to not gain much -- there still needs to be a function that
> > iterates through all the pages.  But I don't feel super strongly about
> > it and can change it if you think that it will be less maintenance
> > overhead.
>
> Lets see how vmalloc_to_sg looks like when it assumes page-aligned
> memory.  It's probably noticeable shorter then.

It's not really.  The allocation of the table is one unit less, and it
doesn't need to take into account that data might be at an offset
within the page.  It still needs error handling, partial final page
handling, and marking of the end of the scatterlist.

Things could be slightly simplified to assume that you can always get a
contiguous allocation of the table instead of using
sg_alloc_table/for_each_sg, but given that we're only going down this
path when memory is fragmented and in a fallback, it doesn't seem
worthwhile to make that trade-off.

I've written a different version of vmalloc_to_sgt which uses
sg_alloc_table_from_pages under the covers and it comes in slightly
shorter (39 lines vs 55 lines), but it incurs another allocation as
previously, so I'm personally in favour of things as written.

fpga_mgr_buf_load is another function which roughly does the same sort
of operation and it's a bit longer.

I'll post a v2 shortly, but if you think it's worth making the extra
allocation of the pages array, I can post that instead.

> cheers,
>   Gerd
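(For illustration only, a rough sketch of what the
sg_alloc_table_from_pages()-based variant discussed above might look
like.  This is not the version that was posted; the function name and
error handling are assumptions, and the sg_ents accounting used for
descriptor counting is omitted.  It mainly shows where the extra
temporary allocation comes from.)

/*
 * Sketch only (not the posted patch): vmalloc_to_sgt() built on
 * sg_alloc_table_from_pages().  The temporary 'pages' array is the
 * extra allocation discussed above.
 */
static struct sg_table *vmalloc_to_sgt_from_pages(char *data, uint32_t size)
{
	unsigned int n_pages = DIV_ROUND_UP(size, PAGE_SIZE);
	struct page **pages;
	struct sg_table *sgt;
	unsigned int i;

	sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
	if (!sgt)
		return NULL;

	/* Extra allocation: page array handed to sg_alloc_table_from_pages(). */
	pages = kmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
	if (!pages) {
		kfree(sgt);
		return NULL;
	}

	for (i = 0; i < n_pages; i++) {
		pages[i] = vmalloc_to_page(data + i * PAGE_SIZE);
		if (!pages[i])
			goto err;
	}

	/* Packs the pages into a scatterlist, coalescing contiguous pages. */
	if (sg_alloc_table_from_pages(sgt, pages, n_pages, 0, size, GFP_KERNEL))
		goto err;

	kfree(pages);
	return sgt;

err:
	kfree(pages);
	kfree(sgt);
	return NULL;
}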
[PATCH v2] drm/virtio: Use vmalloc for command buffer allocations.
Userspace requested command buffer allocations could be too large to
make as a contiguous allocation.  Use vmalloc if necessary to satisfy
those allocations.

Signed-off-by: David Riley
---
 drivers/gpu/drm/virtio/virtgpu_ioctl.c |   4 +-
 drivers/gpu/drm/virtio/virtgpu_vq.c    | 114 -
 2 files changed, 96 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index ac60be9b5c19..a8732a8af766 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -195,7 +195,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto out_free;
 
-	buf = memdup_user(u64_to_user_ptr(exbuf->command), exbuf->size);
+	buf = vmemdup_user(u64_to_user_ptr(exbuf->command), exbuf->size);
 	if (IS_ERR(buf)) {
 		ret = PTR_ERR(buf);
 		goto out_unresv;
@@ -230,7 +230,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	return 0;
 
 out_memdup:
-	kfree(buf);
+	kvfree(buf);
 out_unresv:
 	ttm_eu_backoff_reservation(&ticket, &validate_list);
 out_free:
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 981ee16e3ee9..3ec89ae8478c 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -154,7 +154,7 @@ static void free_vbuf(struct virtio_gpu_device *vgdev,
 {
 	if (vbuf->resp_size > MAX_INLINE_RESP_SIZE)
 		kfree(vbuf->resp_buf);
-	kfree(vbuf->data_buf);
+	kvfree(vbuf->data_buf);
 	kmem_cache_free(vgdev->vbufs, vbuf);
 }
 
@@ -251,13 +251,70 @@ void virtio_gpu_dequeue_cursor_func(struct work_struct *work)
 	wake_up(&vgdev->cursorq.ack_queue);
 }
 
+/* How many bytes left in this page. */
+static unsigned int rest_of_page(void *data)
+{
+	return PAGE_SIZE - offset_in_page(data);
+}
+
+/* Create sg_table from a vmalloc'd buffer. */
+static struct sg_table *vmalloc_to_sgt(char *data, uint32_t size, int *sg_ents)
+{
+	int nents, ret, s, i;
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	struct page *pg;
+
+	*sg_ents = 0;
+
+	sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
+	if (!sgt)
+		return NULL;
+
+	nents = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
+	ret = sg_alloc_table(sgt, nents, GFP_KERNEL);
+	if (ret) {
+		kfree(sgt);
+		return NULL;
+	}
+
+	for_each_sg(sgt->sgl, sg, nents, i) {
+		pg = vmalloc_to_page(data);
+		if (!pg) {
+			sg_free_table(sgt);
+			kfree(sgt);
+			return NULL;
+		}
+
+		s = rest_of_page(data);
+		if (s > size)
+			s = size;
+
+		sg_set_page(sg, pg, s, offset_in_page(data));
+
+		size -= s;
+		data += s;
+		*sg_ents += 1;
+
+		if (size) {
+			sg_unmark_end(sg);
+		} else {
+			sg_mark_end(sg);
+			break;
+		}
+	}
+
+	return sgt;
+}
+
 static int virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
-					       struct virtio_gpu_vbuffer *vbuf)
+					       struct virtio_gpu_vbuffer *vbuf,
+					       struct scatterlist *vout)
 	__releases(&vgdev->ctrlq.qlock)
 	__acquires(&vgdev->ctrlq.qlock)
 {
 	struct virtqueue *vq = vgdev->ctrlq.vq;
-	struct scatterlist *sgs[3], vcmd, vout, vresp;
+	struct scatterlist *sgs[3], vcmd, vresp;
 	int outcnt = 0, incnt = 0;
 	int ret;
@@ -268,9 +325,8 @@ static int virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
 	sgs[outcnt + incnt] = &vcmd;
 	outcnt++;
 
-	if (vbuf->data_size) {
-		sg_init_one(&vout, vbuf->data_buf, vbuf->data_size);
-		sgs[outcnt + incnt] = &vout;
+	if (vout) {
+		sgs[outcnt + incnt] = vout;
 		outcnt++;
 	}
 
@@ -299,24 +355,30 @@ static int virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
 	return ret;
 }
 
-static int virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev,
-					struct virtio_gpu_vbuffer *vbuf)
-{
-	int rc;
-
-	spin_lock(&vgdev->ctrlq.qlock);
-	rc = virtio_gpu_queue_ctrl_buffer_locked(vgdev, vbuf);
-	spin_unlock(&vgdev->ctrlq.qlock);
-	return rc;
-}
-
 static int virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
Re: [PATCH v2] drm/virtio: Use vmalloc for command buffer allocations.
On Thu, Sep 5, 2019 at 10:18 PM Gerd Hoffmann wrote:
>
> > +/* How many bytes left in this page. */
> > +static unsigned int rest_of_page(void *data)
> > +{
> > +	return PAGE_SIZE - offset_in_page(data);
> > +}
>
> Not needed.
>
> > +/* Create sg_table from a vmalloc'd buffer. */
> > +static struct sg_table *vmalloc_to_sgt(char *data, uint32_t size, int *sg_ents)
> > +{
> > +	int nents, ret, s, i;
> > +	struct sg_table *sgt;
> > +	struct scatterlist *sg;
> > +	struct page *pg;
> > +
> > +	*sg_ents = 0;
> > +
> > +	sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
> > +	if (!sgt)
> > +		return NULL;
> > +
> > +	nents = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
>
> Why +1?

This is part of handling offsets within the vmalloc buffer and to
maintain parity with the !is_vmalloc_addr/existing case (sg_init_one
handles offsets within pages internally).  I had left it in because
this is being used for all sg/descriptor generation and I wasn't sure
if someone in the future might do something like:

buf = vmemdup_user()
offset = find_interesting(buf)
queue(buf + offset)

To respond specifically to your question, if we handle offsets, a
vmalloc_to_sgt(size = PAGE_SIZE + 2) could end up with 3 sg_ents with
the +1 being to account for that extra page.

I'll just remove all support for offsets in v3 of the patch and comment
that functionality will be different based on where the buffer was
originally allocated from.

> > +	ret = sg_alloc_table(sgt, nents, GFP_KERNEL);
> > +	if (ret) {
> > +		kfree(sgt);
> > +		return NULL;
> > +	}
> > +
> > +	for_each_sg(sgt->sgl, sg, nents, i) {
> > +		pg = vmalloc_to_page(data);
> > +		if (!pg) {
> > +			sg_free_table(sgt);
> > +			kfree(sgt);
> > +			return NULL;
> > +		}
> > +
> > +		s = rest_of_page(data);
> > +		if (s > size)
> > +			s = size;
>
> vmalloc memory is page aligned, so:

As per above, will remove with v3.

>
> 	s = min(PAGE_SIZE, size);
>
> > +		sg_set_page(sg, pg, s, offset_in_page(data));
>
> Offset is always zero.

As per above, will remove with v3.

> > +
> > +		size -= s;
> > +		data += s;
> > +		*sg_ents += 1;
>
> sg_ents isn't used anywhere.

It's used for outcnt.

> > +
> > +		if (size) {
> > +			sg_unmark_end(sg);
> > +		} else {
> > +			sg_mark_end(sg);
> > +			break;
> > +		}
>
> That looks a bit strange.  I guess you need only one of the two because
> the other is the default?

I was being overly paranoid and not wanting to make assumptions about
the initial state of the table.  I'll simplify.

> >  static int virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
> >  					       struct virtio_gpu_vbuffer *vbuf,
> >  					       struct virtio_gpu_ctrl_hdr *hdr,
> >  					       struct virtio_gpu_fence *fence)
> >  {
> >  	struct virtqueue *vq = vgdev->ctrlq.vq;
> > +	struct scatterlist *vout = NULL, sg;
> > +	struct sg_table *sgt = NULL;
> >  	int rc;
> > +	int outcnt = 0;
> > +
> > +	if (vbuf->data_size) {
> > +		if (is_vmalloc_addr(vbuf->data_buf)) {
> > +			sgt = vmalloc_to_sgt(vbuf->data_buf, vbuf->data_size,
> > +					     &outcnt);
> > +			if (!sgt)
> > +				return -ENOMEM;
> > +			vout = sgt->sgl;
> > +		} else {
> > +			sg_init_one(&sg, vbuf->data_buf, vbuf->data_size);
> > +			vout = &sg;
> > +			outcnt = 1;
>
> outcnt must be set in both cases.

outcnt is set by vmalloc_to_sgt.

> > +static int virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev,
> > +					struct virtio_gpu_vbuffer *vbuf)
> > +{
> > +	return virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, NULL, NULL);
> > +}
>
> Changing virtio_gpu_queue_ctrl_buffer to call
> virtio_gpu_queue_fenced_ctrl_buffer should be done in a separate patch.

Will do.

Thanks,
David
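(To make the PAGE_SIZE + 2 example above concrete, with a purely
illustrative, assumed starting offset of PAGE_SIZE - 1 into the first
page:

    page 0:  1 byte (tail of the first page)
    page 1:  PAGE_SIZE bytes
    page 2:  1 byte
    => 3 sg_ents, while DIV_ROUND_UP(PAGE_SIZE + 2, PAGE_SIZE) = 2,
       hence the +1.)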
[PATCH v3 1/2] drm/virtio: Rewrite virtio_gpu_queue_ctrl_buffer using fenced version.
Factor function in preparation to generating scatterlist prior to locking.

Signed-off-by: David Riley
---
 drivers/gpu/drm/virtio/virtgpu_vq.c | 19 +++
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 981ee16e3ee9..bf5a4a50b002 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -299,17 +299,6 @@ static int virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
 	return ret;
 }
 
-static int virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev,
-					struct virtio_gpu_vbuffer *vbuf)
-{
-	int rc;
-
-	spin_lock(&vgdev->ctrlq.qlock);
-	rc = virtio_gpu_queue_ctrl_buffer_locked(vgdev, vbuf);
-	spin_unlock(&vgdev->ctrlq.qlock);
-	return rc;
-}
-
 static int virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
 					       struct virtio_gpu_vbuffer *vbuf,
 					       struct virtio_gpu_ctrl_hdr *hdr,
@@ -335,13 +324,19 @@ static int virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
 		goto again;
 	}
 
-	if (fence)
+	if (hdr && fence)
 		virtio_gpu_fence_emit(vgdev, hdr, fence);
 	rc = virtio_gpu_queue_ctrl_buffer_locked(vgdev, vbuf);
 	spin_unlock(&vgdev->ctrlq.qlock);
 	return rc;
 }
 
+static int virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev,
+					struct virtio_gpu_vbuffer *vbuf)
+{
+	return virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, NULL, NULL);
+}
+
 static int virtio_gpu_queue_cursor(struct virtio_gpu_device *vgdev,
 				   struct virtio_gpu_vbuffer *vbuf)
 {
-- 
2.23.0.162.g0b9fbb3734-goog
[PATCH v3 2/2] drm/virtio: Use vmalloc for command buffer allocations.
Userspace requested command buffer allocations could be too large to
make as a contiguous allocation.  Use vmalloc if necessary to satisfy
those allocations.

Signed-off-by: David Riley
---
 drivers/gpu/drm/virtio/virtgpu_ioctl.c |  4 +-
 drivers/gpu/drm/virtio/virtgpu_vq.c    | 79 +++---
 2 files changed, 73 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index ac60be9b5c19..a8732a8af766 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -195,7 +195,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto out_free;
 
-	buf = memdup_user(u64_to_user_ptr(exbuf->command), exbuf->size);
+	buf = vmemdup_user(u64_to_user_ptr(exbuf->command), exbuf->size);
 	if (IS_ERR(buf)) {
 		ret = PTR_ERR(buf);
 		goto out_unresv;
@@ -230,7 +230,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	return 0;
 
 out_memdup:
-	kfree(buf);
+	kvfree(buf);
 out_unresv:
 	ttm_eu_backoff_reservation(&ticket, &validate_list);
 out_free:
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index bf5a4a50b002..76cf2b9d5d1d 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -154,7 +154,7 @@ static void free_vbuf(struct virtio_gpu_device *vgdev,
 {
 	if (vbuf->resp_size > MAX_INLINE_RESP_SIZE)
 		kfree(vbuf->resp_buf);
-	kfree(vbuf->data_buf);
+	kvfree(vbuf->data_buf);
 	kmem_cache_free(vgdev->vbufs, vbuf);
 }
 
@@ -251,13 +251,54 @@ void virtio_gpu_dequeue_cursor_func(struct work_struct *work)
 	wake_up(&vgdev->cursorq.ack_queue);
 }
 
+/* Create sg_table from a vmalloc'd buffer. */
+static struct sg_table *vmalloc_to_sgt(char *data, uint32_t size, int *sg_ents)
+{
+	int ret, s, i;
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	struct page *pg;
+
+	if (WARN_ON(!PAGE_ALIGNED(data)))
+		return NULL;
+
+	sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
+	if (!sgt)
+		return NULL;
+
+	*sg_ents = DIV_ROUND_UP(size, PAGE_SIZE);
+	ret = sg_alloc_table(sgt, *sg_ents, GFP_KERNEL);
+	if (ret) {
+		kfree(sgt);
+		return NULL;
+	}
+
+	for_each_sg(sgt->sgl, sg, *sg_ents, i) {
+		pg = vmalloc_to_page(data);
+		if (!pg) {
+			sg_free_table(sgt);
+			kfree(sgt);
+			return NULL;
+		}
+
+		s = min_t(int, PAGE_SIZE, size);
+		sg_set_page(sg, pg, s, 0);
+
+		size -= s;
+		data += s;
+	}
+
+	return sgt;
+}
+
 static int virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
-					       struct virtio_gpu_vbuffer *vbuf)
+					       struct virtio_gpu_vbuffer *vbuf,
+					       struct scatterlist *vout)
 	__releases(&vgdev->ctrlq.qlock)
 	__acquires(&vgdev->ctrlq.qlock)
 {
 	struct virtqueue *vq = vgdev->ctrlq.vq;
-	struct scatterlist *sgs[3], vcmd, vout, vresp;
+	struct scatterlist *sgs[3], vcmd, vresp;
 	int outcnt = 0, incnt = 0;
 	int ret;
@@ -268,9 +309,8 @@ static int virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
 	sgs[outcnt + incnt] = &vcmd;
 	outcnt++;
 
-	if (vbuf->data_size) {
-		sg_init_one(&vout, vbuf->data_buf, vbuf->data_size);
-		sgs[outcnt + incnt] = &vout;
+	if (vout) {
+		sgs[outcnt + incnt] = vout;
 		outcnt++;
 	}
 
@@ -305,7 +345,24 @@ static int virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
 					       struct virtio_gpu_fence *fence)
 {
 	struct virtqueue *vq = vgdev->ctrlq.vq;
+	struct scatterlist *vout = NULL, sg;
+	struct sg_table *sgt = NULL;
 	int rc;
+	int outcnt = 0;
+
+	if (vbuf->data_size) {
+		if (is_vmalloc_addr(vbuf->data_buf)) {
+			sgt = vmalloc_to_sgt(vbuf->data_buf, vbuf->data_size,
+					     &outcnt);
+			if (!sgt)
+				return -ENOMEM;
+			vout = sgt->sgl;
+		} else {
+			sg_init_one(&sg, vbuf->data_buf, vbuf->data_size);
+			vout = &sg;
+			outcnt = 1;
+		}
+	}
+
 again:
 	spin_lock(&vgdev->ctrlq.qlock);
Re: [PATCH v3 1/2] drm/virtio: Rewrite virtio_gpu_queue_ctrl_buffer using fenced version.
They were based off of Linus' https://github.com/torvalds/linux master
from yesterday.  I can rebase onto drm-misc-next.

On Tue, Sep 10, 2019 at 10:12 PM Gerd Hoffmann wrote:
>
> On Tue, Sep 10, 2019 at 01:06:50PM -0700, David Riley wrote:
> > Factor function in preparation to generating scatterlist prior to locking.
>
> Patches are looking good now, but they don't apply.  What tree was used
> to create them?
>
> Latest virtio-gpu driver bits are in drm-misc-next (see
> https://cgit.freedesktop.org/drm/drm-misc).
>
> cheers,
>   Gerd
[PATCH v4 1/2] drm/virtio: Rewrite virtio_gpu_queue_ctrl_buffer using fenced version.
Factor function in preparation to generating scatterlist prior to locking.

Signed-off-by: David Riley
---
 drivers/gpu/drm/virtio/virtgpu_vq.c | 20 +++-
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 7fd2851f7b97..5a64c776138d 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -302,18 +302,6 @@ static bool virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
 	return notify;
 }
 
-static void virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev,
-					 struct virtio_gpu_vbuffer *vbuf)
-{
-	bool notify;
-
-	spin_lock(&vgdev->ctrlq.qlock);
-	notify = virtio_gpu_queue_ctrl_buffer_locked(vgdev, vbuf);
-	spin_unlock(&vgdev->ctrlq.qlock);
-	if (notify)
-		virtqueue_notify(vgdev->ctrlq.vq);
-}
-
 static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
 						struct virtio_gpu_vbuffer *vbuf,
 						struct virtio_gpu_ctrl_hdr *hdr,
@@ -339,7 +327,7 @@ static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
 		goto again;
 	}
 
-	if (fence) {
+	if (hdr && fence) {
 		virtio_gpu_fence_emit(vgdev, hdr, fence);
 		if (vbuf->objs) {
 			virtio_gpu_array_add_fence(vbuf->objs, &fence->f);
@@ -352,6 +340,12 @@ static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
 		virtqueue_notify(vgdev->ctrlq.vq);
 }
 
+static void virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev,
+					 struct virtio_gpu_vbuffer *vbuf)
+{
+	virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, NULL, NULL);
+}
+
 static void virtio_gpu_queue_cursor(struct virtio_gpu_device *vgdev,
 				    struct virtio_gpu_vbuffer *vbuf)
 {
-- 
2.23.0.162.g0b9fbb3734-goog
[PATCH v4 0/2] drm/virtio: Use vmalloc for command buffer allocations.
Userspace requested command buffer allocations could be too large to
make as a contiguous allocation.  Use vmalloc if necessary to satisfy
those allocations.

v1: Initial version.
v2: Properly account for number of free descriptors required.
v3: Remove offset handling for vmalloc'd buffers.
v4: Rebase onto drm-misc-next.

David Riley (2):
  drm/virtio: Rewrite virtio_gpu_queue_ctrl_buffer using fenced version.
  drm/virtio: Use vmalloc for command buffer allocations.

 drivers/gpu/drm/virtio/virtgpu_ioctl.c |  4 +-
 drivers/gpu/drm/virtio/virtgpu_vq.c    | 98 --
 2 files changed, 79 insertions(+), 23 deletions(-)

-- 
2.23.0.162.g0b9fbb3734-goog