[PATCH 2/5] virtio_net: introduce vi->mode
Now, if we want to judge the rx work mode, we have to use checks like
these:

1. merge mode: vi->mergeable_rx_bufs
2. big mode:   vi->big_packets && !vi->mergeable_rx_bufs
3. small mode: !vi->big_packets && !vi->mergeable_rx_bufs

This is inconvenient and abstract, and we also have this use case:

        if (vi->mergeable_rx_bufs)
        else if (vi->big_packets)
        else

For this case, a switch-case is the better choice. So here I introduce
vi->mode to record the virtio-net rx work mode, which makes it easier
to judge the work mode and to choose among the branches.

Signed-off-by: Xuan Zhuo
---
 drivers/net/virtio_net.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 47 insertions(+), 14 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 59a99bbaf852..14809b614d62 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -385,6 +385,12 @@ struct control_buf {
         virtio_net_ctrl_ack status;
 };
 
+enum virtnet_mode {
+        VIRTNET_MODE_SMALL,
+        VIRTNET_MODE_MERGE,
+        VIRTNET_MODE_BIG
+};
+
 struct virtnet_info {
         struct virtio_device *vdev;
         struct virtqueue *cvq;
@@ -414,6 +420,8 @@ struct virtnet_info {
         /* Host will merge rx buffers for big packets (shake it! shake it!) */
         bool mergeable_rx_bufs;
 
+        enum virtnet_mode mode;
+
         /* Host supports rss and/or hash report */
         bool has_rss;
         bool has_rss_hash_report;
@@ -643,12 +651,15 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
 static void virtnet_rq_free_buf(struct virtnet_info *vi,
                                 struct receive_queue *rq, void *buf)
 {
-        if (vi->mergeable_rx_bufs)
+        switch (vi->mode) {
+        case VIRTNET_MODE_SMALL:
+        case VIRTNET_MODE_MERGE:
                 put_page(virt_to_head_page(buf));
-        else if (vi->big_packets)
+                break;
+        case VIRTNET_MODE_BIG:
                 give_pages(rq, buf);
-        else
-                put_page(virt_to_head_page(buf));
+                break;
+        }
 }
 
 static void enable_delayed_refill(struct virtnet_info *vi)
@@ -1315,7 +1326,8 @@ static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq,
         flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags;
 
-        if (!vi->mergeable_rx_bufs)
+        /* We only support small and merge mode. */
+        if (vi->mode == VIRTNET_MODE_SMALL)
                 skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats);
         else
                 skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats);
@@ -2389,13 +2401,20 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
          */
         flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags;
 
-        if (vi->mergeable_rx_bufs)
+        switch (vi->mode) {
+        case VIRTNET_MODE_MERGE:
                 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, stats);
-        else if (vi->big_packets)
+                break;
+
+        case VIRTNET_MODE_BIG:
                 skb = receive_big(dev, vi, rq, buf, len, stats);
-        else
+                break;
+
+        case VIRTNET_MODE_SMALL:
                 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats);
+                break;
+        }
 
         if (unlikely(!skb))
                 return;
@@ -2580,12 +2599,19 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
         }
 
         do {
-                if (vi->mergeable_rx_bufs)
+                switch (vi->mode) {
+                case VIRTNET_MODE_MERGE:
                         err = add_recvbuf_mergeable(vi, rq, gfp);
-                else if (vi->big_packets)
+                        break;
+
+                case VIRTNET_MODE_BIG:
                         err = add_recvbuf_big(vi, rq, gfp);
-                else
+                        break;
+
+                case VIRTNET_MODE_SMALL:
                         err = add_recvbuf_small(vi, rq, gfp);
+                        break;
+                }
 
                 if (err)
                         break;
@@ -2703,7 +2729,7 @@ static int virtnet_receive_packets(struct virtnet_info *vi,
         int packets = 0;
         void *buf;
 
-        if (!vi->big_packets || vi->mergeable_rx_bufs) {
+        if (vi->mode != VIRTNET_MODE_BIG) {
                 void *ctx;
 
                 while (packets < budget &&
                        (buf = virtnet_rq_get_buf(rq, &len, &ctx))) {
@@ -5510,7 +5536,7 @@ static int virtnet_xsk_pool_enable(struct net_device *dev,
         /* In big_packets mode, xdp cannot work, so there is no need to
          * initialize xsk of rq.
          */
-        if (vi->big_packets && !vi->mergeable_rx_bufs)
+        if (vi->mode == VIRTNET_MODE_BIG)
                 return -ENOENT;
 
         if (qid >= vi->curr_queue_pairs)
@@ -6007,7 +6033,7 @@ static int virtnet_find_vqs(str
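The final hunk above arrived truncated, before the point where vi->mode
is actually assigned. For reference, a minimal sketch of how the mode
could be derived from the existing flags, following the mapping listed
at the top of this message (the helper name and its placement are
illustrative, not taken from the patch):

/* Illustrative only: encodes the merge/big/small mapping from the
 * commit message. The real patch assigns vi->mode during device
 * setup, which is cut off in the diff above. */
static void virtnet_set_mode(struct virtnet_info *vi)
{
        if (vi->mergeable_rx_bufs)
                vi->mode = VIRTNET_MODE_MERGE;  /* merge: mergeable_rx_bufs */
        else if (vi->big_packets)
                vi->mode = VIRTNET_MODE_BIG;    /* big: big_packets only */
        else
                vi->mode = VIRTNET_MODE_SMALL;  /* small: neither flag */
}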
[PATCH 1/5] virtio-net: fix overflow inside virtnet_rq_alloc
When the frag just got a single page, this may lead to a regression on
the VM. In particular, if the sysctl net.core.high_order_alloc_disable
value is 1, the frag always gets a single page on refill, and we could
see reliable crashes or scp failures (scp a file 100M in size to the
VM).

The issue is that the virtnet_rq_dma takes up 16 bytes at the beginning
of a new frag. When the frag size is larger than PAGE_SIZE, everything
is fine. However, if the frag is only one page and the total size of
the buffer and virtnet_rq_dma is larger than one page, an overflow may
occur.

Here, when the frag size is not enough, we reduce the buffer len to fix
this problem.

Fixes: f9dac92ba908 ("virtio_ring: enable premapped mode whatever use_dma_api")
Reported-by: "Si-Wei Liu"
Signed-off-by: Xuan Zhuo
---
 drivers/net/virtio_net.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index f8131f92a392..59a99bbaf852 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -926,9 +926,6 @@ static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
         void *buf, *head;
         dma_addr_t addr;
 
-        if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp)))
-                return NULL;
-
         head = page_address(alloc_frag->page);
 
         if (rq->do_dma) {
@@ -2423,6 +2420,9 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
         len = SKB_DATA_ALIGN(len) +
               SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
+        if (unlikely(!skb_page_frag_refill(len, &rq->alloc_frag, gfp)))
+                return -ENOMEM;
+
         buf = virtnet_rq_alloc(rq, len, gfp);
         if (unlikely(!buf))
                 return -ENOMEM;
@@ -2525,6 +2525,12 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
          */
         len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
 
+        if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
+                return -ENOMEM;
+
+        if (!alloc_frag->offset && len + room + sizeof(struct virtnet_rq_dma) > alloc_frag->size)
+                len -= sizeof(struct virtnet_rq_dma);
+
         buf = virtnet_rq_alloc(rq, len + room, gfp);
         if (unlikely(!buf))
                 return -ENOMEM;
-- 
2.32.0.3.g01195cf9f
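For concreteness, a minimal user-space sketch of the overflow
arithmetic (the sizes are illustrative stand-ins: a one-page frag of
4096 bytes and the 16-byte virtnet_rq_dma mentioned above; this is not
kernel code):

#include <stdio.h>

/* Illustrative sizes; the real values come from the kernel's PAGE_SIZE
 * and sizeof(struct virtnet_rq_dma) (16 bytes, per the commit message). */
#define FRAG_SIZE 4096u  /* one-page frag, e.g. high_order_alloc_disable=1 */
#define DMA_META    16u  /* metadata placed at the head of a new frag */

int main(void)
{
        unsigned int offset = 0; /* fresh frag: metadata occupies bytes 0..15 */
        unsigned int len = 4096; /* requested buffer: exactly one bare page */

        /* Without the fix: metadata + buffer overruns the one-page frag. */
        if (offset == 0 && len + DMA_META > FRAG_SIZE)
                printf("overflow: need %u, frag has %u\n",
                       len + DMA_META, FRAG_SIZE);

        /* The fix: shrink the buffer so metadata + buffer fits the frag. */
        if (offset == 0 && len + DMA_META > FRAG_SIZE)
                len -= DMA_META;

        printf("after fix: need %u, frag has %u\n", len + DMA_META, FRAG_SIZE);
        return 0;
}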
[PATCH 3/5] virtio_net: big mode skip the unmap check
The virtio-net big mode does not enable premapped mode, so we do not
need to check whether to unmap there. And a subsequent commit will
remove the failover code that handled a failure to enable premapped
mode for merge and small mode. So we need to remove the do_dma check
from the big mode path.

Signed-off-by: Xuan Zhuo
Acked-by: Jason Wang
---
 drivers/net/virtio_net.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 14809b614d62..cd90e77881df 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -998,7 +998,7 @@ static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
                 return;
         }
 
-        if (rq->do_dma)
+        if (vi->mode != VIRTNET_MODE_BIG)
                 virtnet_rq_unmap(rq, buf, 0);
 
         virtnet_rq_free_buf(vi, rq, buf);
@@ -2738,7 +2738,7 @@ static int virtnet_receive_packets(struct virtnet_info *vi,
         } else {
                 while (packets < budget &&
-                       (buf = virtnet_rq_get_buf(rq, &len, NULL)) != NULL) {
+                       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
                         receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats);
                         packets++;
                 }
-- 
2.32.0.3.g01195cf9f
[PATCH 0/5] virtio_net: enable premapped mode by default
In the last Linux version, we disabled this feature to fix a
regression [1].

This patch set tries to fix the problem and re-enable the feature.

More info:
http://lore.kernel.org/all/20240820071913.68004-1-xuanz...@linux.alibaba.com

Thanks.

[1]: http://lore.kernel.org/all/8b20cc28-45a9-4643-8e87-ba164a540...@oracle.com

Xuan Zhuo (5):
  virtio-net: fix overflow inside virtnet_rq_alloc
  virtio_net: introduce vi->mode
  virtio_net: big mode skip the unmap check
  virtio_net: enable premapped mode for merge and small by default
  virtio_net: rx remove premapped failover code

 drivers/net/virtio_net.c | 168 ++++++++++++++++++++++++++++---------------------
 1 file changed, 105 insertions(+), 63 deletions(-)

-- 
2.32.0.3.g01195cf9f
[PATCH 5/5] virtio_net: rx remove premapped failover code
Now, the premapped mode can be enabled unconditionally, so we can
remove the failover code for merge and small mode.

Signed-off-by: Xuan Zhuo
---
 drivers/net/virtio_net.c | 80 +++++++++++++++++++++++-----------------------------------------------------
 1 file changed, 33 insertions(+), 47 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 8cf24b7b58bd..4d3e35b02478 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -356,9 +356,6 @@ struct receive_queue {
         struct xdp_rxq_info xsk_rxq_info;
 
         struct xdp_buff **xsk_buffs;
-
-        /* Do dma by self */
-        bool do_dma;
 };
 
 /* This structure can contain rss message with maximum settings for indirection table and keysize
@@ -899,7 +896,7 @@ static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx)
         void *buf;
 
         buf = virtqueue_get_buf_ctx(rq->vq, len, ctx);
-        if (buf && rq->do_dma)
+        if (buf)
                 virtnet_rq_unmap(rq, buf, *len);
 
         return buf;
@@ -912,11 +909,6 @@ static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len)
         u32 offset;
         void *head;
 
-        if (!rq->do_dma) {
-                sg_init_one(rq->sg, buf, len);
-                return;
-        }
-
         head = page_address(rq->alloc_frag.page);
 
         offset = buf - head;
@@ -939,44 +931,42 @@ static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
 
         head = page_address(alloc_frag->page);
 
-        if (rq->do_dma) {
-                dma = head;
-
-                /* new pages */
-                if (!alloc_frag->offset) {
-                        if (rq->last_dma) {
-                                /* Now, the new page is allocated, the last dma
-                                 * will not be used. So the dma can be unmapped
-                                 * if the ref is 0.
-                                 */
-                                virtnet_rq_unmap(rq, rq->last_dma, 0);
-                                rq->last_dma = NULL;
-                        }
+        dma = head;
 
-                        dma->len = alloc_frag->size - sizeof(*dma);
+        /* new pages */
+        if (!alloc_frag->offset) {
+                if (rq->last_dma) {
+                        /* Now, the new page is allocated, the last dma
+                         * will not be used. So the dma can be unmapped
+                         * if the ref is 0.
+                         */
+                        virtnet_rq_unmap(rq, rq->last_dma, 0);
+                        rq->last_dma = NULL;
+                }
 
-                        addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1,
-                                                              dma->len, DMA_FROM_DEVICE, 0);
-                        if (virtqueue_dma_mapping_error(rq->vq, addr))
-                                return NULL;
+                dma->len = alloc_frag->size - sizeof(*dma);
 
-                        dma->addr = addr;
-                        dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr);
+                addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1,
+                                                      dma->len, DMA_FROM_DEVICE, 0);
+                if (virtqueue_dma_mapping_error(rq->vq, addr))
+                        return NULL;
 
-                        /* Add a reference to dma to prevent the entire dma from
-                         * being released during error handling. This reference
-                         * will be freed after the pages are no longer used.
-                         */
-                        get_page(alloc_frag->page);
-                        dma->ref = 1;
-                        alloc_frag->offset = sizeof(*dma);
+                dma->addr = addr;
+                dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr);
 
-                        rq->last_dma = dma;
-                }
+                /* Add a reference to dma to prevent the entire dma from
+                 * being released during error handling. This reference
+                 * will be freed after the pages are no longer used.
+                 */
+                get_page(alloc_frag->page);
+                dma->ref = 1;
+                alloc_frag->offset = sizeof(*dma);
 
-                ++dma->ref;
+                rq->last_dma = dma;
         }
 
+        ++dma->ref;
+
         buf = head + alloc_frag->offset;
 
         get_page(alloc_frag->page);
@@ -2452,8 +2442,7 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
 
         err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
         if (err < 0) {
-                if (rq->do_dma)
-                        virtnet_rq_unmap(rq, buf, 0);
+                virtnet_rq_unmap(rq, buf, 0);
                 put_page(virt_to_head_page(buf));
         }
 
@@ -2573,8 +2562,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
         ctx = mergeable_len_to_ctx(len + room, headroom);
[PATCH 4/5] virtio_net: enable premapped mode for merge and small by default
Currently, the virtio core will perform a dma operation for each
buffer, even though the same page may be operated on multiple times.
In premapped mode, we can perform only one dma operation for the pages
of the alloc frag. This is beneficial for the iommu device.

kernel command line: intel_iommu=on iommu.passthrough=0

       | strict=0   | strict=1
Before | 775496pps  | 428614pps
After  | 1109316pps | 742853pps

In 6.11, we disabled this feature because of a regression [1]. Now, we
have fixed the problem and re-enable it.

[1]: http://lore.kernel.org/all/8b20cc28-45a9-4643-8e87-ba164a540...@oracle.com

Signed-off-by: Xuan Zhuo
---
 drivers/net/virtio_net.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index cd90e77881df..8cf24b7b58bd 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -6133,6 +6133,21 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
         return -ENOMEM;
 }
 
+static void virtnet_rq_set_premapped(struct virtnet_info *vi)
+{
+        int i;
+
+        /* disable for big mode */
+        if (vi->mode == VIRTNET_MODE_BIG)
+                return;
+
+        for (i = 0; i < vi->max_queue_pairs; i++) {
+                /* error should never happen */
+                BUG_ON(virtqueue_set_dma_premapped(vi->rq[i].vq));
+                vi->rq[i].do_dma = true;
+        }
+}
+
 static int init_vqs(struct virtnet_info *vi)
 {
         int ret;
@@ -6146,6 +6161,8 @@ static int init_vqs(struct virtnet_info *vi)
         if (ret)
                 goto err_free;
 
+        virtnet_rq_set_premapped(vi);
+
         cpus_read_lock();
         virtnet_set_affinity(vi);
         cpus_read_unlock();
-- 
2.32.0.3.g01195cf9f
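To illustrate why one mapping per frag helps under an iommu, here is a
small user-space toy model that just counts map operations (the stub
function, sizes, and names are invented for this sketch; they are not
the kernel's DMA API):

#include <stdio.h>

static int map_ops;

/* Stub standing in for a DMA map call; it only counts invocations. */
static unsigned long dma_map_stub(unsigned long vaddr, unsigned int len)
{
        (void)len;
        map_ops++;
        return vaddr; /* identity "mapping" is fine for counting */
}

#define PAGE_SZ 4096u
#define BUF_SZ  1536u /* illustrative per-buffer size incl. headroom */

int main(void)
{
        unsigned long page = 0x10000, daddr;
        unsigned int off;

        /* Per-buffer mapping: every buffer carved from the page is mapped. */
        map_ops = 0;
        for (off = 0; off + BUF_SZ <= PAGE_SZ; off += BUF_SZ)
                dma_map_stub(page + off, BUF_SZ);
        printf("per-buffer: %d map ops\n", map_ops);

        /* Premapped: map the whole page once; each buffer reuses it via
         * daddr + offset, so the iommu sees a single map operation. */
        map_ops = 0;
        daddr = dma_map_stub(page, PAGE_SZ);
        for (off = 0; off + BUF_SZ <= PAGE_SZ; off += BUF_SZ)
                (void)(daddr + off); /* device address handed to the ring */
        printf("premapped:  %d map op\n", map_ops);
        return 0;
}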
Re: [PATCH 0/5] virtio_net: enable premapped mode by default
On Mon, Oct 14, 2024 at 11:12:29AM +0800, Xuan Zhuo wrote:
> In the last Linux version, we disabled this feature to fix a
> regression [1].
> 
> This patch set tries to fix the problem and re-enable the feature.
> 
> More info:
> http://lore.kernel.org/all/20240820071913.68004-1-xuanz...@linux.alibaba.com
> 
> Thanks.
> 
> [1]: http://lore.kernel.org/all/8b20cc28-45a9-4643-8e87-ba164a540...@oracle.com

Darren, you previously reported crashes with a patch very similar to
1/5. Can you please test this patchset and report whether they are
still observed? If yes, any data on how to reproduce them would be
very beneficial for Xuan Zhuo.

> Xuan Zhuo (5):
>   virtio-net: fix overflow inside virtnet_rq_alloc
>   virtio_net: introduce vi->mode
>   virtio_net: big mode skip the unmap check
>   virtio_net: enable premapped mode for merge and small by default
>   virtio_net: rx remove premapped failover code
> 
>  drivers/net/virtio_net.c | 168 ++++++++++++++++++++++++++++---------------------
>  1 file changed, 105 insertions(+), 63 deletions(-)
> 
> -- 
> 2.32.0.3.g01195cf9f