From: Toshiaki Makita <makita.toshi...@lab.ntt.co.jp>

This allows further redirection of xdp_frames like

 NIC -> veth--veth -> veth--veth
(XDP)         (XDP)         (XDP)

The intermediate XDP, redirecting packets from NIC to the other veth,
reuses xdp_mem info from NIC so that page recycling of the NIC works on
the destination veth's XDP.

Signed-off-by: Toshiaki Makita <makita.toshi...@lab.ntt.co.jp>
---
 drivers/net/veth.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 85 insertions(+), 9 deletions(-)
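Illustration only, not part of this patch: the "intermediate XDP" described
above is an ordinary XDP program returning XDP_REDIRECT, attached to the veth
that receives the frames coming from the NIC. Below is a minimal sketch using
current libbpf conventions; the map name tx_port, the program name, and the
assumption that user space fills slot 0 with the ifindex of the next veth are
illustrative choices, not something this patch defines.

/* Redirect every received frame to the device whose ifindex is stored
 * in tx_port[0].
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u32);
} tx_port SEC(".maps");

SEC("xdp")
int xdp_redirect_veth(struct xdp_md *ctx)
{
	/* flags == 0: XDP_ABORTED is returned if the redirect fails */
	return bpf_redirect_map(&tx_port, 0, 0);
}

char _license[] SEC("license") = "GPL";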
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index b1d591be0eba..98fc91a64e29 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -43,6 +43,7 @@ struct veth_priv {
 	struct bpf_prog __rcu	*xdp_prog;
 	struct net_device __rcu	*peer;
 	atomic64_t		dropped;
+	struct xdp_mem_info	xdp_mem;
 	unsigned		requested_headroom;
 	bool			rx_notify_masked;
 	struct ptr_ring		xdp_ring;
@@ -346,9 +347,21 @@ static void veth_xdp_flush(struct net_device *dev)
 	rcu_read_unlock();
 }
 
+static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
+{
+	struct xdp_frame *frame = convert_to_xdp_frame(xdp);
+
+	if (unlikely(!frame))
+		return -EOVERFLOW;
+
+	return veth_xdp_xmit(dev, frame);
+}
+
 static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
-					struct xdp_frame *frame)
+					struct xdp_frame *frame, bool *xdp_xmit,
+					bool *xdp_redir)
 {
+	struct xdp_frame orig_frame;
 	struct bpf_prog *xdp_prog;
 	unsigned int headroom;
 	struct sk_buff *skb;
@@ -372,6 +385,29 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
 		case XDP_PASS:
 			delta = frame->data - xdp.data;
 			break;
+		case XDP_TX:
+			orig_frame = *frame;
+			xdp.data_hard_start = frame;
+			xdp.rxq->mem = frame->mem;
+			if (unlikely(veth_xdp_tx(priv->dev, &xdp))) {
+				trace_xdp_exception(priv->dev, xdp_prog, act);
+				frame = &orig_frame;
+				goto err_xdp;
+			}
+			*xdp_xmit = true;
+			rcu_read_unlock();
+			goto xdp_xmit;
+		case XDP_REDIRECT:
+			orig_frame = *frame;
+			xdp.data_hard_start = frame;
+			xdp.rxq->mem = frame->mem;
+			if (xdp_do_redirect(priv->dev, &xdp, xdp_prog)) {
+				frame = &orig_frame;
+				goto err_xdp;
+			}
+			*xdp_redir = true;
+			rcu_read_unlock();
+			goto xdp_xmit;
 		default:
 			bpf_warn_invalid_xdp_action(act);
 		case XDP_ABORTED:
@@ -396,12 +432,13 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
 err_xdp:
 	rcu_read_unlock();
 	xdp_return_frame(frame);
-
+xdp_xmit:
 	return NULL;
 }
 
 static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
-					struct sk_buff *skb)
+					struct sk_buff *skb, bool *xdp_xmit,
+					bool *xdp_redir)
 {
 	u32 pktlen, headroom, act, metalen;
 	int size, mac_len, delta, off;
@@ -469,6 +506,26 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
 	switch (act) {
 	case XDP_PASS:
 		break;
+	case XDP_TX:
+		get_page(virt_to_page(xdp.data));
+		dev_consume_skb_any(skb);
+		xdp.rxq->mem = priv->xdp_mem;
+		if (unlikely(veth_xdp_tx(priv->dev, &xdp))) {
+			trace_xdp_exception(priv->dev, xdp_prog, act);
+			goto err_xdp;
+		}
+		*xdp_xmit = true;
+		rcu_read_unlock();
+		goto xdp_xmit;
+	case XDP_REDIRECT:
+		get_page(virt_to_page(xdp.data));
+		dev_consume_skb_any(skb);
+		xdp.rxq->mem = priv->xdp_mem;
+		if (xdp_do_redirect(priv->dev, &xdp, xdp_prog))
+			goto err_xdp;
+		*xdp_redir = true;
+		rcu_read_unlock();
+		goto xdp_xmit;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 	case XDP_ABORTED:
@@ -496,9 +553,15 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
 	rcu_read_unlock();
 	dev_kfree_skb_any(skb);
 	return NULL;
+err_xdp:
+	rcu_read_unlock();
+	page_frag_free(xdp.data);
+xdp_xmit:
+	return NULL;
 }
 
-static int veth_xdp_rcv(struct veth_priv *priv, int budget)
+static int veth_xdp_rcv(struct veth_priv *priv, int budget, bool *xdp_xmit,
+			bool *xdp_redir)
 {
 	int i, done = 0;
 
@@ -509,10 +572,12 @@ static int veth_xdp_rcv(struct veth_priv *priv, int budget)
 		if (!ptr)
 			break;
 
-		if (veth_is_xdp_frame(ptr))
-			skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr));
-		else
-			skb = veth_xdp_rcv_skb(priv, ptr);
+		if (veth_is_xdp_frame(ptr)) {
+			skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr),
+					       xdp_xmit, xdp_redir);
+		} else {
+			skb = veth_xdp_rcv_skb(priv, ptr, xdp_xmit, xdp_redir);
+		}
 
 		if (skb)
 			napi_gro_receive(&priv->xdp_napi, skb);
@@ -527,9 +592,11 @@ static int veth_poll(struct napi_struct *napi, int budget)
 {
 	struct veth_priv *priv =
 		container_of(napi, struct veth_priv, xdp_napi);
+	bool xdp_xmit = false;
+	bool xdp_redir = false;
 	int done;
 
-	done = veth_xdp_rcv(priv, budget);
+	done = veth_xdp_rcv(priv, budget, &xdp_xmit, &xdp_redir);
 
 	if (done < budget && napi_complete_done(napi, done)) {
 		/* Write rx_notify_masked before reading ptr_ring */
@@ -540,6 +607,11 @@ static int veth_poll(struct napi_struct *napi, int budget)
 		}
 	}
 
+	if (xdp_xmit)
+		veth_xdp_flush(priv->dev);
+	if (xdp_redir)
+		xdp_do_flush_map();
+
 	return done;
 }
 
@@ -585,6 +657,9 @@ static int veth_open(struct net_device *dev)
 	if (err < 0)
 		goto err_reg_mem;
 
+	/* Save original mem info as it can be overwritten */
+	priv->xdp_mem = priv->xdp_rxq.mem;
+
 	if (rtnl_dereference(priv->xdp_prog)) {
 		err = veth_napi_add(dev);
 		if (err)
@@ -615,6 +690,7 @@ static int veth_close(struct net_device *dev)
 	if (rtnl_dereference(priv->xdp_prog))
 		veth_napi_del(dev);
 
+	priv->xdp_rxq.mem = priv->xdp_mem;
 	xdp_rxq_info_unreg(&priv->xdp_rxq);
 
 	return 0;
-- 
2.14.3