From: Dongli Zhang <dongli.zh...@oracle.com> The TUN can be used as vhost-net backend. E.g, the tun_net_xmit() is the interface to forward the skb from TUN to vhost-net/virtio-net.
However, there are many "goto drop" in the TUN driver. Therefore, the kfree_skb_reason() is involved at each "goto drop" to help userspace ftrace/ebpf to track the reason for the loss of packets. The below reasons are introduced: - SKB_DROP_REASON_DEV_READY - SKB_DROP_REASON_NOMEM - SKB_DROP_REASON_HDR_TRUNC - SKB_DROP_REASON_TAP_FILTER - SKB_DROP_REASON_TAP_TXFILTER Cc: Joao Martins <joao.m.mart...@oracle.com> Cc: Joe Jin <joe....@oracle.com> Signed-off-by: Dongli Zhang <dongli.zh...@oracle.com> Signed-off-by: David S. Miller <da...@davemloft.net> Acked-by: Nikolay Borisov <nbori...@suse.com> Signed-off-by: Nikolay Borisov <nikolay.bori...@virtuozzo.com> --- drivers/net/tun.c | 38 ++++++++++++++++++++++++++++++-------- include/linux/skbuff.h | 18 ++++++++++++++++++ include/trace/events/skb.h | 5 +++++ 3 files changed, 53 insertions(+), 8 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 438f790ddc72..c3a459b4439b 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1009,6 +1009,7 @@ static unsigned int run_ebpf_filter(struct tun_struct *tun, static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); + enum skb_drop_reason drop_reason; int txq = skb->queue_mapping; struct netdev_queue *queue; struct tun_file *tfile; @@ -1018,8 +1019,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) tfile = rcu_dereference(tun->tfiles[txq]); /* Drop packet if interface is not attached */ - if (!tfile) + if (!tfile) { + drop_reason = SKB_DROP_REASON_DEV_READY; goto drop; + } if (!rcu_dereference(tun->steering_prog)) tun_automq_xmit(tun, skb); @@ -1029,19 +1032,32 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Drop if the filter does not like it. * This is a noop if the filter is disabled. * Filter can be enabled only for the TAP devices. */ - if (!check_filter(&tun->txflt, skb)) + if (!check_filter(&tun->txflt, skb)) { + drop_reason = SKB_DROP_REASON_TAP_TXFILTER; goto drop; + } if (tfile->socket.sk->sk_filter && - sk_filter(tfile->socket.sk, skb)) + sk_filter(tfile->socket.sk, skb)) { + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto drop; + } len = run_ebpf_filter(tun, skb, len); - if (len == 0 || pskb_trim(skb, len)) + if (len == 0) { + drop_reason = SKB_DROP_REASON_TAP_FILTER; goto drop; + } - if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) + if (pskb_trim(skb, len)) { + drop_reason = SKB_DROP_REASON_NOMEM; goto drop; + } + + if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) { + drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT; + goto drop; + } skb_tx_timestamp(skb); @@ -1052,8 +1068,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) nf_reset_ct(skb); - if (ptr_ring_produce(&tfile->tx_ring, skb)) + if (ptr_ring_produce(&tfile->tx_ring, skb)) { + drop_reason = SKB_DROP_REASON_FULL_RING; goto drop; + } /* NETIF_F_LLTX requires to do our own update of trans_start */ queue = netdev_get_tx_queue(dev, txq); @@ -1070,7 +1088,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) drop: atomic_long_inc(&dev->tx_dropped); skb_tx_error(skb); - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); rcu_read_unlock(); return NET_XMIT_DROP; } @@ -1666,6 +1684,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, u32 rxhash = 0; int skb_xdp = 1; bool frags = tun_napi_frags_enabled(tfile); + enum skb_drop_reason drop_reason; if (!(tun->flags & IFF_NO_PI)) { if (len < sizeof(pi)) @@ -1769,9 +1788,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (err) { err = -EFAULT; + drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT; drop: atomic_long_inc(&tun->dev->rx_dropped); - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); if (frags) { tfile->napi.skb = NULL; mutex_unlock(&tfile->napi_mutex); @@ -1818,6 +1838,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, case IFF_TAP: if (frags && !pskb_may_pull(skb, ETH_HLEN)) { err = -ENOMEM; + drop_reason = SKB_DROP_REASON_HDR_TRUNC; goto drop; } skb->protocol = eth_type_trans(skb, tun->dev); @@ -1871,6 +1892,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (unlikely(!(tun->dev->flags & IFF_UP))) { err = -EIO; rcu_read_unlock(); + drop_reason = SKB_DROP_REASON_DEV_READY; goto drop; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c20966c41348..057c7b7606c0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -422,7 +422,25 @@ enum skb_drop_reason { SKB_DROP_REASON_DEV_HDR, /* device driver specific * header/metadata is invalid */ + /* the device is not ready to xmit/recv due to any of its data + * structure that is not up/ready/initialized, e.g., the IFF_UP is + * not set, or driver specific tun->tfiles[txq] is not initialized + */ + SKB_DROP_REASON_DEV_READY, SKB_DROP_REASON_FULL_RING, /* ring buffer is full */ + SKB_DROP_REASON_NOMEM, /* error due to OOM */ + SKB_DROP_REASON_HDR_TRUNC, /* failed to trunc/extract the header + * from networking data, e.g., failed + * to pull the protocol header from + * frags via pskb_may_pull() + */ + SKB_DROP_REASON_TAP_FILTER, /* dropped by (ebpf) filter directly + * attached to tun/tap, e.g., via + * TUNSETFILTEREBPF + */ + SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented + * at tun/tap, e.g., check_filter() + */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index bc9a69db4303..81d62df3f1ed 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -55,7 +55,12 @@ EM(SKB_DROP_REASON_SKB_GSO_SEG, SKB_GSO_SEG) \ EM(SKB_DROP_REASON_SKB_UCOPY_FAULT, SKB_UCOPY_FAULT) \ EM(SKB_DROP_REASON_DEV_HDR, DEV_HDR) \ + EM(SKB_DROP_REASON_DEV_READY, DEV_READY) \ EM(SKB_DROP_REASON_FULL_RING, FULL_RING) \ + EM(SKB_DROP_REASON_NOMEM, NOMEM) \ + EM(SKB_DROP_REASON_HDR_TRUNC, HDR_TRUNC) \ + EM(SKB_DROP_REASON_TAP_FILTER, TAP_FILTER) \ + EM(SKB_DROP_REASON_TAP_TXFILTER, TAP_TXFILTER) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- 2.34.1 _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel