On Thu, Nov 27, 2025 at 11:24:03AM +0800, Longjun Tang wrote:
> From: Tang Longjun <[email protected]>
> 
> Track the skb and virtqueue by attaching a kprobe to the start_xmit() function
> 
> Signed-off-by: Tang Longjun <[email protected]>
> ---
>  tools/virtio/virtnet_mon/virtnet_mon.c | 793 ++++++++++++++++++++++++-
>  1 file changed, 772 insertions(+), 21 deletions(-)
> 
> diff --git a/tools/virtio/virtnet_mon/virtnet_mon.c b/tools/virtio/virtnet_mon/virtnet_mon.c
> index 696e621cf803..36b51d0a13d4 100644
> --- a/tools/virtio/virtnet_mon/virtnet_mon.c
> +++ b/tools/virtio/virtnet_mon/virtnet_mon.c
> @@ -6,15 +6,724 @@
>  #include <linux/uaccess.h>
>  #include <linux/miscdevice.h>
>  #include <linux/poll.h>
> +#include <linux/string.h>
> +#include <linux/if_ether.h>
> +
> +#include <linux/kprobes.h>
> +#include <linux/netdevice.h>
> +#include <linux/skbuff.h>
> +#include <linux/ip.h>
> +#include <linux/ipv6.h>
> +#include <linux/tcp.h>
> +#include <linux/udp.h>
> +#include <linux/icmp.h>
> +#include <linux/icmpv6.h>
> +#include <linux/version.h>
> +#include <linux/time.h>
> +#include <linux/smp.h>
> +#include <linux/virtio.h>
> +#include <linux/scatterlist.h>
> +#include <linux/bpf.h>
> +#include <linux/dim.h>
> +#include <linux/mutex.h>
> +#include <linux/workqueue.h>
> +#include <linux/spinlock.h>
> +
> +#include <linux/u64_stats_sync.h>
> +#include <linux/mm_types_task.h>
> +#include <linux/virtio_net.h>
> +#include <linux/virtio_ring.h>
> +#include <net/xdp.h>
> +
>  
>  #define DEVICE_NAME "virtnet_mon"
> -#define KFIFO_SIZE 1024     // ring buffer size
> +#define KFIFO_SIZE 65536     // ring buffer size
> +#define WRITE_SIZE 1024
> +#define READ_SIZE 16384
> +#define LINE_MAX_SIZE 1024
> +
> +#if defined(CONFIG_X86_64)
> +#define KP_GET_ARG(regs, idx) \
> +     ((idx) == 0 ? (unsigned long)(regs)->di : \
> +     (idx) == 1 ? (unsigned long)(regs)->si : 0UL)
> +#elif defined(CONFIG_ARM64)
> +#define KP_GET_ARG(regs, idx) \
> +     ((idx) < 8 ? (unsigned long)(regs)->regs[(idx)] : 0UL)
> +#endif
> +
> +struct _virtnet_sq_stats {
> +     struct u64_stats_sync syncp;
> +     u64_stats_t packets;
> +     u64_stats_t bytes;
> +     u64_stats_t xdp_tx;
> +     u64_stats_t xdp_tx_drops;
> +     u64_stats_t kicks;
> +     u64_stats_t tx_timeouts;
> +     u64_stats_t stop;
> +     u64_stats_t wake;
> +};
> +
> +struct _virtnet_interrupt_coalesce {
> +     u32 max_packets;
> +     u32 max_usecs;
> +};
> +
> +struct _send_queue {
> +     /* Virtqueue associated with this send queue */
> +     struct virtqueue *vq;
> +
> +     /* TX: fragments + linear part + virtio header */
> +     struct scatterlist sg[MAX_SKB_FRAGS + 2];
> +
> +     /* Name of the send queue: output.$index */
> +     char name[16];
> +
> +     struct _virtnet_sq_stats stats;
> +
> +     struct _virtnet_interrupt_coalesce intr_coal;
> +
> +     struct napi_struct napi;
> +
> +     /* Record whether sq is in reset state. */
> +     bool reset;
> +
> +     struct xsk_buff_pool *xsk_pool;
> +
> +     dma_addr_t xsk_hdr_dma_addr;
> +};
> +
> +struct _virtnet_rq_stats {
> +     struct u64_stats_sync syncp;
> +     u64_stats_t packets;
> +     u64_stats_t bytes;
> +     u64_stats_t drops;
> +     u64_stats_t xdp_packets;
> +     u64_stats_t xdp_tx;
> +     u64_stats_t xdp_redirects;
> +     u64_stats_t xdp_drops;
> +     u64_stats_t kicks;
> +};
> +
> +struct _ewma_pkt_len {
> +     unsigned long internal;
> +};
> +
> +struct _virtnet_rq_dma {
> +     dma_addr_t addr;
> +     u32 ref;
> +     u16 len;
> +     u16 need_sync;
> +};
> +
> +struct _receive_queue {
> +     /* Virtqueue associated with this receive_queue */
> +     struct virtqueue *vq;
> +
> +     struct napi_struct napi;
> +
> +     struct bpf_prog __rcu *xdp_prog;
> +
> +     struct _virtnet_rq_stats stats;
> +
> +     /* The number of rx notifications */
> +     u16 calls;
> +
> +     /* Is dynamic interrupt moderation enabled? */
> +     bool dim_enabled;
> +
> +     /* Used to protect dim_enabled and inter_coal */
> +     struct mutex dim_lock;
> +
> +     /* Dynamic Interrupt Moderation */
> +     struct dim dim;
> +
> +     u32 packets_in_napi;
> +
> +     struct _virtnet_interrupt_coalesce intr_coal;
> +
> +     /* Chain pages by the private ptr. */
> +     struct page *pages;
> +
> +     /* Average packet length for mergeable receive buffers. */
> +     struct _ewma_pkt_len mrg_avg_pkt_len;
> +
> +     /* Page frag for packet buffer allocation. */
> +     struct page_frag alloc_frag;
> +
> +     /* RX: fragments + linear part + virtio header */
> +     struct scatterlist sg[MAX_SKB_FRAGS + 2];
> +
> +     /* Min single buffer size for mergeable buffers case. */
> +     unsigned int min_buf_len;
> +
> +     /* Name of this receive queue: input.$index */
> +     char name[16];
> +
> +     struct xdp_rxq_info xdp_rxq;
> +
> +     /* Record the last dma info to free after new pages are allocated. */
> +     struct _virtnet_rq_dma *last_dma;
> +
> +     struct xsk_buff_pool *xsk_pool;
> +
> +     /* xdp rxq used by xsk */
> +     struct xdp_rxq_info xsk_rxq_info;
> +
> +     struct xdp_buff **xsk_buffs;
> +};
> +
> +#define VIRTIO_NET_RSS_MAX_KEY_SIZE     40
> +
> +struct _control_buf {
> +     struct virtio_net_ctrl_hdr hdr;
> +     virtio_net_ctrl_ack status;
> +};
> +
> +struct _virtnet_info {
> +     struct virtio_device *vdev;
> +     struct virtqueue *cvq;
> +     struct net_device *dev;
> +     struct _send_queue *sq;
> +     struct _receive_queue *rq;
> +     unsigned int status;
> +
> +     /* Max # of queue pairs supported by the device */
> +     u16 max_queue_pairs;
> +
> +     /* # of queue pairs currently used by the driver */
> +     u16 curr_queue_pairs;
> +
> +     /* # of XDP queue pairs currently used by the driver */
> +     u16 xdp_queue_pairs;
> +
> +     /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */
> +     bool xdp_enabled;
> +
> +     /* I like... big packets and I cannot lie! */
> +     bool big_packets;
> +
> +     /* number of sg entries allocated for big packets */
> +     unsigned int big_packets_num_skbfrags;
> +
> +     /* Host will merge rx buffers for big packets (shake it! shake it!) */
> +     bool mergeable_rx_bufs;
> +
> +     /* Host supports rss and/or hash report */
> +     bool has_rss;
> +     bool has_rss_hash_report;
> +     u8 rss_key_size;
> +     u16 rss_indir_table_size;
> +     u32 rss_hash_types_supported;
> +     u32 rss_hash_types_saved;
> +     struct virtio_net_rss_config_hdr *rss_hdr;
> +     struct virtio_net_rss_config_trailer rss_trailer;
> +     u8 rss_hash_key_data[VIRTIO_NET_RSS_MAX_KEY_SIZE];
> +
> +     /* Has control virtqueue */
> +     bool has_cvq;
> +
> +     /* Lock to protect the control VQ */
> +     struct mutex cvq_lock;
> +
> +     /* Host can handle any s/g split between our header and packet data */
> +     bool any_header_sg;
> +
> +     /* Packet virtio header size */
> +     u8 hdr_len;
> +
> +     /* Work struct for delayed refilling if we run low on memory. */
> +     struct delayed_work refill;
> +
> +     /* UDP tunnel support */
> +     bool tx_tnl;
> +
> +     bool rx_tnl;
> +
> +     bool rx_tnl_csum;
> +
> +     /* Is delayed refill enabled? */
> +     bool refill_enabled;
> +
> +     /* The lock to synchronize the access to refill_enabled */
> +     spinlock_t refill_lock;
> +
> +     /* Work struct for config space updates */
> +     struct work_struct config_work;
> +
> +     /* Work struct for setting rx mode */
> +     struct work_struct rx_mode_work;
> +
> +     /* OK to queue work setting RX mode? */
> +     bool rx_mode_work_enabled;
> +
> +     /* Is the affinity hint set for virtqueues? */
> +     bool affinity_hint_set;
> +
> +     /* CPU hotplug instances for online & dead */
> +     struct hlist_node node;
> +     struct hlist_node node_dead;
> +
> +     struct _control_buf *ctrl;
> +
> +     /* Ethtool settings */
> +     u8 duplex;
> +     u32 speed;
> +
> +     /* Is rx dynamic interrupt moderation enabled? */
> +     bool rx_dim_enabled;
> +
> +     /* Interrupt coalescing settings */
> +     struct _virtnet_interrupt_coalesce intr_coal_tx;
> +     struct _virtnet_interrupt_coalesce intr_coal_rx;
> +
> +     unsigned long guest_offloads;
> +     unsigned long guest_offloads_capable;
> +
> +     /* failover when STANDBY feature enabled */
> +     struct failover *failover;
> +
> +     u64 device_stats_cap;
> +};
> +
> +struct _vring_desc_state_split {
> +     void *data;                     /* Data for callback. */
> +     struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
> +};
> +
> +struct _vring_desc_extra {
> +     dma_addr_t addr;                /* Descriptor DMA addr. */
> +     u32 len;                        /* Descriptor length. */
> +     u16 flags;                      /* Descriptor flags. */
> +     u16 next;                       /* The next desc state in a list. */
> +};
> +
> +struct _vring_virtqueue_split {
> +     /* Actual memory layout for this queue. */
> +     struct vring vring;
> +
> +     /* Last written value to avail->flags */
> +     u16 avail_flags_shadow;
> +
> +     /*
> +      * Last written value to avail->idx in
> +      * guest byte order.
> +      */
> +     u16 avail_idx_shadow;
> +
> +     /* Per-descriptor state. */
> +     struct _vring_desc_state_split *desc_state;
> +     struct _vring_desc_extra *desc_extra;
> +
> +     /* DMA address and size information */
> +     dma_addr_t queue_dma_addr;
> +     size_t queue_size_in_bytes;
> +
> +     /*
> +      * The parameters for creating vrings are reserved for creating new
> +      * vring.
> +      */
> +     u32 vring_align;
> +     bool may_reduce_num;
> +};
> +
> +struct _vring_desc_state_packed {
> +     void *data;                     /* Data for callback. */
> +     struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
> +     u16 num;                        /* Descriptor list length. */
> +     u16 last;                       /* The last desc state in a list. */
> +};
> +
> +struct _vring_virtqueue_packed {
> +     /* Actual memory layout for this queue. */
> +     struct {
> +             unsigned int num;
> +             struct vring_packed_desc *desc;
> +             struct vring_packed_desc_event *driver;
> +             struct vring_packed_desc_event *device;
> +     } vring;
> +
> +     /* Driver ring wrap counter. */
> +     bool avail_wrap_counter;
> +
> +     /* Avail used flags. */
> +     u16 avail_used_flags;
> +
> +     /* Index of the next avail descriptor. */
> +     u16 next_avail_idx;
> +
> +     /*
> +      * Last written value to driver->flags in
> +      * guest byte order.
> +      */
> +     u16 event_flags_shadow;
> +
> +     /* Per-descriptor state. */
> +     struct _vring_desc_state_packed *desc_state;
> +     struct _vring_desc_extra *desc_extra;
> +
> +     /* DMA address and size information */
> +     dma_addr_t ring_dma_addr;
> +     dma_addr_t driver_event_dma_addr;
> +     dma_addr_t device_event_dma_addr;
> +     size_t ring_size_in_bytes;
> +     size_t event_size_in_bytes;
> +};
> +
> +struct _vring_virtqueue {
> +     struct virtqueue vq;
> +
> +     /* Is this a packed ring? */
> +     bool packed_ring;
> +
> +     /* Is DMA API used? */
> +     bool use_dma_api;
> +
> +     /* Can we use weak barriers? */
> +     bool weak_barriers;
> +
> +     /* Other side has made a mess, don't try any more. */
> +     bool broken;
> +
> +     /* Host supports indirect buffers */
> +     bool indirect;
> +
> +     /* Host publishes avail event idx */
> +     bool event;
> +
> +     /* Head of free buffer list. */
> +     unsigned int free_head;
> +     /* Number we've added since last sync. */
> +     unsigned int num_added;
> +
> +     /* Last used index we've seen.
> +      * for split ring, it just contains last used index
> +      * for packed ring:
> +      * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index.
> +      * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter.
> +      */
> +     u16 last_used_idx;
>  
> -static DEFINE_KFIFO(virtnet_mon_kfifo, char, KFIFO_SIZE);
> +     /* Hint for event idx: already triggered no need to disable. */
> +     bool event_triggered;
> +
> +     union {
> +             /* Available for split ring */
> +             struct _vring_virtqueue_split split;
> +
> +             /* Available for packed ring */
> +             struct _vring_virtqueue_packed packed;
> +     };
> +
> +     /* How to notify other side. FIXME: commonalize hcalls! */
> +     bool (*notify)(struct virtqueue *vq);
> +
> +     /* DMA, allocation, and size information */
> +     bool we_own_ring;
> +
> +     union virtio_map map;
> +};
> +
> +/* RX or TX */
> +enum pkt_dir {
> +     PKT_DIR_UN = 0,      /* Unknown */
> +     PKT_DIR_RX = 1,      /* RX */
> +     PKT_DIR_TX = 2,      /* TX */
> +     PKT_DIR_MAX
> +};
> +
> +enum event_type {
> +     START_XMIT_PRE_EVENT = 1,
> +     START_XMIT_POST_EVENT = 2,
> +};
> +
> +struct iph_info {
> +     struct sk_buff *skb;        /* SKB */
> +     u8 iph_proto;               /* IP header protocol */
> +     u32 seq;                    /* absolute sequence number */
> +};
> +
> +struct queue_info {
> +     struct virtqueue *vq;
> +     char name[16];
> +     unsigned int num_free;
> +     unsigned int num;
> +     __virtio16 avail_flags;
> +     __virtio16 avail_idx;
> +     u16 avail_flags_shadow;
> +     u16 avail_idx_shadow;
> +     __virtio16 used_flags;
> +     __virtio16 used_idx;
> +     u16 last_used_idx;
> +     bool broken;
> +};


Not at all excited about all the code duplication going on here: these
are wholesale copies of driver-private structures (virtnet_info,
send_queue, vring_virtqueue, ...) that will silently go stale the moment
the driver or virtio_ring changes its layout.
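
For the virtqueue side at least, most of what queue_info carries is
already reachable through the exported API. Rough, untested sketch of
the direction I'd rather see (public struct virtqueue fields and
accessors only; fill_queue_info() is just a name I made up):

	static void fill_queue_info(struct queue_info *qi, struct virtqueue *vq)
	{
		qi->vq = vq;
		strscpy(qi->name, vq->name, sizeof(qi->name));
		qi->num = virtqueue_get_vring_size(vq);
		qi->num_free = vq->num_free;	/* public field */
		qi->broken = virtqueue_is_broken(vq);

		/*
		 * The avail/used indices and their shadow copies have no
		 * exported accessors.  Needing them is a strong hint that
		 * this monitoring belongs in the driver behind a proper
		 * hook or tracepoint, not in a module that re-declares
		 * vring_virtqueue.
		 */
	}

Everything the public API does not expose should come from the driver
itself, not from a second copy of its internals.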


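Same concern applies to the per-arch KP_GET_ARG() macro: architectures
that select HAVE_REGS_AND_STACK_ACCESS_API already provide
regs_get_kernel_argument(), so the open-coded register names can go.
Untested sketch of a pre-handler on top of it:

	static int start_xmit_pre(struct kprobe *kp, struct pt_regs *regs)
	{
		/* start_xmit(struct sk_buff *skb, struct net_device *dev) */
		struct sk_buff *skb =
			(struct sk_buff *)regs_get_kernel_argument(regs, 0);
		struct net_device *dev =
			(struct net_device *)regs_get_kernel_argument(regs, 1);

		if (!skb || !dev)
			return 0;

		pr_debug("%s: txq %u len %u proto 0x%04x\n",
			 netdev_name(dev), skb_get_queue_mapping(skb),
			 skb->len, ntohs(skb->protocol));
		return 0;
	}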