From: Björn Töpel <bjorn.to...@intel.com>

The bpf_xsk_redirect function is a new redirect bpf function, in
addition to bpf_redirect/bpf_redirect_map. If an XDP socket has been
attached to a netdev Rx queue via the XDP_ATTACH bind() option and
bpf_xsk_redirect is called, the packet will be redirected to the
attached socket.
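For illustration, an XDP program using the new helper can be as
simple as the sketch below (the program and section names are made
up for the example, and the helper declaration mirrors the uapi
signature further down instead of coming from a helper header):

#include <linux/bpf.h>

#define SEC(name) __attribute__((section(name), used))

static int (*bpf_xsk_redirect)(void *ctx) =
	(void *)BPF_FUNC_xsk_redirect;

SEC("xdp")
int xsk_redirect_prog(struct xdp_md *ctx)
{
	/* XDP_REDIRECT if a socket is attached to this Rx queue,
	 * XDP_PASS otherwise, so frames on unattached queues keep
	 * flowing up the regular stack.
	 */
	return bpf_xsk_redirect(ctx);
}

char _license[] SEC("license") = "GPL";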
The bpf_xsk_redirect function returns XDP_REDIRECT if there is a
socket attached to the receiving queue, otherwise XDP_PASS.

This commit also adds the corresponding tracepoints for the redirect
call.

Signed-off-by: Björn Töpel <bjorn.to...@intel.com>
---
 include/linux/filter.h     |   4 ++
 include/trace/events/xdp.h |  61 ++++++++++++++++++++++
 include/uapi/linux/bpf.h   |  14 +++++-
 net/core/filter.c          | 100 +++++++++++++++++++++++++++++++++++++
 4 files changed, 178 insertions(+), 1 deletion(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index d16deead65c6..691b5c1003c8 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -525,6 +525,10 @@ struct bpf_redirect_info {
 	u32 flags;
 	struct bpf_map *map;
 	struct bpf_map *map_to_flush;
+#ifdef CONFIG_XDP_SOCKETS
+	struct xdp_sock *xsk;
+	struct xdp_sock *xsk_to_flush;
+#endif
 	u32 kern_flags;
 };
 
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index e95cb86b65cf..30f399bd462b 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -158,6 +158,67 @@ struct _bpf_dtab_netdev {
 	trace_xdp_redirect_map_err(dev, xdp, devmap_ifindex(fwd, map), \
 				   err, map, idx)
 
+DECLARE_EVENT_CLASS(xsk_redirect_template,
+
+	TP_PROTO(const struct net_device *dev,
+		 const struct bpf_prog *xdp,
+		 int err,
+		 struct xdp_buff *xbuff),
+
+	TP_ARGS(dev, xdp, err, xbuff),
+
+	TP_STRUCT__entry(
+		__field(int, prog_id)
+		__field(u32, act)
+		__field(int, ifindex)
+		__field(int, err)
+		__field(u32, queue_index)
+		__field(enum xdp_mem_type, mem_type)
+	),
+
+	TP_fast_assign(
+		__entry->prog_id	= xdp->aux->id;
+		__entry->act		= XDP_REDIRECT;
+		__entry->ifindex	= dev->ifindex;
+		__entry->err		= err;
+		__entry->queue_index	= xbuff->rxq->queue_index;
+		__entry->mem_type	= xbuff->rxq->mem.type;
+	),
+
+	TP_printk("prog_id=%d action=%s ifindex=%d err=%d queue_index=%d"
+		  " mem_type=%d",
+		  __entry->prog_id,
+		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+		  __entry->ifindex,
+		  __entry->err,
+		  __entry->queue_index,
+		  __entry->mem_type)
+);
+
+DEFINE_EVENT(xsk_redirect_template, xsk_redirect,
+	TP_PROTO(const struct net_device *dev,
+		 const struct bpf_prog *xdp,
+		 int err,
+		 struct xdp_buff *xbuff),
+
+	TP_ARGS(dev, xdp, err, xbuff)
+);
+
+DEFINE_EVENT(xsk_redirect_template, xsk_redirect_err,
+	TP_PROTO(const struct net_device *dev,
+		 const struct bpf_prog *xdp,
+		 int err,
+		 struct xdp_buff *xbuff),
+
+	TP_ARGS(dev, xdp, err, xbuff)
+);
+
+#define _trace_xsk_redirect(dev, xdp, xbuff)		\
+	 trace_xsk_redirect(dev, xdp, 0, xbuff)
+
+#define _trace_xsk_redirect_err(dev, xdp, xbuff, err)	\
+	 trace_xsk_redirect_err(dev, xdp, err, xbuff)
+
 TRACE_EVENT(xdp_cpumap_kthread,
 
 	TP_PROTO(int map_id, unsigned int processed, unsigned int drops,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a84fd232d934..2912d87a39ba 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2298,6 +2298,17 @@ union bpf_attr {
  *		payload and/or *pop* value being to large.
  *	Return
  *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xsk_redirect(struct xdp_buff *xdp_md)
+ *	Description
+ *		Redirect the packet to the attached XDP socket, if any.
+ *		An XDP socket can be attached to a network interface Rx
+ *		queue by passing the XDP_ATTACH option to bind() on
+ *		the socket.
+ *
+ *	Return
+ *		**XDP_REDIRECT** if there is an XDP socket attached to the Rx
+ *		queue receiving the frame, otherwise **XDP_PASS**.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2391,7 +2402,8 @@ union bpf_attr {
 	FN(map_pop_elem),		\
 	FN(map_peek_elem),		\
 	FN(msg_push_data),		\
-	FN(msg_pop_data),
+	FN(msg_pop_data),		\
+	FN(xsk_redirect),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 3d54af4c363d..86c5fe5a9ec0 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3415,6 +3415,17 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
 	return 0;
 }
 
+static void xdp_do_flush_xsk(struct bpf_redirect_info *ri)
+{
+#ifdef CONFIG_XDP_SOCKETS
+	struct xdp_sock *xsk = ri->xsk_to_flush;
+
+	ri->xsk_to_flush = NULL;
+	if (xsk)
+		xsk_flush(xsk);
+#endif
+}
+
 void xdp_do_flush_map(void)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
@@ -3436,6 +3447,8 @@ void xdp_do_flush_map(void)
 			break;
 		}
 	}
+
+	xdp_do_flush_xsk(ri);
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush_map);
 
@@ -3501,6 +3514,30 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 	return err;
 }
 
+#ifdef CONFIG_XDP_SOCKETS
+static int xdp_do_xsk_redirect(struct net_device *dev, struct xdp_buff *xdp,
+			       struct bpf_prog *xdp_prog,
+			       struct bpf_redirect_info *ri)
+{
+	struct xdp_sock *xsk = ri->xsk;
+	int err;
+
+	ri->xsk = NULL;
+	ri->xsk_to_flush = xsk;
+
+	err = xsk_attached_rcv(xsk, xdp);
+	if (unlikely(err))
+		goto err;
+
+	_trace_xsk_redirect(dev, xdp_prog, xdp);
+	return 0;
+
+err:
+	_trace_xsk_redirect_err(dev, xdp_prog, xdp, err);
+	return err;
+}
+#endif /* CONFIG_XDP_SOCKETS */
+
 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 		    struct bpf_prog *xdp_prog)
 {
@@ -3510,6 +3547,10 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 	if (likely(map))
 		return xdp_do_redirect_map(dev, xdp, xdp_prog, map, ri);
 
+#ifdef CONFIG_XDP_SOCKETS
+	if (ri->xsk)
+		return xdp_do_xsk_redirect(dev, xdp, xdp_prog, ri);
+#endif
 	return xdp_do_redirect_slow(dev, xdp, xdp_prog, ri);
 }
 EXPORT_SYMBOL_GPL(xdp_do_redirect);
@@ -3560,6 +3601,33 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
 	return err;
 }
 
+#ifdef CONFIG_XDP_SOCKETS
+static int xdp_do_generic_xsk_redirect(struct net_device *dev,
+				       struct xdp_buff *xdp,
+				       struct bpf_prog *xdp_prog,
+				       struct sk_buff *skb)
+{
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	struct xdp_sock *xsk = ri->xsk;
+	int err;
+
+	ri->xsk = NULL;
+	ri->xsk_to_flush = NULL;
+
+	err = xsk_generic_attached_rcv(xsk, xdp);
+	if (err)
+		goto err;
+
+	consume_skb(skb);
+	_trace_xsk_redirect(dev, xdp_prog, xdp);
+	return 0;
+
+err:
+	_trace_xsk_redirect_err(dev, xdp_prog, xdp, err);
+	return err;
+}
+#endif /* CONFIG_XDP_SOCKETS */
+
 int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 			    struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
 {
@@ -3572,6 +3640,11 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 	if (map)
 		return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
 						   map);
+#ifdef CONFIG_XDP_SOCKETS
+	if (ri->xsk)
+		return xdp_do_generic_xsk_redirect(dev, xdp, xdp_prog, skb);
+#endif
+
 	ri->ifindex = 0;
 	fwd = dev_get_by_index_rcu(dev_net(dev), index);
 	if (unlikely(!fwd)) {
@@ -3639,6 +3712,29 @@ static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
 	.arg3_type      = ARG_ANYTHING,
 };
 
+#ifdef CONFIG_XDP_SOCKETS
+BPF_CALL_1(bpf_xdp_xsk_redirect, struct xdp_buff *, xdp)
+{
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	struct xdp_sock *xsk;
+
+	xsk = READ_ONCE(xdp->rxq->dev->_rx[xdp->rxq->queue_index].xsk);
+	if (xsk) {
+		ri->xsk = xsk;
+		return XDP_REDIRECT;
+	}
+
+	return XDP_PASS;
+}
+
+static const struct bpf_func_proto bpf_xdp_xsk_redirect_proto = {
+	.func           = bpf_xdp_xsk_redirect,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+};
+#endif /* CONFIG_XDP_SOCKETS */
+
 static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
 				  unsigned long off, unsigned long len)
 {
@@ -5510,6 +5606,10 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_sk_lookup_tcp_proto;
 	case BPF_FUNC_sk_release:
 		return &bpf_sk_release_proto;
+#endif
+#ifdef CONFIG_XDP_SOCKETS
+	case BPF_FUNC_xsk_redirect:
+		return &bpf_xdp_xsk_redirect_proto;
 #endif
 	default:
 		return bpf_base_func_proto(func_id);
-- 
2.19.1
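As a usage note, attaching the socket from userspace could look
roughly like the sketch below. It assumes the XDP_ATTACH bind() flag
introduced earlier in this series; the xsk_attach function name is
made up for the example, and umem setup plus error handling are
omitted:

#include <linux/if_xdp.h>
#include <sys/socket.h>

#ifndef AF_XDP
#define AF_XDP 44	/* not yet present in older libc headers */
#endif

int xsk_attach(int ifindex, __u32 queue_id)
{
	struct sockaddr_xdp sxdp = {};
	int fd;

	fd = socket(AF_XDP, SOCK_RAW, 0);
	if (fd < 0)
		return -1;

	sxdp.sxdp_family = AF_XDP;
	sxdp.sxdp_ifindex = ifindex;
	sxdp.sxdp_queue_id = queue_id;
	sxdp.sxdp_flags = XDP_ATTACH;

	/* After a successful bind(), bpf_xsk_redirect() on this Rx
	 * queue steers frames into the socket's Rx ring.
	 */
	if (bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp)))
		return -1;

	return fd;
}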