On Tue, Sep 20, 2016 at 12:55 PM, Jesper Dangaard Brouer <bro...@redhat.com> wrote: > Currently the XDP program is simply a bpf_prog pointer. While it > is good for simplicity, it is limiting extendability for upcoming > features. > Hi Jesper,
Can you take a look (or try) the RFC patches I just posted to generalize XDP. I believe that should subsume most of what you're doing here! Thanks, Tom > Introducing a new struct xdp_prog, that can carry information > related to the XDP program. Notice this approach does not affect > performance (tested and benchmarked), because the extra dereference > for the eBPF program only happens once per 64 packets in the poll > function. > > The features that need this is: > > * Multi-port TX: > Need to know own port index and port lookup table. > > * XDP program per RX queue: > Need setup info about program type, global or specific, due to > replace semantics. > > * Capabilities negotiation: > Need to store information about features program want to use, > in-order to validate this. > > I do realize this new struct xdp_prog features cannot go into the > kernel before one of the three users of the struct is also implemented. > > Signed-off-by: Jesper Dangaard Brouer <bro...@redhat.com> > --- > drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 12 +++--- > drivers/net/ethernet/mellanox/mlx4/en_rx.c | 10 +++-- > drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2 - > include/linux/filter.h | 14 +++++++ > include/linux/netdevice.h | 2 - > net/core/dev.c | 15 +++++-- > net/core/filter.c | 50 > ++++++++++++++++++++++++ > 7 files changed, 89 insertions(+), 16 deletions(-) > > diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c > b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c > index 62516f8369ba..f86f65b170f7 100644 > --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c > +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c > @@ -2622,11 +2622,11 @@ static int mlx4_en_set_tx_maxrate(struct net_device > *dev, int queue_index, u32 m > return err; > } > > -static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) > +static int mlx4_xdp_set(struct net_device *dev, struct xdp_prog *prog) > { > struct mlx4_en_priv *priv = netdev_priv(dev); > struct mlx4_en_dev *mdev = priv->mdev; > - struct bpf_prog *old_prog; > + struct xdp_prog *old_prog; > int xdp_ring_num; > int port_up = 0; > int err; > @@ -2639,7 +2639,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct > bpf_prog *prog) > */ > if (priv->xdp_ring_num == xdp_ring_num) { > if (prog) { > - prog = bpf_prog_add(prog, priv->rx_ring_num - 1); > + prog = xdp_prog_add(prog, priv->rx_ring_num); > if (IS_ERR(prog)) > return PTR_ERR(prog); > } > @@ -2650,7 +2650,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct > bpf_prog *prog) > lockdep_is_held(&mdev->state_lock)); > rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog); > if (old_prog) > - bpf_prog_put(old_prog); > + xdp_prog_put(old_prog); > } > mutex_unlock(&mdev->state_lock); > return 0; > @@ -2669,7 +2669,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct > bpf_prog *prog) > } > > if (prog) { > - prog = bpf_prog_add(prog, priv->rx_ring_num - 1); > + prog = xdp_prog_add(prog, priv->rx_ring_num); > if (IS_ERR(prog)) > return PTR_ERR(prog); > } > @@ -2690,7 +2690,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct > bpf_prog *prog) > lockdep_is_held(&mdev->state_lock)); > rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog); > if (old_prog) > - bpf_prog_put(old_prog); > + xdp_prog_put(old_prog); > } > > if (port_up) { > diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c > b/drivers/net/ethernet/mellanox/mlx4/en_rx.c > index c46355bce613..e1182879ea6f 100644 > --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c > +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c > @@ -535,13 +535,13 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, > { > struct mlx4_en_dev *mdev = priv->mdev; > struct mlx4_en_rx_ring *ring = *pring; > - struct bpf_prog *old_prog; > + struct xdp_prog *old_prog; > > old_prog = rcu_dereference_protected( > ring->xdp_prog, > lockdep_is_held(&mdev->state_lock)); > if (old_prog) > - bpf_prog_put(old_prog); > + xdp_prog_put(old_prog); > mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); > vfree(ring->rx_info); > ring->rx_info = NULL; > @@ -783,7 +783,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct > mlx4_en_cq *cq, int bud > struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring]; > struct mlx4_en_rx_alloc *frags; > struct mlx4_en_rx_desc *rx_desc; > - struct bpf_prog *xdp_prog; > + struct xdp_prog *xdp_prog; > + struct bpf_prog *bpf_prog; > int doorbell_pending; > struct sk_buff *skb; > int tx_index; > @@ -805,6 +806,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct > mlx4_en_cq *cq, int bud > /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */ > rcu_read_lock(); > xdp_prog = rcu_dereference(ring->xdp_prog); > + bpf_prog = rcu_dereference(xdp_prog->bpf); > doorbell_pending = 0; > tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring; > > @@ -897,7 +899,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct > mlx4_en_cq *cq, int bud > frags[0].page_offset; > xdp.data_end = xdp.data + length; > > - act = bpf_prog_run_xdp(xdp_prog, &xdp); > + act = bpf_prog_run_xdp(bpf_prog, &xdp); > switch (act) { > case XDP_PASS: > break; > diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h > b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h > index a3528dd1e72e..8942201bcc38 100644 > --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h > +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h > @@ -340,7 +340,7 @@ struct mlx4_en_rx_ring { > u8 fcs_del; > void *buf; > void *rx_info; > - struct bpf_prog __rcu *xdp_prog; > + struct xdp_prog __rcu *xdp_prog; > struct mlx4_en_page_cache page_cache; > unsigned long bytes; > unsigned long packets; > diff --git a/include/linux/filter.h b/include/linux/filter.h > index 1f09c521adfe..f1eee2f0f70c 100644 > --- a/include/linux/filter.h > +++ b/include/linux/filter.h > @@ -437,6 +437,20 @@ struct xdp_buff { > void *data_end; > }; > > +struct xdp_prog { > + u64 flags; > + struct bpf_prog *bpf; > + atomic_t refcnt; > + struct rcu_head rcu; // Do we need RCU freeing? likely right? > + > + /* Data associated with XDP program goes here */ > + > +} ____cacheline_aligned_in_smp; > + > +struct xdp_prog *xdp_prog_alloc(struct bpf_prog *bpf); > +struct xdp_prog *xdp_prog_add(struct xdp_prog *xdp, int users); > +void xdp_prog_put(struct xdp_prog *prog); > + > /* compute the linear packet data range [data, data_end) which > * will be accessed by cls_bpf and act_bpf programs > */ > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h > index a10d8d18ce19..f9d900e62cd5 100644 > --- a/include/linux/netdevice.h > +++ b/include/linux/netdevice.h > @@ -825,7 +825,7 @@ struct netdev_xdp { > enum xdp_netdev_command command; > union { > /* XDP_SETUP_PROG */ > - struct bpf_prog *prog; > + struct xdp_prog *prog; > /* XDP_QUERY_PROG */ > bool prog_attached; > }; > diff --git a/net/core/dev.c b/net/core/dev.c > index 9dbece2f1296..df3f7d3cf62e 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -6643,23 +6643,30 @@ EXPORT_SYMBOL(dev_change_proto_down); > int dev_change_xdp_fd(struct net_device *dev, int fd) > { > const struct net_device_ops *ops = dev->netdev_ops; > - struct bpf_prog *prog = NULL; > + struct xdp_prog *prog = NULL; > + struct bpf_prog *bpf; > struct netdev_xdp xdp = {}; > int err; > > if (!ops->ndo_xdp) > return -EOPNOTSUPP; > if (fd >= 0) { > - prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); > - if (IS_ERR(prog)) > + bpf = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); /* inc refcnt > */ > + if (IS_ERR(bpf)) > + return PTR_ERR(bpf); > + > + prog = xdp_prog_alloc(bpf); > + if (IS_ERR(prog)) { > + bpf_prog_put(prog->bpf); > return PTR_ERR(prog); > + } > } > > xdp.command = XDP_SETUP_PROG; > xdp.prog = prog; > err = ops->ndo_xdp(dev, &xdp); > if (err < 0 && prog) > - bpf_prog_put(prog); > + bpf_prog_put(prog->bpf); > > return err; > } > diff --git a/net/core/filter.c b/net/core/filter.c > index 298b146b47e7..a61ca13b8eaa 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -2912,3 +2912,53 @@ out: > release_sock(sk); > return ret; > } > + > +struct xdp_prog *xdp_prog_alloc(struct bpf_prog *bpf) > +{ > + struct xdp_prog *xdp; > + > + xdp = kzalloc(sizeof(*xdp), GFP_KERNEL); > + if (!xdp) > + return ERR_PTR(-ENOMEM); > + > + /* Note dev_change_xdp_fd() already refcnt inc on bpf prog */ > + xdp->bpf = bpf; > + > + return xdp; > +} > +EXPORT_SYMBOL(xdp_prog_alloc); > + > +struct xdp_prog *xdp_prog_add(struct xdp_prog *xdp, int users) > +{ > + struct bpf_prog *bpf; > + > + bpf = bpf_prog_add(xdp->bpf, users); > + if (IS_ERR(bpf)) { > + return (void*)bpf; /* it is already a PTR_ERR */ > + } > + atomic_add(users, &xdp->refcnt); > + > + return xdp; > +} > +EXPORT_SYMBOL(xdp_prog_add); > + > +void __xdp_prog_put_rcu(struct rcu_head *rcu) > +{ > + struct xdp_prog *xdp = container_of(rcu, struct xdp_prog, rcu); > + > + /* Release reference to (future) xdp_port_table here */ > + > + /* Release refcnt from dev_change_xdp_fd() calling > bpf_prog_get_type()*/ > + bpf_prog_put(xdp->bpf); > + > + kfree(xdp); > +} > + > +void xdp_prog_put(struct xdp_prog *prog) > +{ > + bpf_prog_put(prog->bpf); > + if (atomic_dec_and_test(&prog->refcnt)) { > + call_rcu(&prog->rcu, __xdp_prog_put_rcu); > + } > +} > +EXPORT_SYMBOL(xdp_prog_put); >