From: Magnus Karlsson <magnus.karls...@intel.com>

Here, the bind syscall is implemented. Two frame receive functions are
also introduced: xsk_rcv and xsk_generic_rcv. The former is used by the
XDP_DRV path and the latter by the XDP_SKB path. A zero-copy variant,
xsk_zc_rcv, and xsk_flush, which makes received frames visible to user
space, round out the receive interface.
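
As an illustration, binding from user space could look like the sketch
below. The uapi header name and the helper are assumptions made for the
example (the socket itself is created earlier in the series); only the
sockaddr_xdp fields and the bind() error codes come from this patch.

  #include <string.h>
  #include <net/if.h>
  #include <sys/socket.h>
  #include <linux/if_xdp.h>    /* assumed uapi header for AF_XDP */

  /* Hypothetical helper: bind an already-configured AF_XDP socket
   * (Rx and Tx rings set up beforehand, as bind() requires both)
   * to one Rx queue of a netdev.
   */
  static int xsk_bind_to_queue(int fd, const char *ifname, int queue_id)
  {
          struct sockaddr_xdp sxdp;

          memset(&sxdp, 0, sizeof(sxdp));
          sxdp.sxdp_family = AF_XDP;
          sxdp.sxdp_ifindex = if_nametoindex(ifname);
          sxdp.sxdp_queue_id = queue_id;

          /* -ENODEV: unknown ifindex, -EBUSY: bound to another device,
           * -EINVAL: missing Rx/Tx rings or out-of-range queue id.
           */
          return bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
  }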

Later commits will wire up the receive functions.
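
To make the intended XDP_DRV usage concrete, here is a hedged sketch of
how a driver might wire these up; the mydrv_* names and the per-queue
state are hypothetical, only xsk_rcv() and xsk_flush() come from this
patch.

  /* Hypothetical per-queue driver state; the driver caches the socket
   * returned by xsk_rcv() so it can flush once per NAPI poll.
   */
  struct mydrv_rxq {
          struct xdp_sock *xsk;
  };

  static bool mydrv_xdp_redirect(struct mydrv_rxq *rxq,
                                 struct xdp_buff *xdp)
  {
          struct xdp_sock *xsk;

          /* Copies the frame into the bound socket's user-space
           * buffer; an ERR_PTR means no socket is bound to this
           * queue or user space has no free frame.
           */
          xsk = xsk_rcv(rxq->xsk, xdp);
          if (IS_ERR(xsk))
                  return false;   /* drop the frame */
          rxq->xsk = xsk;
          return true;
  }

  static void mydrv_napi_done(struct mydrv_rxq *rxq)
  {
          /* Make all frames received above visible to user space
           * in one operation.
           */
          if (rxq->xsk)
                  xsk_flush(rxq->xsk);
  }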

Signed-off-by: Magnus Karlsson <magnus.karls...@intel.com>
---
 include/linux/netdevice.h |   3 +
 net/xdp/xsk.c             | 211 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 210 insertions(+), 4 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4c77f39ebd65..36cc7e92bd8e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -688,6 +688,9 @@ struct netdev_rx_queue {
        struct kobject                  kobj;
        struct net_device               *dev;
        struct xdp_rxq_info             xdp_rxq;
+#ifdef CONFIG_XDP_SOCKETS
+       struct xdp_sock __rcu           *xs; /* bound AF_XDP socket */
+#endif
 } ____cacheline_aligned_in_smp;
 
 /*
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 34294ac2f75f..db918e31079b 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -34,8 +34,11 @@
 #include "xsk_ring.h"
 
 #define XSK_UMEM_MIN_FRAME_SIZE 2048
+#define XSK_ARRAY_SIZE 512
 
 struct xsk_info {
+       struct xsk_packet_array *pa;    /* frame staging area */
+       spinlock_t pa_lock;             /* protects pa */
        struct xsk_queue *q;
        struct xsk_umem *umem;
        struct socket *mrsock;
@@ -47,7 +50,10 @@ struct xdp_sock {
        struct sock sk;
        struct xsk_info rx;
        struct xsk_info tx;
+       struct net_device *dev;
        struct xsk_umem *umem;
+       u32 ifindex;
+       u16 queue_id;
 };
 
 static struct xdp_sock *xdp_sk(struct sock *sk)
@@ -330,9 +336,21 @@ static int xsk_release(struct socket *sock)
        sock_prot_inuse_add(net, sk->sk_prot, -1);
        local_bh_enable();
 
-       xsk_umem_destroy(xs->umem);
-       xskq_destroy(xs->rx.q);
-       xskq_destroy(xs->tx.q);
+       if (xs->dev) {
+               rcu_assign_pointer(xs->dev->_rx[xs->queue_id].xs, NULL);
+
+               /* Wait for driver to stop using the xdp socket. */
+               synchronize_net();
+
+               xskpa_destroy(xs->rx.pa);
+               kobject_put(&xs->dev->_rx[xs->queue_id].kobj);
+               dev_put(xs->dev);
+       }
+
+       xsk_umem_destroy(xs->umem);
+       xskq_destroy(xs->rx.q);
+       xskq_destroy(xs->tx.q);
 
        sock_orphan(sk);
        sock->sk = NULL;
@@ -345,8 +363,193 @@ static int xsk_release(struct socket *sock)
 
 static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 {
-       return -EOPNOTSUPP;
+       struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
+       struct sock *sk = sock->sk;
+       struct xdp_sock *xs = xdp_sk(sk);
+       struct net_device *dev_curr;
+       struct net_device *dev;
+       int err = 0;
+
+       if (addr_len < sizeof(struct sockaddr_xdp))
+               return -EINVAL;
+       if (sxdp->sxdp_family != AF_XDP)
+               return -EINVAL;
+
+       lock_sock(sk);
+       dev_curr = xs->dev;
+       dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
+       if (!dev) {
+               err = -ENODEV;
+               goto out_unlock;
+       }
+
+       if (dev_curr && dev_curr != dev) {
+               /* XXX Needs rebind code here */
+               err = -EBUSY;
+               goto out_unlock;
+       }
+
+       if (!xs->rx.q || !xs->tx.q) {
+               /* XXX For now require Tx and Rx */
+               err = -EINVAL;
+               goto out_unlock;
+       }
+
+       if (sxdp->sxdp_queue_id >= dev->num_rx_queues) {
+               err = -EINVAL;
+               goto out_unlock;
+       }
+       kobject_get(&dev->_rx[sxdp->sxdp_queue_id].kobj);
+
+       spin_lock_init(&xs->rx.pa_lock);
+
+       /* Rx */
+       xs->rx.buff_info = xsk_buff_info_create(xs->rx.umem);
+       if (!xs->rx.buff_info) {
+               err = -ENOMEM;
+               goto out_kobj;
+       }
+       xskq_set_buff_info(xs->rx.q, xs->rx.buff_info, XSK_VALIDATION_RX);
+
+       /* Rx packet array is used for copy semantics... */
+       xs->rx.pa = xskpa_create((struct xsk_user_queue *)xs->rx.q,
+                                xs->rx.buff_info, XSK_ARRAY_SIZE);
+       if (!xs->rx.pa) {
+               err = -ENOMEM;
+               goto out_rx_pa;
+       }
+
+       /* Publish the binding only when fully set up, so the release
+        * path never sees a half-initialized socket.
+        */
+       xs->dev = dev;
+       xs->ifindex = sxdp->sxdp_ifindex;
+       xs->queue_id = sxdp->sxdp_queue_id;
+       rcu_assign_pointer(dev->_rx[sxdp->sxdp_queue_id].xs, xs);
+
+       goto out_unlock;
+
+out_rx_pa:
+       xsk_buff_info_destroy(xs->rx.buff_info);
+       xs->rx.buff_info = NULL;
+out_kobj:
+       kobject_put(&dev->_rx[sxdp->sxdp_queue_id].kobj);
+out_unlock:
+       if (err)
+               dev_put(dev);
+       else if (dev_curr)
+               dev_put(dev_curr);
+       release_sock(sk);
+       return err;
+}
+
+static inline struct xdp_sock *lookup_xsk(struct net_device *dev,
+                                         unsigned int queue_id)
+{
+       if (unlikely(queue_id >= dev->num_rx_queues))
+               return NULL;
+
+       return rcu_dereference(dev->_rx[queue_id].xs);
+}
+
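+/* Frame receive function for the XDP_SKB (generic) path. Copies the
+ * frame in @xdp into the user-space buffer of the socket bound to the
+ * receiving queue and flushes it to user space immediately. pa_lock
+ * serializes concurrent producers on the packet array.
+ */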
+int xsk_generic_rcv(struct xdp_buff *xdp)
+{
+       u32 len = xdp->data_end - xdp->data;
+       struct xsk_frame_set p;
+       struct xdp_sock *xsk;
+       bool ok;
+
+       rcu_read_lock();
+       xsk = lookup_xsk(xdp->rxq->dev, xdp->rxq->queue_index);
+       if (unlikely(!xsk)) {
+               rcu_read_unlock();
+               return -EINVAL;
+       }
+
+       spin_lock(&xsk->rx.pa_lock);
+       ok = xskpa_next_frame_populate(xsk->rx.pa, &p);
+       spin_unlock(&xsk->rx.pa_lock);
+
+       if (!ok) {
+               rcu_read_unlock();
+               return -ENOSPC;
+       }
+
+       memcpy(xskf_get_data(&p), xdp->data, len);
+       xskf_set_frame_no_offset(&p, len, true);
+       spin_lock(&xsk->rx.pa_lock);
+       xskpa_flush(xsk->rx.pa);
+       spin_unlock(&xsk->rx.pa_lock);
+       rcu_read_unlock();
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(xsk_generic_rcv);
+
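+/* Frame receive function for the XDP_DRV path. Copies the frame into
+ * user space but defers publication until the driver calls xsk_flush(),
+ * amortizing the flush over many frames. Returns the bound socket (or
+ * an ERR_PTR) so the caller can cache it for that flush.
+ */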
+struct xdp_sock *xsk_rcv(struct xdp_sock *xsk, struct xdp_buff *xdp)
+{
+       u32 len = xdp->data_end - xdp->data;
+       struct xsk_frame_set p;
+
+       rcu_read_lock();
+       if (!xsk)
+               xsk = lookup_xsk(xdp->rxq->dev, xdp->rxq->queue_index);
+       if (unlikely(!xsk)) {
+               rcu_read_unlock();
+               return ERR_PTR(-EINVAL);
+       }
+
+       if (!xskpa_next_frame_populate(xsk->rx.pa, &p)) {
+               rcu_read_unlock();
+               return ERR_PTR(-ENOSPC);
+       }
+
+       memcpy(xskf_get_data(&p), xdp->data, len);
+       xskf_set_frame_no_offset(&p, len, true);
+       rcu_read_unlock();
+
+       /* We assume that the semantics of xdp_do_redirect are such
+        * that ndo_xdp_xmit will decrease the refcount of the page
+        * when it is done with the page. Thus, if we want to
+        * guarantee the existence of the page in the calling driver,
+        * we need to bump the refcount. It is unclear what the
+        * correct semantics are supposed to be.
+        */
+       page_frag_free(xdp->data);
+
+       return xsk;
+}
+EXPORT_SYMBOL_GPL(xsk_rcv);
+
+int xsk_zc_rcv(struct xdp_sock *xsk, struct xdp_buff *xdp)
+{
+       u32 offset = xdp->data - xdp->data_hard_start;
+       u32 len = xdp->data_end - xdp->data;
+       struct xsk_frame_set p;
+
+       /* We do not need any locking here since we are guaranteed
+        * a single producer and a single consumer.
+        */
+       if (xskpa_next_frame_populate(xsk->rx.pa, &p)) {
+               xskf_set_frame(&p, len, offset, true);
+               return 0;
+       }
+
+       /* No user-space buffer to put the packet in. */
+       return -ENOSPC;
+}
+EXPORT_SYMBOL_GPL(xsk_zc_rcv);
+
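+/* Make all frames received via xsk_rcv() or xsk_zc_rcv() since the
+ * last flush visible to user space.
+ */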
+void xsk_flush(struct xdp_sock *xsk)
+{
+       rcu_read_lock();
+       /* Without a socket there is no device/queue to look one up
+        * from, so there is nothing to flush.
+        */
+       if (unlikely(!xsk)) {
+               rcu_read_unlock();
+               return;
+       }
+
+       WARN_ON_ONCE(xskpa_flush(xsk->rx.pa));
+       rcu_read_unlock();
 }
+EXPORT_SYMBOL_GPL(xsk_flush);
 
 static unsigned int xsk_poll(struct file *file, struct socket *sock,
                             struct poll_table_struct *wait)
-- 
2.14.1
