len)
                metalen = xdp->data - xdp->data_meta;
        }

-       to_buf = xdp_umem_get_data(xs->umem, addr);
-       memcpy(to_buf, from_buf, len + metalen);
-       addr += metalen;
+       __xsk_rcv_memcpy(xs->umem, addr + offset, from_buf, len, metalen);
+
+       offset += metalen;
+       if (xs->umem->flags & XDP_UMEM_UNALIGNED_CHUNKS)
+               addr |= offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+       else
+               addr += offset;
        err = xskq_produce_batch_desc(xs->rx, addr, len);
        if (!err) {
                xskq_discard_addr(xs->umem->fq);
@@ -127,6 +152,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
        u32 len = xdp->data_end - xdp->data;
        void *buffer;
        u64 addr;
+       u64 offset = xs->umem->headroom;
        int err;

        spin_lock_bh(&xs->rx_lock);
@@ -136,17 +162,17 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
                goto out_unlock;
        }

-       if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+       if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
            len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
                err = -ENOSPC;
                goto out_drop;
        }

-       addr += xs->umem->headroom;
-
-       buffer = xdp_umem_get_data(xs->umem, addr);
+       buffer = xdp_umem_get_data(xs->umem, addr + offset);
        memcpy(buffer, xdp->data_meta, len + metalen);
-       addr += metalen;
+       offset += metalen;
+
+       addr = xsk_umem_handle_offset(xs->umem, addr, offset);
        err = xskq_produce_batch_desc(xs->rx, addr, len);
        if (err)
                goto out_drop;
@@ -190,7 +216,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)

        rcu_read_lock();
        list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
-               if (!xskq_peek_desc(xs->tx, desc))
+               if (!xskq_peek_desc(xs->tx, desc, umem))
                        continue;

                if (xskq_produce_addr_lazy(umem->cq, desc->addr))
@@ -243,7 +269,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
        if (xs->queue_id >= xs->dev->real_num_tx_queues)
                goto out;

-       while (xskq_peek_desc(xs->tx, &desc)) {
+       while (xskq_peek_desc(xs->tx, &desc, xs->umem)) {
                char *buffer;
                u64 addr;
                u32 len;
@@ -262,6 +288,10 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,

                skb_put(skb, len);
                addr = desc.addr;
+               if (xs->umem->flags & XDP_UMEM_UNALIGNED_CHUNKS)
+                       addr = (addr & XSK_UNALIGNED_BUF_ADDR_MASK) |
+                               (addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT);

This doesn't look right to me. Shouldn't it be "(addr & mask) + (addr >> shift)"?
OR-ing gives the wrong result whenever the base address and the offset share set
bits, e.g. base 0x1800 with offset 0x900 yields 0x1900 instead of 0x2100.
I'd also prefer to see this type of logic factored out into an inline helper or
macro instead of being open-coded at each call site.

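Something like the below, say (helper name and placement hypothetical, just to
illustrate the factoring I have in mind):

static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
{
        /* The low bits hold the base chunk address and the high bits hold
         * the offset into the chunk; recombining them must be an add, not
         * an or, since base and offset can share set bits.
         */
        return (addr & XSK_UNALIGNED_BUF_ADDR_MASK) +
               (addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT);
}

Then the xmit path above would simply become:

                if (xs->umem->flags & XDP_UMEM_UNALIGNED_CHUNKS)
                        addr = xsk_umem_add_offset_to_addr(addr);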
+
                buffer = xdp_umem_get_data(xs->umem, addr);
                err = skb_store_bits(skb, 0, buffer, len);
                if (unlikely(err) || xskq_reserve_addr(xs->umem->cq)) {
@@ -272,7 +302,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
                skb->dev = xs->dev;
                skb->priority = sk->sk_priority;
                skb->mark = sk->sk_mark;
-               skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
+               skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
                skb->destructor = xsk_destruct_skb;

                err = dev_direct_xmit(skb, xs->queue_id);
@@ -412,6 +442,28 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd)
        return sock;
 }

+/* Check if umem pages are contiguous.
+ * If zero-copy mode, use the DMA address to do the page contiguity check
+ * For all other modes we use addr (kernel virtual address)
+ */
+static void xsk_check_page_contiguity(struct xdp_umem *umem, u32 flags)
+{
+       int i;
+
+       if (flags & XDP_ZEROCOPY) {
+               for (i = 0; i < umem->npgs - 1; i++)
+                       umem->pages[i].next_pg_contig =
+                                       (umem->pages[i].dma + PAGE_SIZE ==
+                                               umem->pages[i + 1].dma);
+               return;
+       }
+
+       for (i = 0; i < umem->npgs - 1; i++)
+               umem->pages[i].next_pg_contig =
+                               (umem->pages[i].addr + PAGE_SIZE ==
+                                       umem->pages[i + 1].addr);
+}
+
static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 {
        struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
@@ -500,6 +552,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
                err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
                if (err)
                        goto out_unlock;
+
+               xsk_check_page_contiguity(xs->umem, flags);
        }

        xs->dev = dev;
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index d5e06c8e0cbf..9986a759fe06 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -56,7 +56,7 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
        du.id = umem->id;
        du.size = umem->size;
        du.num_pages = umem->npgs;
-       du.chunk_size = (__u32)(~umem->chunk_mask + 1);
+       du.chunk_size = umem->chunk_size_nohr + umem->headroom;
        du.headroom = umem->headroom;
        du.ifindex = umem->dev ? umem->dev->ifindex : 0;
        du.queue_id = umem->queue_id;
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 909c5168ed0f..0d77212367f0 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -133,6 +133,16 @@ static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt)

 /* UMEM queue */

+static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr,
+                                             u64 length)
+{
+       bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE;
+       bool next_pg_contig =
+               umem->pages[(addr >> PAGE_SHIFT) + 1].next_pg_contig;
+
+       return cross_pg && !next_pg_contig;
+}
+
 static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
 {
        if (addr >= q->size) {
@@ -143,23 +153,50 @@ static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
        return true;
 }

-static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr)
+static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr,
+                                               u64 length,
+                                               struct xdp_umem *umem)
+{
+       addr += addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+       addr &= XSK_UNALIGNED_BUF_ADDR_MASK;
+       if (addr >= q->size ||
+           xskq_crosses_non_contig_pg(umem, addr, length)) {
+               q->invalid_descs++;
+               return false;
+       }
+
+       return true;
+}
+
+static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr,
+                                     struct xdp_umem *umem)
 {
        while (q->cons_tail != q->cons_head) {
                struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
                unsigned int idx = q->cons_tail & q->ring_mask;

                *addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask;
+
+               if (umem->flags & XDP_UMEM_UNALIGNED_CHUNKS) {
+                       if (xskq_is_valid_addr_unaligned(q, *addr,
+                                                        umem->chunk_size_nohr,
+                                                        umem))
+                               return addr;
+                       goto out;
+               }
+
                if (xskq_is_valid_addr(q, *addr))
                        return addr;

+out:
                q->cons_tail++;
        }

        return NULL;
 }

-static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
+static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr,
+                                 struct xdp_umem *umem)
 {
        if (q->cons_tail == q->cons_head) {
                smp_mb(); /* D, matches A */
@@ -170,7 +207,7 @@ static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
                smp_rmb();
        }

-       return xskq_validate_addr(q, addr);
+       return xskq_validate_addr(q, addr, umem);
 }

 static inline void xskq_discard_addr(struct xsk_queue *q)
@@ -229,8 +266,21 @@ static inline int xskq_reserve_addr(struct xsk_queue *q)

 /* Rx/Tx queue */

-static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
+static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d,
+                                     struct xdp_umem *umem)
 {
+       if (umem->flags & XDP_UMEM_UNALIGNED_CHUNKS) {
+               if (!xskq_is_valid_addr_unaligned(q, d->addr, d->len, umem))
+                       return false;
+
+               if (d->len > umem->chunk_size_nohr || d->options) {
+                       q->invalid_descs++;
+                       return false;
+               }
+
+               return true;
+       }
+
        if (!xskq_is_valid_addr(q, d->addr))
                return false;

@@ -244,14 +294,15 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
 }

static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
-                                                 struct xdp_desc *desc)
+                                                 struct xdp_desc *desc,
+                                                 struct xdp_umem *umem)
 {
        while (q->cons_tail != q->cons_head) {
                struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
                unsigned int idx = q->cons_tail & q->ring_mask;

                *desc = READ_ONCE(ring->desc[idx]);
-               if (xskq_is_valid_desc(q, desc))
+               if (xskq_is_valid_desc(q, desc, umem))
                        return desc;

                q->cons_tail++;
@@ -261,7 +312,8 @@ static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
 }

 static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
-                                             struct xdp_desc *desc)
+                                             struct xdp_desc *desc,
+                                             struct xdp_umem *umem)
 {
        if (q->cons_tail == q->cons_head) {
                smp_mb(); /* D, matches A */
@@ -272,7 +324,7 @@ static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
                smp_rmb(); /* C, matches B */
        }

-       return xskq_validate_desc(q, desc);
+       return xskq_validate_desc(q, desc, umem);
 }

 static inline void xskq_discard_desc(struct xsk_queue *q)
--
2.17.1
