len)
metalen = xdp->data - xdp->data_meta;
}
- to_buf = xdp_umem_get_data(xs->umem, addr);
- memcpy(to_buf, from_buf, len + metalen);
- addr += metalen;
+ __xsk_rcv_memcpy(xs->umem, addr + offset, from_buf, len, metalen);
+
+ offset += metalen;
+ if (xs->umem->flags & XDP_UMEM_UNALIGNED_CHUNKS)
+ addr |= offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+ else
+ addr += offset;
err = xskq_produce_batch_desc(xs->rx, addr, len);
if (!err) {
xskq_discard_addr(xs->umem->fq);
@@ -127,6 +152,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
u32 len = xdp->data_end - xdp->data;
void *buffer;
u64 addr;
+ u64 offset = xs->umem->headroom;
int err;
spin_lock_bh(&xs->rx_lock);
@@ -136,17 +162,17 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
goto out_unlock;
}
- if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+ if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
err = -ENOSPC;
goto out_drop;
}
- addr += xs->umem->headroom;
-
- buffer = xdp_umem_get_data(xs->umem, addr);
+ buffer = xdp_umem_get_data(xs->umem, addr + offset);
memcpy(buffer, xdp->data_meta, len + metalen);
- addr += metalen;
+ offset += metalen;
+
+ addr = xsk_umem_handle_offset(xs->umem, addr, offset);
err = xskq_produce_batch_desc(xs->rx, addr, len);
if (err)
goto out_drop;
@@ -190,7 +216,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
rcu_read_lock();
list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
- if (!xskq_peek_desc(xs->tx, desc))
+ if (!xskq_peek_desc(xs->tx, desc, umem))
continue;
if (xskq_produce_addr_lazy(umem->cq, desc->addr))
@@ -243,7 +269,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
if (xs->queue_id >= xs->dev->real_num_tx_queues)
goto out;
- while (xskq_peek_desc(xs->tx, &desc)) {
+ while (xskq_peek_desc(xs->tx, &desc, xs->umem)) {
char *buffer;
u64 addr;
u32 len;
@@ -262,6 +288,10 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
skb_put(skb, len);
addr = desc.addr;
+ if (xs->umem->flags & XDP_UMEM_UNALIGNED_CHUNKS)
+ addr = (addr & XSK_UNALIGNED_BUF_ADDR_MASK) |
+ (addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT);
This doesn't look right to me. Shouldn't it be "(addr & mask) + (addr >> shift)"?
I'd also prefer to see this type of logic in an inline/macro.
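Something along the lines of the sketch below would address both points.
The helper names and their placement are only a suggestion; it assumes the
XSK_UNALIGNED_BUF_ADDR_MASK and XSK_UNALIGNED_BUF_OFFSET_SHIFT macros
introduced earlier in this series:

/* Decode an unaligned-chunk handle: the low bits carry the base
 * address, the bits at and above XSK_UNALIGNED_BUF_OFFSET_SHIFT
 * carry the offset into the chunk.
 */
static inline u64 xsk_umem_extract_addr(u64 addr)
{
	return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
}

static inline u64 xsk_umem_extract_offset(u64 addr)
{
	return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
}

static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
{
	/* Add (rather than OR) the offset so the result stays correct
	 * when base + offset carries into higher address bits.
	 */
	return xsk_umem_extract_addr(addr) + xsk_umem_extract_offset(addr);
}

The xmit path above would then read:

	addr = desc.addr;
	if (xs->umem->flags & XDP_UMEM_UNALIGNED_CHUNKS)
		addr = xsk_umem_add_offset_to_addr(addr);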
+
buffer = xdp_umem_get_data(xs->umem, addr);
err = skb_store_bits(skb, 0, buffer, len);
if (unlikely(err) || xskq_reserve_addr(xs->umem->cq)) {
@@ -272,7 +302,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
skb->dev = xs->dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
+ skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
skb->destructor = xsk_destruct_skb;
err = dev_direct_xmit(skb, xs->queue_id);
@@ -412,6 +442,28 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd)
return sock;
}
+/* Check if umem pages are contiguous.
+ * If zero-copy mode, use the DMA address to do the page contiguity check
+ * For all other modes we use addr (kernel virtual address)
+ */
+static void xsk_check_page_contiguity(struct xdp_umem *umem, u32 flags)
+{
+ int i;
+
+ if (flags & XDP_ZEROCOPY) {
+ for (i = 0; i < umem->npgs - 1; i++)
+ umem->pages[i].next_pg_contig =
+ (umem->pages[i].dma + PAGE_SIZE ==
+ umem->pages[i + 1].dma);
+ return;
+ }
+
+ for (i = 0; i < umem->npgs - 1; i++)
+ umem->pages[i].next_pg_contig =
+ (umem->pages[i].addr + PAGE_SIZE ==
+ umem->pages[i + 1].addr);
+}
+
static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
@@ -500,6 +552,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
if (err)
goto out_unlock;
+
+ xsk_check_page_contiguity(xs->umem, flags);
}
xs->dev = dev;
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index d5e06c8e0cbf..9986a759fe06 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -56,7 +56,7 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
du.id = umem->id;
du.size = umem->size;
du.num_pages = umem->npgs;
- du.chunk_size = (__u32)(~umem->chunk_mask + 1);
+ du.chunk_size = umem->chunk_size_nohr + umem->headroom;
du.headroom = umem->headroom;
du.ifindex = umem->dev ? umem->dev->ifindex : 0;
du.queue_id = umem->queue_id;
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 909c5168ed0f..0d77212367f0 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -133,6 +133,16 @@ static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt)
/* UMEM queue */
+static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr,
+ u64 length)
+{
+ bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE;
+ bool next_pg_contig =
+ umem->pages[(addr >> PAGE_SHIFT) + 1].next_pg_contig;
+
+ return cross_pg && !next_pg_contig;
+}
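A quick worked example of the check above, assuming 4 KiB pages; the
numbers are purely illustrative:

	u64 addr = 0x0fc0;	/* 64 bytes below the first page boundary */
	u64 length = 0x80;	/* 128-byte buffer */

	/* (0x0fc0 & 0xfff) + 0x80 = 0x1040 > 0x1000, so cross_pg is
	 * true; the address is then only accepted if
	 * pages[0].next_pg_contig is set, i.e. bind time found page 0
	 * and page 1 contiguous.
	 */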
+
static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
{
if (addr >= q->size) {
@@ -143,23 +153,50 @@ static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
return true;
}
-static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr)
+static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr,
+ u64 length,
+ struct xdp_umem *umem)
+{
+ addr += addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+ addr &= XSK_UNALIGNED_BUF_ADDR_MASK;
+ if (addr >= q->size ||
+ xskq_crosses_non_contig_pg(umem, addr, length)) {
+ q->invalid_descs++;
+ return false;
+ }
+
+ return true;
+}
+
+static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr,
+ struct xdp_umem *umem)
{
while (q->cons_tail != q->cons_head) {
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
unsigned int idx = q->cons_tail & q->ring_mask;
*addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask;
+
+ if (umem->flags & XDP_UMEM_UNALIGNED_CHUNKS) {
+ if (xskq_is_valid_addr_unaligned(q, *addr,
+ umem->chunk_size_nohr,
+ umem))
+ return addr;
+ goto out;
+ }
+
if (xskq_is_valid_addr(q, *addr))
return addr;
+out:
q->cons_tail++;
}
return NULL;
}
-static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
+static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr,
+ struct xdp_umem *umem)
{
if (q->cons_tail == q->cons_head) {
smp_mb(); /* D, matches A */
@@ -170,7 +207,7 @@ static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
smp_rmb();
}
- return xskq_validate_addr(q, addr);
+ return xskq_validate_addr(q, addr, umem);
}
static inline void xskq_discard_addr(struct xsk_queue *q)
@@ -229,8 +266,21 @@ static inline int xskq_reserve_addr(struct xsk_queue *q)
/* Rx/Tx queue */
-static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
+static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d,
+ struct xdp_umem *umem)
{
+ if (umem->flags & XDP_UMEM_UNALIGNED_CHUNKS) {
+ if (!xskq_is_valid_addr_unaligned(q, d->addr, d->len, umem))
+ return false;
+
+ if (d->len > umem->chunk_size_nohr || d->options) {
+ q->invalid_descs++;
+ return false;
+ }
+
+ return true;
+ }
+
if (!xskq_is_valid_addr(q, d->addr))
return false;
@@ -244,14 +294,15 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
}
static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
- struct xdp_desc *desc)
+ struct xdp_desc *desc,
+ struct xdp_umem *umem)
{
while (q->cons_tail != q->cons_head) {
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
unsigned int idx = q->cons_tail & q->ring_mask;
*desc = READ_ONCE(ring->desc[idx]);
- if (xskq_is_valid_desc(q, desc))
+ if (xskq_is_valid_desc(q, desc, umem))
return desc;
q->cons_tail++;
@@ -261,7 +312,8 @@ static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
}
static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
- struct xdp_desc *desc)
+ struct xdp_desc *desc,
+ struct xdp_umem *umem)
{
if (q->cons_tail == q->cons_head) {
smp_mb(); /* D, matches A */
@@ -272,7 +324,7 @@ static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
smp_rmb(); /* C, matches B */
}
- return xskq_validate_desc(q, desc);
+ return xskq_validate_desc(q, desc, umem);
}
static inline void xskq_discard_desc(struct xsk_queue *q)
--
2.17.1