The branch main has been updated by jhb:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=a9b615120628411cc20af730edb291c52be02f66

commit a9b615120628411cc20af730edb291c52be02f66
Author:     John Baldwin <j...@freebsd.org>
AuthorDate: 2024-01-09 19:05:03 +0000
Commit:     John Baldwin <j...@freebsd.org>
CommitDate: 2024-01-09 19:09:43 +0000

    memdesc: Helper function to construct mbuf chain backed by memdesc buffer
    
    memdesc_alloc_ext_mbufs constructs a chain of external (M_EXT or
    M_EXTPG) mbufs backed by a data buffer described by a memory
    descriptor.
    
    Since memory descriptors are not an actual buffer, just a description
    of a buffer, the caller is required to supply a couple of helper
    routines to manage allocation of the raw mbufs and associate them
    with a reference to the underlying buffer.
    
    Reviewed by:    markj
    Sponsored by:   Chelsio Communications
    Differential Revision:  https://reviews.freebsd.org/D42933
---
 sys/kern/subr_memdesc.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++
 sys/sys/memdesc.h       |  36 ++++
 2 files changed, 510 insertions(+)

diff --git a/sys/kern/subr_memdesc.c b/sys/kern/subr_memdesc.c
index 9ba9d7fe031b..ff8aad7731cd 100644
--- a/sys/kern/subr_memdesc.c
+++ b/sys/kern/subr_memdesc.c
@@ -33,9 +33,14 @@
 #include <sys/uio.h>
 #include <vm/vm.h>
 #include <vm/pmap.h>
+#include <vm/vm_page.h>
 #include <vm/vm_param.h>
 #include <machine/bus.h>
 
+/*
+ * memdesc_copyback copies data from a source buffer into a buffer
+ * described by a memory descriptor.
+ */
 static void
 phys_copyback(vm_paddr_t pa, int off, int size, const void *src)
 {
@@ -180,6 +185,10 @@ memdesc_copyback(struct memdesc *mem, int off, int size, const void *src)
        }
 }
 
+/*
+ * memdesc_copydata copies data from a buffer described by a memory
+ * descriptor into a destination buffer.
+ */
 static void
 phys_copydata(vm_paddr_t pa, int off, int size, void *dst)
 {
@@ -323,3 +332,468 @@ memdesc_copydata(struct memdesc *mem, int off, int size, void *dst)
                __assert_unreachable();
        }
 }
+
+/*
+ * memdesc_alloc_ext_mbufs allocates a chain of external mbufs backed
+ * by the storage of a memory descriptor's data buffer.
+ */
+static struct mbuf *
+vaddr_ext_mbuf(memdesc_alloc_ext_mbuf_t *ext_alloc, void *cb_arg, int how,
+    void *buf, size_t len, size_t *actual_len)
+{
+       struct mbuf *m;
+
+       /*
+        * The whole range is handed to the caller's ext_alloc callback
+        * in one call, so the reported length is always the requested
+        * length.  It is stored before the callback is invoked.
+        */
+       *actual_len = len;
+       m = ext_alloc(cb_arg, how, buf, len);
+       return (m);
+}
+
+static bool
+can_append_paddr(struct mbuf *m, vm_paddr_t pa)
+{
+       u_int tail_bytes;
+
+       /* An mbuf that holds no pages yet accepts any address. */
+       if (m->m_epg_npgs == 0)
+               return (true);
+
+       /*
+        * A non-empty mbuf can only grow if it still has a free page
+        * slot and the new range starts on a page boundary.
+        */
+       if (m->m_epg_npgs == MBUF_PEXT_MAX_PGS || (pa & PAGE_MASK) != 0)
+               return (false);
+
+       /*
+        * The current last page must be filled up to its end.  With a
+        * single page the data begins at m_epg_1st_off, so that offset
+        * counts towards reaching the end of the page.
+        */
+       tail_bytes = m->m_epg_last_len;
+       if (m->m_epg_npgs == 1)
+               tail_bytes += m->m_epg_1st_off;
+       return (tail_bytes == PAGE_SIZE);
+}
+
+/*
+ * Appends as much of the physical range starting at 'pa' and spanning
+ * 'len' bytes as fits into the M_EXTPG mbuf 'm'.  The page array,
+ * m_epg_npgs, m_epg_last_len and m_len are updated (m_epg_1st_off too
+ * when 'm' was empty).  Appending stops once the mbuf holds
+ * MBUF_PEXT_MAX_PGS pages; the rest of the range is left to the caller.
+ *
+ * Returns amount of data added to an M_EXTPG mbuf.
+ */
+static size_t
+append_paddr_range(struct mbuf *m, vm_paddr_t pa, size_t len)
+{
+       size_t appended;
+
+       appended = 0;
+
+       /*
+        * Append the first page.  Only an empty mbuf may start at a
+        * non-zero offset within its page.  The first piece is clamped
+        * to the end of that page or to 'len', whichever is smaller,
+        * and m_len is assigned rather than incremented.
+        */
+       if (m->m_epg_npgs == 0) {
+               m->m_epg_pa[0] = trunc_page(pa);
+               m->m_epg_npgs = 1;
+               m->m_epg_1st_off = pa & PAGE_MASK;
+               m->m_epg_last_len = PAGE_SIZE - m->m_epg_1st_off;
+               if (m->m_epg_last_len > len)
+                       m->m_epg_last_len = len;
+               m->m_len = m->m_epg_last_len;
+               len -= m->m_epg_last_len;
+               pa += m->m_epg_last_len;
+               appended += m->m_epg_last_len;
+       }
+       KASSERT(len == 0 || (pa & PAGE_MASK) == 0,
+           ("PA not aligned before full pages"));
+
+       /*
+        * Full pages.  Each appended page becomes the new last page
+        * until either the data or the page slots run out.
+        */
+       while (len >= PAGE_SIZE && m->m_epg_npgs < MBUF_PEXT_MAX_PGS) {
+               m->m_epg_pa[m->m_epg_npgs] = pa;
+               m->m_epg_npgs++;
+               m->m_epg_last_len = PAGE_SIZE;
+               m->m_len += PAGE_SIZE;
+               pa += PAGE_SIZE;
+               len -= PAGE_SIZE;
+               appended += PAGE_SIZE;
+       }
+
+       /*
+        * Final partial page, added only if a page slot is still free.
+        * 'len' is not reduced here; only 'appended' records it.
+        */
+       if (len > 0 && m->m_epg_npgs < MBUF_PEXT_MAX_PGS) {
+               KASSERT(len < PAGE_SIZE, ("final page is full page"));
+               m->m_epg_pa[m->m_epg_npgs] = pa;
+               m->m_epg_npgs++;
+               m->m_epg_last_len = len;
+               m->m_len += len;
+               appended += len;
+       }
+
+       return (appended);
+}
+
+static struct mbuf *
+paddr_ext_mbuf(memdesc_alloc_extpg_mbuf_t *extpg_alloc, void *cb_arg, int how,
+    vm_paddr_t pa, size_t len, size_t *actual_len, bool can_truncate)
+{
+       struct mbuf *head, *cur;
+       size_t chunk;
+
+       /*
+        * When truncation is allowed, stop at the last page boundary
+        * inside the range so that no trailing partial page is sent.
+        * A range that never gets past a page boundary is left as is.
+        */
+       if (can_truncate) {
+               vm_paddr_t boundary;
+
+               boundary = trunc_page(pa + len);
+               if (boundary > pa)
+                       len = boundary - pa;
+       }
+       *actual_len = len;
+
+       head = extpg_alloc(cb_arg, how);
+       if (head == NULL)
+               return (NULL);
+
+       for (cur = head; len > 0; pa += chunk, len -= chunk) {
+               /* Chain a fresh mbuf once the current one can't take 'pa'. */
+               if (!can_append_paddr(cur, pa)) {
+                       MBUF_EXT_PGS_ASSERT_SANITY(cur);
+                       cur->m_next = extpg_alloc(cb_arg, how);
+                       if (cur->m_next == NULL) {
+                               /* Release everything built so far. */
+                               m_freem(head);
+                               return (NULL);
+                       }
+                       cur = cur->m_next;
+               }
+
+               chunk = append_paddr_range(cur, pa, len);
+               KASSERT(chunk > 0, ("did not append anything"));
+               KASSERT(chunk <= len, ("appended too much"));
+       }
+
+       MBUF_EXT_PGS_ASSERT_SANITY(cur);
+       return (head);
+}
+
+static struct mbuf *
+vlist_ext_mbuf(memdesc_alloc_ext_mbuf_t *ext_alloc, void *cb_arg, int how,
+    struct bus_dma_segment *vlist, u_int sglist_cnt, size_t offset,
+    size_t len, size_t *actual_len)
+{
+       struct mbuf *head, *n, **linkp;
+       size_t chunk;
+
+       /* This path never shortens the request. */
+       *actual_len = len;
+
+       /* Skip whole segments that lie before the starting offset. */
+       for (; vlist->ds_len <= offset; vlist++, sglist_cnt--) {
+               KASSERT(sglist_cnt > 1, ("out of sglist entries"));
+
+               offset -= vlist->ds_len;
+       }
+
+       /*
+        * One callback invocation per segment.  Only the first segment
+        * can be entered at a non-zero offset; linkp always points at
+        * the m_next slot (or the head) where the next mbuf is hooked.
+        */
+       head = NULL;
+       linkp = &head;
+       for (; len > 0; vlist++, sglist_cnt--, len -= chunk) {
+               KASSERT(sglist_cnt >= 1, ("out of sglist entries"));
+
+               chunk = (len > vlist->ds_len - offset) ?
+                   vlist->ds_len - offset : len;
+
+               n = ext_alloc(cb_arg, how, (char *)(uintptr_t)vlist->ds_addr +
+                   offset, chunk);
+               if (n == NULL) {
+                       /* Release everything built so far. */
+                       m_freem(head);
+                       return (NULL);
+               }
+
+               *linkp = n;
+               linkp = &n->m_next;
+               offset = 0;
+       }
+
+       return (head);
+}
+
+/*
+ * Builds a chain of M_EXTPG mbufs covering 'len' bytes that start
+ * 'offset' bytes into the list of physical segments 'plist', which has
+ * 'sglist_cnt' entries.  Empty mbufs are obtained from extpg_alloc.
+ *
+ * If can_truncate is true, a trailing partial page may be left out,
+ * but at least some data is always included.  The number of bytes
+ * actually covered is stored in *actual_len.
+ *
+ * Returns the head of the chain, or NULL if extpg_alloc failed, in
+ * which case any partially built chain is freed.
+ */
+static struct mbuf *
+plist_ext_mbuf(memdesc_alloc_extpg_mbuf_t *extpg_alloc, void *cb_arg, int how,
+    struct bus_dma_segment *plist, u_int sglist_cnt, size_t offset, size_t len,
+    size_t *actual_len, bool can_truncate)
+{
+       vm_paddr_t pa;
+       struct mbuf *m, *tail;
+       size_t appended, totlen, todo;
+
+       /* Skip whole segments that lie before the starting offset. */
+       while (plist->ds_len <= offset) {
+               KASSERT(sglist_cnt > 1, ("out of sglist entries"));
+
+               offset -= plist->ds_len;
+               plist++;
+               sglist_cnt--;
+       }
+
+       totlen = 0;
+       m = tail = extpg_alloc(cb_arg, how);
+       if (m == NULL)
+               return (NULL);
+       while (len > 0) {
+               KASSERT(sglist_cnt >= 1, ("out of sglist entries"));
+
+               pa = plist->ds_addr + offset;
+               todo = len;
+               if (todo > plist->ds_len - offset) {
+                       /* The request continues past this segment. */
+                       todo = plist->ds_len - offset;
+               } else if (can_truncate) {
+                       vm_paddr_t end;
+
+                       /*
+                        * The request ends inside this segment.  If
+                        * truncation is enabled, avoid sending a final
+                        * partial page.  At least some data must be
+                        * sent, so the partial page is only dropped if
+                        * this segment contributes a page boundary or
+                        * some other data is already queued.
+                        */
+                       end = trunc_page(pa + len);
+                       if (end > pa) {
+                               /*
+                                * Stop at the page boundary.  Setting
+                                * len here makes this the last piece,
+                                * so the dropped tail is not retried.
+                                */
+                               todo = end - pa;
+                               len = todo;
+                       } else if (totlen != 0) {
+                               /*
+                                * This last segment is only a partial
+                                * page.
+                                */
+                               break;
+                       }
+                       /*
+                        * Otherwise nothing is queued yet, so the
+                        * partial page is sent as is (todo == len).
+                        */
+               }
+
+               len -= todo;
+               totlen += todo;
+
+               while (todo > 0) {
+                       if (!can_append_paddr(tail, pa)) {
+                               MBUF_EXT_PGS_ASSERT_SANITY(tail);
+                               tail->m_next = extpg_alloc(cb_arg, how);
+                               if (tail->m_next == NULL)
+                                       goto error;
+                               tail = tail->m_next;
+                       }
+
+                       appended = append_paddr_range(tail, pa, todo);
+                       KASSERT(appended > 0, ("did not append anything"));
+
+                       pa += appended;
+                       todo -= appended;
+               }
+
+               /* Continue at the start of the next segment. */
+               offset = 0;
+               plist++;
+               sglist_cnt--;
+       }
+
+       MBUF_EXT_PGS_ASSERT_SANITY(tail);
+       *actual_len = totlen;
+       return (m);
+error:
+       m_freem(m);
+       return (NULL);
+}
+
+static struct mbuf *
+vmpages_ext_mbuf(memdesc_alloc_extpg_mbuf_t *extpg_alloc, void *cb_arg, int how,
+    vm_page_t *ma, size_t offset, size_t len, size_t *actual_len,
+    bool can_truncate)
+{
+       struct mbuf *head, *cur;
+       size_t chunk;
+
+       /* Step over pages that lie entirely before the offset. */
+       for (; offset >= PAGE_SIZE; offset -= PAGE_SIZE)
+               ma++;
+
+       /*
+        * With truncation allowed, end on the last page boundary
+        * covered by the request, unless all of the data sits within
+        * the first page.
+        */
+       if (can_truncate) {
+               size_t boundary;
+
+               boundary = trunc_page(offset + len);
+               if (boundary > offset)
+                       len = boundary - offset;
+       }
+       *actual_len = len;
+
+       head = extpg_alloc(cb_arg, how);
+       if (head == NULL)
+               return (NULL);
+       cur = head;
+
+       /* The first page is the only one that may be entered mid-page. */
+       head->m_epg_pa[0] = VM_PAGE_TO_PHYS(*ma);
+       ma++;
+       head->m_epg_npgs = 1;
+       head->m_epg_1st_off = offset;
+       head->m_epg_last_len = PAGE_SIZE - offset;
+       if (head->m_epg_last_len > len)
+               head->m_epg_last_len = len;
+       head->m_len = head->m_epg_last_len;
+       len -= head->m_epg_last_len;
+
+       /*
+        * Remaining pages: a run of full pages, then at most one final
+        * partial page.  Both cases share the same bookkeeping and
+        * differ only in how many bytes the page contributes.
+        */
+       while (len > 0) {
+               if (cur->m_epg_npgs == MBUF_PEXT_MAX_PGS) {
+                       MBUF_EXT_PGS_ASSERT_SANITY(cur);
+                       cur->m_next = extpg_alloc(cb_arg, how);
+                       if (cur->m_next == NULL) {
+                               /* Release everything built so far. */
+                               m_freem(head);
+                               return (NULL);
+                       }
+                       cur = cur->m_next;
+               }
+
+               chunk = (len >= PAGE_SIZE) ? PAGE_SIZE : len;
+               cur->m_epg_pa[cur->m_epg_npgs] = VM_PAGE_TO_PHYS(*ma);
+               ma++;
+               cur->m_epg_npgs++;
+               cur->m_epg_last_len = chunk;
+               cur->m_len += chunk;
+               len -= chunk;
+       }
+
+       MBUF_EXT_PGS_ASSERT_SANITY(cur);
+       return (head);
+}
+
+/*
+ * Somewhat similar to m_copym but optionally avoids a partial mbuf at
+ * the end.
+ *
+ * Builds a new chain describing 'len' bytes of the chain 'm0' starting
+ * 'offset' bytes in.  Each new mbuf comes from m_get(how, MT_DATA).
+ * For source mbufs flagged M_EXT or M_EXTPG the new mbuf is pointed at
+ * the source data and passed to mb_dupcl(); otherwise the bytes are
+ * copied.
+ *
+ * The first mbuf may always be partial.  If can_truncate is true, the
+ * chain ends before a source mbuf that would only be partly used;
+ * otherwise that last mbuf is clamped so that exactly 'len' bytes are
+ * covered.  The covered length is stored in *actual_len.
+ *
+ * Returns the new chain, or NULL if an allocation failed, in which
+ * case any partially built chain is freed.
+ */
+static struct mbuf *
+mbuf_subchain(struct mbuf *m0, size_t offset, size_t len,
+    size_t *actual_len, bool can_truncate, int how)
+{
+       struct mbuf *m, *tail;
+       size_t seglen, totlen;
+
+       /* Find the mbuf that contains the starting offset. */
+       while (offset >= m0->m_len) {
+               offset -= m0->m_len;
+               m0 = m0->m_next;
+       }
+
+       /* Always return at least one mbuf. */
+       totlen = m0->m_len - offset;
+       if (totlen > len)
+               totlen = len;
+
+       m = m_get(how, MT_DATA);
+       if (m == NULL)
+               return (NULL);
+       m->m_len = totlen;
+       if (m0->m_flags & (M_EXT | M_EXTPG)) {
+               m->m_data = m0->m_data + offset;
+               mb_dupcl(m, m0);
+       } else
+               memcpy(mtod(m, void *), mtodo(m0, offset), m->m_len);
+
+       tail = m;
+       m0 = m0->m_next;
+       len -= totlen;
+       while (len > 0) {
+               KASSERT(m0 != NULL, ("out of mbufs"));
+
+               seglen = m0->m_len;
+               if (seglen > len) {
+                       /*
+                        * If truncation is enabled, don't send any
+                        * partial mbufs besides the first one.
+                        */
+                       if (can_truncate)
+                               break;
+
+                       /*
+                        * Otherwise take only the bytes still wanted.
+                        * Taking the whole mbuf would make 'len' wrap
+                        * around and overrun the request.
+                        */
+                       seglen = len;
+               }
+
+               tail->m_next = m_get(how, MT_DATA);
+               if (tail->m_next == NULL)
+                       goto error;
+               tail = tail->m_next;
+               tail->m_len = seglen;
+               if (m0->m_flags & (M_EXT | M_EXTPG)) {
+                       tail->m_data = m0->m_data;
+                       mb_dupcl(tail, m0);
+               } else
+                       memcpy(mtod(tail, void *), mtod(m0, void *),
+                           tail->m_len);
+
+               totlen += seglen;
+               m0 = m0->m_next;
+               len -= seglen;
+       }
+       *actual_len = totlen;
+       return (m);
+error:
+       m_freem(m);
+       return (NULL);
+}
+
+struct mbuf *
+memdesc_alloc_ext_mbufs(struct memdesc *mem,
+    memdesc_alloc_ext_mbuf_t *ext_alloc,
+    memdesc_alloc_extpg_mbuf_t *extpg_alloc, void *cb_arg, int how,
+    size_t offset, size_t len, size_t *actual_len, bool can_truncate)
+{
+       struct mbuf *chain;
+       size_t built;
+
+       /*
+        * Dispatch on the descriptor type.  Each helper returns the
+        * chain (or NULL on allocation failure) and reports through
+        * 'built' how many bytes the chain covers.
+        */
+       switch (mem->md_type) {
+       case MEMDESC_VADDR:
+               chain = vaddr_ext_mbuf(ext_alloc, cb_arg, how,
+                   (char *)mem->u.md_vaddr + offset, len, &built);
+               break;
+       case MEMDESC_PADDR:
+               chain = paddr_ext_mbuf(extpg_alloc, cb_arg, how,
+                   mem->u.md_paddr + offset, len, &built, can_truncate);
+               break;
+       case MEMDESC_VLIST:
+               chain = vlist_ext_mbuf(ext_alloc, cb_arg, how, mem->u.md_list,
+                   mem->md_nseg, offset, len, &built);
+               break;
+       case MEMDESC_PLIST:
+               chain = plist_ext_mbuf(extpg_alloc, cb_arg, how,
+                   mem->u.md_list, mem->md_nseg, offset, len, &built,
+                   can_truncate);
+               break;
+       case MEMDESC_UIO:
+               panic("uio not supported");
+       case MEMDESC_MBUF:
+               chain = mbuf_subchain(mem->u.md_mbuf, offset, len, &built,
+                   can_truncate, how);
+               break;
+       case MEMDESC_VMPAGES:
+               /* The page array is entered at md_offset plus 'offset'. */
+               chain = vmpages_ext_mbuf(extpg_alloc, cb_arg, how,
+                   mem->u.md_ma, mem->md_offset + offset, len, &built,
+                   can_truncate);
+               break;
+       default:
+               __assert_unreachable();
+       }
+       if (chain == NULL)
+               return (NULL);
+
+       /* A short chain is only legal when truncation was requested. */
+       if (can_truncate) {
+               KASSERT(built <= len, ("chain too long"));
+       } else {
+               KASSERT(built == len, ("short chain with no limit"));
+       }
+       KASSERT(m_length(chain, NULL) == built, ("length mismatch"));
+
+       /* The covered length is optional output. */
+       if (actual_len != NULL)
+               *actual_len = built;
+       return (chain);
+}
diff --git a/sys/sys/memdesc.h b/sys/sys/memdesc.h
index 24a2c2da6347..f1880fee2cda 100644
--- a/sys/sys/memdesc.h
+++ b/sys/sys/memdesc.h
@@ -163,4 +163,40 @@ void       memdesc_copyback(struct memdesc *mem, int off, int size,
     const void *src);
 void   memdesc_copydata(struct memdesc *mem, int off, int size, void *dst);
 
+/*
+ * This routine constructs a chain of M_EXT mbufs backed by a data
+ * buffer described by a memory descriptor.  Some buffers may require
+ * multiple mbufs.  For memory descriptors using unmapped storage
+ * (e.g. memdesc_vmpages), M_EXTPG mbufs are used.
+ *
+ * The offset and len arguments select the byte range of the described
+ * buffer that the chain should cover.  If actual_len is not NULL, the
+ * number of bytes covered by the returned chain is stored in it.
+ *
+ * Since memory descriptors are not an actual buffer, just a
+ * description of the buffer, the caller is required to supply a
+ * couple of helper routines to manage allocation of the raw mbufs and
+ * associate them with a reference to the underlying buffer.
+ *
+ * The memdesc_alloc_ext_mbuf_t callback is passed the callback
+ * argument as its first argument, the how flag as its second
+ * argument, and the pointer and length of a KVA buffer.  This
+ * callback should allocate an mbuf for the KVA buffer, either by
+ * making a copy of the data or using m_extaddref().
+ *
+ * The memdesc_alloc_extpg_mbuf_t callback is passed the callback
+ * argument as its first argument and the how flag as its second
+ * argument.  It should return an empty mbuf allocated by
+ * mb_alloc_ext_pgs.
+ *
+ * If either of the callbacks returns NULL, any partially allocated
+ * chain is freed and this routine returns NULL.
+ *
+ * If can_truncate is true, then this function might return a short
+ * chain to avoid gratuitously splitting up a page.  If it is false,
+ * the chain is expected to cover exactly len bytes.
+ *
+ * Memory descriptors of type MEMDESC_UIO are not supported; passing
+ * one causes a panic.
+ */
+typedef struct mbuf *memdesc_alloc_ext_mbuf_t(void *, int, void *, size_t);
+typedef struct mbuf *memdesc_alloc_extpg_mbuf_t(void *, int);
+
+struct mbuf *memdesc_alloc_ext_mbufs(struct memdesc *mem,
+    memdesc_alloc_ext_mbuf_t *ext_alloc,
+    memdesc_alloc_extpg_mbuf_t *extpg_alloc, void *cb_arg, int how,
+    size_t offset, size_t len, size_t *actual_len, bool can_truncate);
+
 #endif /* _SYS_MEMDESC_H_ */

Reply via email to