On Fri, Oct 07, 2016 at 20:48 +1000, David Gwynne wrote:
> On Thu, Oct 06, 2016 at 12:13:20PM +0200, Mike Belopuhov wrote:
> > m_pullup will always get a new mbuf and on
> > strict alignment architectures you will always do a m_dup_pkt
> > (verified by my -DTEST1).
>
> i didnt think m_pullup was that stupid, but yes, yes it is.
>
> im not sure we can do better than m_dup_pkt on strict alignment
> archs, except to use memmove to realign simple mbufs. the alternative
> is vxlan parses the packet to figure out the most conservative
> realignment it can do, but itll have to allocate a new mbuf to
> prefix it with, which would be about the same cost as blindly copying
> it all.
>
> anyway, m_pullup could be simpler. while im there, it should maintain
> the alignment of the payload.
>
> the diff below makes it return early if the current mbuf already
> has the requested region, regardless of whether it has a cluster
> or not.
>
> if that fails, but there is enough space in the first mbuf for the
> requested region, itll memmove the data around to make the request
> contig.
>
> if that fails, itll attempt to allocate an mbuf and maybe a cluster
> to prefix the chain with.
>
> after shuffling the first mbuf data or prefixing an empty mbuf,
> itll copy from the chain into the first mbuf.
>
> i reckon this would be comparable to m_pulldown now.
>
> ok?
>
> Index: uipc_mbuf.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
> retrieving revision 1.231
> diff -u -p -r1.231 uipc_mbuf.c
> --- uipc_mbuf.c 15 Sep 2016 02:00:16 -0000 1.231
> +++ uipc_mbuf.c 7 Oct 2016 10:47:54 -0000
> @@ -838,64 +842,73 @@ struct mbuf *
> m_pullup(struct mbuf *n, int len)
> {
> struct mbuf *m;
> - int count;
> + unsigned int align;
> + unsigned int space;
> +
> + /* if n is already contig then don't do any work */
> + if (len <= n->m_len)
> + return (n);
> +
> + align = (unsigned long)n->m_data & ALIGNBYTES;
> + space = M_LEADINGSPACE(n) - align;
> +
M_LEADINGSPACE for mbufs that have (m->m_len == 0) is actually zero,
so here `space` can underflow and wrap around to a huge value.
> + if (len <= space + n->m_len + M_TRAILINGSPACE(n)) {
> + /* if there's enough space in the first mbuf memmove into it */
> + memmove(mtod(n, caddr_t) - space, mtod(n, caddr_t), n->m_len);
> + n->m_data -= space;
> + len -= n->m_len;
>
You're not performing an m_getptr operation, so you never optimize
for the case where you can memcpy from the next mbuf into the first
one, which is what the old code does (when m->m_next != NULL)... I
think those optimizations should be preserved.
> - /*
> - * If first mbuf has no cluster, and has room for len bytes
> - * without shifting current data, pullup into it,
> - * otherwise allocate a new mbuf to prepend to the chain.
> - */
> - if ((n->m_flags & M_EXT) == 0 && n->m_next &&
> - n->m_data + len < &n->m_dat[MLEN]) {
> - if (n->m_len >= len)
> - return (n);
> - m = n;
> - n = n->m_next;
> - len -= m->m_len;
> - } else if ((n->m_flags & M_EXT) != 0 && len > MHLEN && n->m_next &&
> - n->m_data + len < &n->m_ext.ext_buf[n->m_ext.ext_size]) {
> - if (n->m_len >= len)
> - return (n);
> m = n;
> - n = n->m_next;
> - len -= m->m_len;
> + n = m->m_next;
> } else {
> - if (len > MAXMCLBYTES)
> + /* the first mbuf is too small so prepend one with space */
> + space = align + len;
> +
> + if (space > MAXMCLBYTES)
> goto bad;
> +
> MGET(m, M_DONTWAIT, n->m_type);
> if (m == NULL)
> goto bad;
> - if (len > MHLEN) {
> - MCLGETI(m, M_DONTWAIT, NULL, len);
> + if (space > MHLEN) {
> + MCLGETI(m, M_DONTWAIT, NULL, space);
> if ((m->m_flags & M_EXT) == 0) {
> m_free(m);
> goto bad;
> }
> }
> - m->m_len = 0;
> +
> if (n->m_flags & M_PKTHDR)
> M_MOVE_PKTHDR(m, n);
> +
> + m->m_len = 0;
> + m->m_data += align;
> }
>
> + KASSERT(M_TRAILINGSPACE(m) >= len);
> +
> do {
> - count = min(len, n->m_len);
> - memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
> - count);
> - len -= count;
> - m->m_len += count;
> - n->m_len -= count;
> - if (n->m_len)
> - n->m_data += count;
> + if (n == NULL) {
> + (void)m_free(m);
> + goto bad;
> + }
> +
> + space = min(len, n->m_len);
> + memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t), space);
> + len -= space;
> + m->m_len += space;
> + n->m_len -= space;
> +
> + if (n->m_len > 0)
> + n->m_data += space;
> else
> n = m_free(n);
> - } while (len > 0 && n);
> - if (len > 0) {
> - (void)m_free(m);
> - goto bad;
> - }
> + } while (len > 0);
> +
> m->m_next = n;
>
> return (m);
> +
> bad:
> m_freem(n);
> return (NULL);