On Tue, Oct 11, 2016 at 20:41 +1000, David Gwynne wrote:
> On Tue, Oct 11, 2016 at 12:06:46AM +0200, Mike Belopuhov wrote:
> > On Fri, Oct 07, 2016 at 20:48 +1000, David Gwynne wrote:
> > > On Thu, Oct 06, 2016 at 12:13:20PM +0200, Mike Belopuhov wrote:
> > > > m_pullup will always get a new mbuf and on
> > > > strict alignment architectures you will always do a m_dup_pkt
> > > > (verified by my -DTEST1).
> > >
> > > i didnt think m_pullup was that stupid, but yes, yes it is.
> > >
> > > im not sure we can do better than m_dup_pkt on strict alignment
> > > archs, except to use memmove to realign simple mbufs. the alternative
> > > is vxlan parses the packet to figure out the most conservative
> > > realignment it can do, but itll have to allocate a new mbuf to
> > > prefix it with, which would be about the same cost as blindly copying
> > > it all.
> > >
> > > anyway, m_pullup could be simpler. while im there, it should maintain
> > > the alignment of the payload.
> > >
> > > the diff below makes it return early if the current mbuf already
> > > has the requested region, regardless of whether it has a cluster
> > > or not.
> > >
> > > if that fails, but there is enough space in the first mbuf for the
> > > requested region, itll memmove the data around to make the request
> > > contig.
> > >
> > > if that fails, itll attempt to allocate an mbuf and maybe a cluster
> > > to prefix the chain with.
> > >
> > > after shuffling the first mbuf data or prefixing an empty mbuf,
> > > itll copy from the chain into the first mbuf.
> > >
> > > i reckon this would be comparable to m_pulldown now.
> > >
> > > ok?
> > >
> > > Index: uipc_mbuf.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
> > > retrieving revision 1.231
> > > diff -u -p -r1.231 uipc_mbuf.c
> > > --- uipc_mbuf.c 15 Sep 2016 02:00:16 -0000 1.231
> > > +++ uipc_mbuf.c 7 Oct 2016 10:47:54 -0000
> > > @@ -838,64 +842,73 @@ struct mbuf *
> > > m_pullup(struct mbuf *n, int len)
> > > {
> > > struct mbuf *m;
> > > - int count;
> > > + unsigned int align;
> > > + unsigned int space;
> > > +
> > > + /* if n is already contig then don't do any work */
> > > + if (len <= n->m_len)
> > > + return (n);
> > > +
> > > + align = (unsigned long)n->m_data & ALIGNBYTES;
> > > + space = M_LEADINGSPACE(n) - align;
> > > +
> >
> > M_LEADINGSPACE for mbufs that have (m->m_len == 0) is actually zero
> > so here you can have space underflow and become huge.
>
> yeesh. why do we let cthulu craft mbufs?
>
> > > + if (len <= space + n->m_len + M_TRAILINGSPACE(n)) {
> > > + /* if there's enough space in the first mbuf memmove into it */
> > > + memmove(mtod(n, caddr_t) - space, mtod(n, caddr_t), n->m_len);
> > > + n->m_data -= space;
> > > + len -= n->m_len;
> > >
> >
> > You're not performing a m_getptr operation so you never optimize
> > for a case where you can memcpy from the other mbuf into the first
> > one which is what old code does (when m->m_next != NULL)... I think
> > these should be preserved.
>
> thats what this chunk does. in the diff below i made the the memmove
> conditional on if there's not enough space in the tail.
>
Right, I've looked at the wrong one.
As far as I can tell and test this new diff is fine. I've double
checked the alignment math and it looks good to me. OK mikeb
> Index: uipc_mbuf.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
> retrieving revision 1.233
> diff -u -p -r1.233 uipc_mbuf.c
> --- uipc_mbuf.c 10 Oct 2016 00:41:17 -0000 1.233
> +++ uipc_mbuf.c 11 Oct 2016 09:07:09 -0000
> @@ -842,64 +842,79 @@ struct mbuf *
> m_pullup(struct mbuf *n, int len)
> {
> struct mbuf *m;
> - int count;
> + unsigned int adj;
> + caddr_t head, tail;
> + unsigned int space;
>
> - /*
> - * If first mbuf has no cluster, and has room for len bytes
> - * without shifting current data, pullup into it,
> - * otherwise allocate a new mbuf to prepend to the chain.
> - */
> - if ((n->m_flags & M_EXT) == 0 && n->m_next &&
> - n->m_data + len < &n->m_dat[MLEN]) {
> - if (n->m_len >= len)
> - return (n);
> - m = n;
> - n = n->m_next;
> - len -= m->m_len;
> - } else if ((n->m_flags & M_EXT) != 0 && len > MHLEN && n->m_next &&
> - n->m_data + len < &n->m_ext.ext_buf[n->m_ext.ext_size]) {
> - if (n->m_len >= len)
> - return (n);
> + /* if n is already contig then don't do any work */
> + if (len <= n->m_len)
> + return (n);
> +
> + adj = (unsigned long)n->m_data & ALIGNBYTES;
> + head = (caddr_t)ALIGN(mtod(n, caddr_t) - M_LEADINGSPACE(n)) + adj;
> + tail = mtod(n, caddr_t) + n->m_len + M_TRAILINGSPACE(n);
> +
> + if (head < tail && len <= tail - head) {
> + /* there's enough space in the first mbuf */
> +
> + if (len > tail - mtod(n, caddr_t)) {
> + /* need to memmove to make space at the end */
> + memmove(head, mtod(n, caddr_t), n->m_len);
> + m->m_data = head;
> + }
> +
> + len -= n->m_len;
> m = n;
> - n = n->m_next;
> - len -= m->m_len;
> + n = m->m_next;
> } else {
> - if (len > MAXMCLBYTES)
> + /* the first mbuf is too small so prepend one with space */
> + space = adj + len;
> +
> + if (space > MAXMCLBYTES)
> goto bad;
> +
> MGET(m, M_DONTWAIT, n->m_type);
> if (m == NULL)
> goto bad;
> - if (len > MHLEN) {
> - MCLGETI(m, M_DONTWAIT, NULL, len);
> + if (space > MHLEN) {
> + MCLGETI(m, M_DONTWAIT, NULL, space);
> if ((m->m_flags & M_EXT) == 0) {
> m_free(m);
> goto bad;
> }
> }
> - m->m_len = 0;
> +
> if (n->m_flags & M_PKTHDR)
> M_MOVE_PKTHDR(m, n);
> +
> + m->m_len = 0;
> + m->m_data += adj;
> }
>
> + KASSERT(M_TRAILINGSPACE(m) >= len);
> +
> do {
> - count = min(len, n->m_len);
> - memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
> - count);
> - len -= count;
> - m->m_len += count;
> - n->m_len -= count;
> - if (n->m_len)
> - n->m_data += count;
> + if (n == NULL) {
> + (void)m_free(m);
> + goto bad;
> + }
> +
> + space = min(len, n->m_len);
> + memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t), space);
> + len -= space;
> + m->m_len += space;
> + n->m_len -= space;
> +
> + if (n->m_len > 0)
> + n->m_data += space;
> else
> n = m_free(n);
> - } while (len > 0 && n);
> - if (len > 0) {
> - (void)m_free(m);
> - goto bad;
> - }
> + } while (len > 0);
> +
> m->m_next = n;
>
> return (m);
> +
> bad:
> m_freem(n);
> return (NULL);