Thanks for the feedback...
there is another (performance) problem in my code, actually:
with TCP, or non-datagram sockets, the socket buffer usually
contains a single record, so that sbappend would still have to
scan the chain m->m_next . An additional tail pointer would
be needed to avoid that overhead as well in the generic case:
sb_mb------>[ ]---[ ]---[ ]
|
[ ]---[ ]---[ ]---[ ]
|
[ ]---[ ]
|
sb_mb_tail->[ ]---[ ]---[ ]---[ ]<--sb_mb_end
> - I dislike having sb_mb_tail only being valid if sb_mb is NULL;
> to me, this is non-obvious, and I feel it would be cleaner to
> always make it valid
(actually, in my code sb_mb_tail is only valid if sb_mb is non_null,
and to me this seems reasonable, as the append code needs to
differentiate an empty queue anyways). I see your point, but the
end result is probably not that different, with my approach
sbappendmbuf would just be
if (sb->sb_mb == NULL)
sb->sb_mb = m ;
else
sb->sb_mb_tail->m_nextpkt = m;
sb->sb_mb_tail = m ;
instead of
struct mbuf *m = sb->sb_mb_tail;
if (m)
m->m_nextpkt = m0;
else
sb->sb_mb = m0;
sb->sb_mb_tail = m0;
so basically you just change the variable to test.
> - sbgettail() is misnamed, it should reflect the function that
> it is being used for in most cases (appending the new mbuf to
> an existing chain)
yes, the fact is that i first implemented as a 'get' function
and only at a later time realized that append was the most common
usage.
> - I don't think that the fastscan sysctl is appropriate; either
that is part of the debugging cruft, and was useful in this stage
to assert the difference in performance with and without the tail
pointer.
> - Also, I believe that there may be a problem with the original
> patch, in that the sb_mb_tail is not updated in sbdrop(), in
yes, i noticed it and already fixed in a similar way.
Would you like to commit your patch to -current ?
cheers
luigi
> train.
>
> In that spirit, I offer an amended patch below.
> --
> Jonathan
>
> Index: kern/uipc_socket.c
> ===================================================================
> RCS file: /ncvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.68.2.11
> diff -u -r1.68.2.11 uipc_socket.c
> --- kern/uipc_socket.c 2000/12/22 10:25:21 1.68.2.11
> +++ kern/uipc_socket.c 2001/02/09 23:57:14
> @@ -778,6 +778,11 @@
> goto restart;
> }
> dontblock:
> + /*
> + * On entry here, m points to the first record on the socket buffer.
> + * While we process the initial mbufs containing address and control
> + * info we save a copy of m->m_nextpkt into nextrecord.
> + */
> if (uio->uio_procp)
> uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
> nextrecord = m->m_nextpkt;
> @@ -821,6 +826,18 @@
> controlp = &(*controlp)->m_next;
> }
> }
> + /*
> + * if nextrecord == NULL (this is a single chain) then m_tail
> + * may not be valid here if m was changed earlier.
> + */
> + if (nextrecord == NULL && (flags & MSG_PEEK) == 0)
> + so->so_rcv.sb_mb_tail = m;
> +
> + /*
> + * If m is non-null, we have some data to read. From now on, make
> + * sure to keep sb_mb_tail consistent when working on the last
> + * packet on the chain (nextrecord==NULL) and we change m->m_nextpkt.
> + */
> if (m) {
> if ((flags & MSG_PEEK) == 0)
> m->m_nextpkt = nextrecord;
> @@ -881,6 +898,8 @@
> }
> if (m)
> m->m_nextpkt = nextrecord;
> + if (nextrecord == NULL)
> + so->so_rcv.sb_mb_tail = m;
> }
> } else {
> if (flags & MSG_PEEK)
> @@ -937,8 +956,11 @@
> (void) sbdroprecord(&so->so_rcv);
> }
> if ((flags & MSG_PEEK) == 0) {
> - if (m == 0)
> + if (m == 0) {
> so->so_rcv.sb_mb = nextrecord;
> + if (nextrecord == NULL || nextrecord->m_nextpkt == NULL)
> + so->so_rcv.sb_mb_tail = nextrecord;
> + }
> if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
> (*pr->pr_usrreqs->pru_rcvd)(so, flags);
> }
> Index: kern/uipc_socket2.c
> ===================================================================
> RCS file: /ncvs/src/sys/kern/uipc_socket2.c,v
> retrieving revision 1.55.2.8
> diff -u -r1.55.2.8 uipc_socket2.c
> --- kern/uipc_socket2.c 2001/02/04 14:49:45 1.55.2.8
> +++ kern/uipc_socket2.c 2001/02/09 23:59:16
> @@ -63,6 +65,9 @@
>
> static u_long sb_efficiency = 8; /* parameter for sbreserve() */
>
> +static int sbtailchk(struct sockbuf *sb);
> +static __inline void sbappendmbuf(struct sockbuf *sb, struct mbuf *m0);
> +
> /*
> * Procedures to manipulate state flags of socket
> * and do appropriate wakeups. Normal sequence from the
> @@ -477,6 +482,29 @@
> * or sbdroprecord() when the data is acknowledged by the peer.
> */
>
> +static int
> +sbtailchk(struct sockbuf *sb)
> +{
> + struct mbuf *m = sb->sb_mb;
> +
> + while (m && m->m_nextpkt)
> + m = m->m_nextpkt;
> + return (m == sb->sb_mb_tail);
> +}
> +
> +static __inline void
> +sbappendmbuf(struct sockbuf *sb, struct mbuf *m0)
> +{
> + struct mbuf *m = sb->sb_mb_tail;
> +
> + KASSERT(sbtailchk(sb), ("sbappendmbuf: bad sb_mb_tail"));
> + if (m)
> + m->m_nextpkt = m0;
> + else
> + sb->sb_mb = m0;
> + sb->sb_mb_tail = m0;
> +}
> +
> /*
> * Append mbuf chain m to the last record in the
> * socket buffer sb. The additional space associated
> @@ -492,10 +520,9 @@
>
> if (m == 0)
> return;
> - n = sb->sb_mb;
> + KASSERT(sbtailchk(sb), ("sbappend: bad sb_mb_tail"));
> + n = sb->sb_mb_tail;
> if (n) {
> - while (n->m_nextpkt)
> - n = n->m_nextpkt;
> do {
> if (n->m_flags & M_EOR) {
> sbappendrecord(sb, m); /* XXXXXX!!!! */
> @@ -545,19 +572,12 @@
>
> if (m0 == 0)
> return;
> - m = sb->sb_mb;
> - if (m)
> - while (m->m_nextpkt)
> - m = m->m_nextpkt;
> /*
> * Put the first mbuf on the queue.
> * Note this permits zero length records.
> */
> sballoc(sb, m0);
> - if (m)
> - m->m_nextpkt = m0;
> - else
> - sb->sb_mb = m0;
> + sbappendmbuf(sb, m0);
> m = m0->m_next;
> m0->m_next = 0;
> if (m && (m0->m_flags & M_EOR)) {
> @@ -603,6 +623,8 @@
> */
> sballoc(sb, m0);
> m0->m_nextpkt = *mp;
> + if (*mp == NULL) /* m0 is actually the new tail */
> + sb->sb_mb_tail = m0;
> *mp = m0;
> m = m0->m_next;
> m0->m_next = 0;
> @@ -653,13 +675,7 @@
> m->m_next = control;
> for (n = m; n; n = n->m_next)
> sballoc(sb, n);
> - n = sb->sb_mb;
> - if (n) {
> - while (n->m_nextpkt)
> - n = n->m_nextpkt;
> - n->m_nextpkt = m;
> - } else
> - sb->sb_mb = m;
> + sbappendmbuf(sb, m);
> return (1);
> }
>
> @@ -686,13 +702,7 @@
> n->m_next = m0; /* concatenate data to control */
> for (m = control; m; m = m->m_next)
> sballoc(sb, m);
> - n = sb->sb_mb;
> - if (n) {
> - while (n->m_nextpkt)
> - n = n->m_nextpkt;
> - n->m_nextpkt = control;
> - } else
> - sb->sb_mb = control;
> + sbappendmbuf(sb, control);
> return (1);
> }
>
> @@ -733,7 +743,7 @@
> if (n)
> n->m_next = m;
> else
> - sb->sb_mb = m;
> + sb->sb_mb = sb->sb_mb_tail = m;
> sballoc(sb, m);
> n = m;
> m->m_flags &= ~M_EOR;
> @@ -813,6 +823,9 @@
> m->m_nextpkt = next;
> } else
> sb->sb_mb = next;
> + m = sb->sb_mb;
> + if (m == NULL || m->m_nextpkt == NULL)
> + sb->sb_mb_tail = m;
> }
>
> /*
> @@ -833,6 +846,9 @@
> MFREE(m, mn);
> m = mn;
> } while (m);
> + m = sb->sb_mb;
> + if (m == NULL || m->m_nextpkt == NULL)
> + sb->sb_mb_tail = m;
> }
> }
>
> Index: sys/socketvar.h
> ===================================================================
> RCS file: /ncvs/src/sys/sys/socketvar.h,v
> retrieving revision 1.46.2.4
> diff -u -r1.46.2.4 socketvar.h
> --- sys/socketvar.h 2000/11/26 02:30:08 1.46.2.4
> +++ sys/socketvar.h 2001/02/08 17:55:46
> @@ -93,6 +93,7 @@
> u_long sb_mbmax; /* max chars of mbufs to use */
> long sb_lowat; /* low water mark */
> struct mbuf *sb_mb; /* the mbuf chain */
> + struct mbuf *sb_mb_tail; /* last pkt in chain */
> struct selinfo sb_sel; /* process selecting read/write */
> short sb_flags; /* flags, see below */
> short sb_timeo; /* timeout for read/write */
>
To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-net" in the body of the message