In article <local.mail.freebsd-net/[EMAIL PROTECTED]> you 
write:
>
>I would be grateful if someone could test the attached patch which
>deals with the following problem:
>
>       on all *BSD version, socket buffers contain a list of
>       incoming and/or outgoing mbufs. Unfortunately the list only
>       has a pointer to the head, meaning that all append operations
>       require to scan the full list. The overhead can be very
>       bad in some cases (e.g. small UDP packets), and becomes
>       worse and worse as the socket buffer size increases (which
>       is what one would commonly do when expecting a lot of
>       traffic!).
>
>The attached patch implements a tail pointer to the list, so that
>you can append in constant time. By default, the code works exactly
>as before -- the tail of the list is reached with the usual linear
>scan, and the pointer to the tail is only used for comparison
>purposes to make sure that it yields the same value.

Aside from the obvious style bugs, and debugging cruft, I have a couple
of issues with this patch:

    - I dislike having sb_mb_tail only being valid if sb_mb is NULL;
      to me, this is non-obvious, and I feel it would be cleaner to
      always make it valid

    - sbgettail() is misnamed, it should reflect the function that 
      it is being used for in most cases (appending the new mbuf to 
      an existing chain)

    - I don't think that the fastscan sysctl is appropriate; either 
      the code works, or it should be debugged further before committing.
      You could move the debugging checks under a KASSERT though.

    - Also, I believe that there may be a problem with the original
      patch, in that the sb_mb_tail is not updated in sbdrop(), in 
      the case where we drop partial mbufs from the first packet 
      train.

In that spirit, I offer an amended patch below.
--
Jonathan

Index: kern/uipc_socket.c
===================================================================
RCS file: /ncvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.68.2.11
diff -u -r1.68.2.11 uipc_socket.c
--- kern/uipc_socket.c  2000/12/22 10:25:21     1.68.2.11
+++ kern/uipc_socket.c  2001/02/09 23:57:14
@@ -778,6 +778,11 @@
                goto restart;
        }
 dontblock:
+       /* 
+        * On entry here, m points to the first record on the socket buffer.
+        * While we process the initial mbufs containing address and control
+        * info we save a copy of m->m_nextpkt into nextrecord.
+        */
        if (uio->uio_procp)
                uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
        nextrecord = m->m_nextpkt;
@@ -821,6 +826,18 @@
                        controlp = &(*controlp)->m_next;
                }
        }
+       /*
+        * if nextrecord == NULL (this is a single chain) then m_tail
+        * may not be valid here if m was changed earlier.
+        */
+       if (nextrecord == NULL && (flags & MSG_PEEK) == 0)
+               so->so_rcv.sb_mb_tail = m;
+
+       /*
+        * If m is non-null, we have some data to read. From now on, make
+        * sure to keep sb_mb_tail consistent when working on the last
+        * packet on the chain (nextrecord==NULL) and we change m->m_nextpkt.
+        */
        if (m) {
                if ((flags & MSG_PEEK) == 0)
                        m->m_nextpkt = nextrecord;
@@ -881,6 +898,8 @@
                                }
                                if (m)
                                        m->m_nextpkt = nextrecord;
+                               if (nextrecord == NULL)
+                                       so->so_rcv.sb_mb_tail = m;
                        }
                } else {
                        if (flags & MSG_PEEK)
@@ -937,8 +956,11 @@
                        (void) sbdroprecord(&so->so_rcv);
        }
        if ((flags & MSG_PEEK) == 0) {
-               if (m == 0)
+               if (m == 0) {
                        so->so_rcv.sb_mb = nextrecord;
+                       if (nextrecord == NULL || nextrecord->m_nextpkt == NULL)
+                               so->so_rcv.sb_mb_tail = nextrecord;
+               }
                if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
                        (*pr->pr_usrreqs->pru_rcvd)(so, flags);
        }
Index: kern/uipc_socket2.c
===================================================================
RCS file: /ncvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.55.2.8
diff -u -r1.55.2.8 uipc_socket2.c
--- kern/uipc_socket2.c 2001/02/04 14:49:45     1.55.2.8
+++ kern/uipc_socket2.c 2001/02/09 23:59:16
@@ -63,6 +65,9 @@
 
 static u_long sb_efficiency = 8;       /* parameter for sbreserve() */
 
+static int     sbtailchk(struct sockbuf *sb);
+static __inline        void    sbappendmbuf(struct sockbuf *sb, struct mbuf *m0);
+
 /*
  * Procedures to manipulate state flags of socket
  * and do appropriate wakeups.  Normal sequence from the
@@ -477,6 +482,29 @@
  * or sbdroprecord() when the data is acknowledged by the peer.
  */
 
+static int
+sbtailchk(struct sockbuf *sb)
+{
+       struct mbuf *m = sb->sb_mb;
+
+       while (m && m->m_nextpkt)
+               m = m->m_nextpkt;
+       return (m == sb->sb_mb_tail);
+}
+
+static __inline void
+sbappendmbuf(struct sockbuf *sb, struct mbuf *m0)
+{
+       struct mbuf *m = sb->sb_mb_tail;
+
+       KASSERT(sbtailchk(sb), ("sbappendmbuf: bad sb_mb_tail"));
+       if (m)
+               m->m_nextpkt = m0;
+       else
+               sb->sb_mb = m0;
+       sb->sb_mb_tail = m0;
+}
+
 /*
  * Append mbuf chain m to the last record in the
  * socket buffer sb.  The additional space associated
@@ -492,10 +520,9 @@
 
        if (m == 0)
                return;
-       n = sb->sb_mb;
+       KASSERT(sbtailchk(sb), ("sbappend: bad sb_mb_tail"));
+       n = sb->sb_mb_tail;
        if (n) {
-               while (n->m_nextpkt)
-                       n = n->m_nextpkt;
                do {
                        if (n->m_flags & M_EOR) {
                                sbappendrecord(sb, m); /* XXXXXX!!!! */
@@ -545,19 +572,12 @@
 
        if (m0 == 0)
                return;
-       m = sb->sb_mb;
-       if (m)
-               while (m->m_nextpkt)
-                       m = m->m_nextpkt;
        /*
         * Put the first mbuf on the queue.
         * Note this permits zero length records.
         */
        sballoc(sb, m0);
-       if (m)
-               m->m_nextpkt = m0;
-       else
-               sb->sb_mb = m0;
+       sbappendmbuf(sb, m0);
        m = m0->m_next;
        m0->m_next = 0;
        if (m && (m0->m_flags & M_EOR)) {
@@ -603,6 +623,8 @@
         */
        sballoc(sb, m0);
        m0->m_nextpkt = *mp;
+       if (*mp == NULL)                /* m0 is actually the new tail */
+               sb->sb_mb_tail = m0;
        *mp = m0;
        m = m0->m_next;
        m0->m_next = 0;
@@ -653,13 +675,7 @@
        m->m_next = control;
        for (n = m; n; n = n->m_next)
                sballoc(sb, n);
-       n = sb->sb_mb;
-       if (n) {
-               while (n->m_nextpkt)
-                       n = n->m_nextpkt;
-               n->m_nextpkt = m;
-       } else
-               sb->sb_mb = m;
+       sbappendmbuf(sb, m);
        return (1);
 }
 
@@ -686,13 +702,7 @@
        n->m_next = m0;                 /* concatenate data to control */
        for (m = control; m; m = m->m_next)
                sballoc(sb, m);
-       n = sb->sb_mb;
-       if (n) {
-               while (n->m_nextpkt)
-                       n = n->m_nextpkt;
-               n->m_nextpkt = control;
-       } else
-               sb->sb_mb = control;
+       sbappendmbuf(sb, control);
        return (1);
 }
 
@@ -733,7 +743,7 @@
                if (n)
                        n->m_next = m;
                else
-                       sb->sb_mb = m;
+                       sb->sb_mb = sb->sb_mb_tail = m;
                sballoc(sb, m);
                n = m;
                m->m_flags &= ~M_EOR;
@@ -813,6 +823,9 @@
                m->m_nextpkt = next;
        } else
                sb->sb_mb = next;
+       m = sb->sb_mb;
+       if (m == NULL || m->m_nextpkt == NULL)
+               sb->sb_mb_tail = m;
 }
 
 /*
@@ -833,6 +846,9 @@
                        MFREE(m, mn);
                        m = mn;
                } while (m);
+               m = sb->sb_mb;
+               if (m == NULL || m->m_nextpkt == NULL)
+                       sb->sb_mb_tail = m;
        }
 }
 
Index: sys/socketvar.h
===================================================================
RCS file: /ncvs/src/sys/sys/socketvar.h,v
retrieving revision 1.46.2.4
diff -u -r1.46.2.4 socketvar.h
--- sys/socketvar.h     2000/11/26 02:30:08     1.46.2.4
+++ sys/socketvar.h     2001/02/08 17:55:46
@@ -93,6 +93,7 @@
                u_long  sb_mbmax;       /* max chars of mbufs to use */
                long    sb_lowat;       /* low water mark */
                struct  mbuf *sb_mb;    /* the mbuf chain */
+               struct  mbuf *sb_mb_tail; /* last pkt in chain */
                struct  selinfo sb_sel; /* process selecting read/write */
                short   sb_flags;       /* flags, see below */
                short   sb_timeo;       /* timeout for read/write */


To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-net" in the body of the message

Reply via email to