Thanks for the feedback...

there is another (performance) problem in my code, actually:
with TCP, or non-datagram sockets, the socket buffer usually
contains a single record, so that sbappend would still have to
scan the chain m->m_next . An additional tail pointer would
be needed to avoid that overhead as well in the generic case:

    sb_mb------>[  ]---[  ]---[  ]
                 |
                [  ]---[  ]---[  ]---[  ]
                 |
                [  ]---[  ]
                 |
    sb_mb_tail->[  ]---[  ]---[  ]---[  ]<--sb_mb_end


>     - I dislike having sb_mb_tail only being valid if sb_mb is NULL;
>       to me, this is non-obvious, and I feel it would be cleaner to
>       always make it valid

(actually, in my code sb_mb_tail is only valid if sb_mb is non_null,
and to me this seems reasonable, as the append code needs to
differentiate an empty queue anyways).  I see your point, but the
end result is probably not that different, with my approach
sbappendmbuf would just be

        if (sb->sb_mb == NULL)
                sb->sb_mb = m ;
        else
                sb->sb_mb_tail->m_nextpkt = m;
        sb->sb_mb_tail = m ;

instead of

        struct mbuf *m = sb->sb_mb_tail;
        if (m)
                m->m_nextpkt = m0;
        else
                sb->sb_mb = m0;
        sb->sb_mb_tail = m0;

so basically you just change the variable to test.

>     - sbgettail() is misnamed, it should reflect the function that 
>       it is being used for in most cases (appending the new mbuf to 
>       an existing chain)

yes, the fact is that i first implemented as a 'get' function
and only at a later time realized that append was the most common
usage.

>     - I don't think that the fastscan sysctl is appropriate; either 

that is part of the debugging cruft, and was useful in this stage
to assert the difference in performance with and without the tail
pointer.

>     - Also, I believe that there may be a problem with the original
>       patch, in that the sb_mb_tail is not updated in sbdrop(), in 

yes, i noticed it and already fixed in a similar way.
Would you like to commit your patch to -current ?

        cheers
        luigi


>       train.
> 
> In that spirit, I offer an amended patch below.
> --
> Jonathan
> 
> Index: kern/uipc_socket.c
> ===================================================================
> RCS file: /ncvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.68.2.11
> diff -u -r1.68.2.11 uipc_socket.c
> --- kern/uipc_socket.c        2000/12/22 10:25:21     1.68.2.11
> +++ kern/uipc_socket.c        2001/02/09 23:57:14
> @@ -778,6 +778,11 @@
>               goto restart;
>       }
>  dontblock:
> +     /* 
> +      * On entry here, m points to the first record on the socket buffer.
> +      * While we process the initial mbufs containing address and control
> +      * info we save a copy of m->m_nextpkt into nextrecord.
> +      */
>       if (uio->uio_procp)
>               uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
>       nextrecord = m->m_nextpkt;
> @@ -821,6 +826,18 @@
>                       controlp = &(*controlp)->m_next;
>               }
>       }
> +     /*
> +      * if nextrecord == NULL (this is a single chain) then m_tail
> +      * may not be valid here if m was changed earlier.
> +      */
> +     if (nextrecord == NULL && (flags & MSG_PEEK) == 0)
> +             so->so_rcv.sb_mb_tail = m;
> +
> +     /*
> +      * If m is non-null, we have some data to read. From now on, make
> +      * sure to keep sb_mb_tail consistent when working on the last
> +      * packet on the chain (nextrecord==NULL) and we change m->m_nextpkt.
> +      */
>       if (m) {
>               if ((flags & MSG_PEEK) == 0)
>                       m->m_nextpkt = nextrecord;
> @@ -881,6 +898,8 @@
>                               }
>                               if (m)
>                                       m->m_nextpkt = nextrecord;
> +                             if (nextrecord == NULL)
> +                                     so->so_rcv.sb_mb_tail = m;
>                       }
>               } else {
>                       if (flags & MSG_PEEK)
> @@ -937,8 +956,11 @@
>                       (void) sbdroprecord(&so->so_rcv);
>       }
>       if ((flags & MSG_PEEK) == 0) {
> -             if (m == 0)
> +             if (m == 0) {
>                       so->so_rcv.sb_mb = nextrecord;
> +                     if (nextrecord == NULL || nextrecord->m_nextpkt == NULL)
> +                             so->so_rcv.sb_mb_tail = nextrecord;
> +             }
>               if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
>                       (*pr->pr_usrreqs->pru_rcvd)(so, flags);
>       }
> Index: kern/uipc_socket2.c
> ===================================================================
> RCS file: /ncvs/src/sys/kern/uipc_socket2.c,v
> retrieving revision 1.55.2.8
> diff -u -r1.55.2.8 uipc_socket2.c
> --- kern/uipc_socket2.c       2001/02/04 14:49:45     1.55.2.8
> +++ kern/uipc_socket2.c       2001/02/09 23:59:16
> @@ -63,6 +65,9 @@
>  
>  static       u_long sb_efficiency = 8;       /* parameter for sbreserve() */
>  
> +static int   sbtailchk(struct sockbuf *sb);
> +static __inline      void    sbappendmbuf(struct sockbuf *sb, struct mbuf *m0);
> +
>  /*
>   * Procedures to manipulate state flags of socket
>   * and do appropriate wakeups.  Normal sequence from the
> @@ -477,6 +482,29 @@
>   * or sbdroprecord() when the data is acknowledged by the peer.
>   */
>  
> +static int
> +sbtailchk(struct sockbuf *sb)
> +{
> +     struct mbuf *m = sb->sb_mb;
> +
> +     while (m && m->m_nextpkt)
> +             m = m->m_nextpkt;
> +     return (m == sb->sb_mb_tail);
> +}
> +
> +static __inline void
> +sbappendmbuf(struct sockbuf *sb, struct mbuf *m0)
> +{
> +     struct mbuf *m = sb->sb_mb_tail;
> +
> +     KASSERT(sbtailchk(sb), ("sbappendmbuf: bad sb_mb_tail"));
> +     if (m)
> +             m->m_nextpkt = m0;
> +     else
> +             sb->sb_mb = m0;
> +     sb->sb_mb_tail = m0;
> +}
> +
>  /*
>   * Append mbuf chain m to the last record in the
>   * socket buffer sb.  The additional space associated
> @@ -492,10 +520,9 @@
>  
>       if (m == 0)
>               return;
> -     n = sb->sb_mb;
> +     KASSERT(sbtailchk(sb), ("sbappend: bad sb_mb_tail"));
> +     n = sb->sb_mb_tail;
>       if (n) {
> -             while (n->m_nextpkt)
> -                     n = n->m_nextpkt;
>               do {
>                       if (n->m_flags & M_EOR) {
>                               sbappendrecord(sb, m); /* XXXXXX!!!! */
> @@ -545,19 +572,12 @@
>  
>       if (m0 == 0)
>               return;
> -     m = sb->sb_mb;
> -     if (m)
> -             while (m->m_nextpkt)
> -                     m = m->m_nextpkt;
>       /*
>        * Put the first mbuf on the queue.
>        * Note this permits zero length records.
>        */
>       sballoc(sb, m0);
> -     if (m)
> -             m->m_nextpkt = m0;
> -     else
> -             sb->sb_mb = m0;
> +     sbappendmbuf(sb, m0);
>       m = m0->m_next;
>       m0->m_next = 0;
>       if (m && (m0->m_flags & M_EOR)) {
> @@ -603,6 +623,8 @@
>        */
>       sballoc(sb, m0);
>       m0->m_nextpkt = *mp;
> +     if (*mp == NULL)                /* m0 is actually the new tail */
> +             sb->sb_mb_tail = m0;
>       *mp = m0;
>       m = m0->m_next;
>       m0->m_next = 0;
> @@ -653,13 +675,7 @@
>       m->m_next = control;
>       for (n = m; n; n = n->m_next)
>               sballoc(sb, n);
> -     n = sb->sb_mb;
> -     if (n) {
> -             while (n->m_nextpkt)
> -                     n = n->m_nextpkt;
> -             n->m_nextpkt = m;
> -     } else
> -             sb->sb_mb = m;
> +     sbappendmbuf(sb, m);
>       return (1);
>  }
>  
> @@ -686,13 +702,7 @@
>       n->m_next = m0;                 /* concatenate data to control */
>       for (m = control; m; m = m->m_next)
>               sballoc(sb, m);
> -     n = sb->sb_mb;
> -     if (n) {
> -             while (n->m_nextpkt)
> -                     n = n->m_nextpkt;
> -             n->m_nextpkt = control;
> -     } else
> -             sb->sb_mb = control;
> +     sbappendmbuf(sb, control);
>       return (1);
>  }
>  
> @@ -733,7 +743,7 @@
>               if (n)
>                       n->m_next = m;
>               else
> -                     sb->sb_mb = m;
> +                     sb->sb_mb = sb->sb_mb_tail = m;
>               sballoc(sb, m);
>               n = m;
>               m->m_flags &= ~M_EOR;
> @@ -813,6 +823,9 @@
>               m->m_nextpkt = next;
>       } else
>               sb->sb_mb = next;
> +     m = sb->sb_mb;
> +     if (m == NULL || m->m_nextpkt == NULL)
> +             sb->sb_mb_tail = m;
>  }
>  
>  /*
> @@ -833,6 +846,9 @@
>                       MFREE(m, mn);
>                       m = mn;
>               } while (m);
> +             m = sb->sb_mb;
> +             if (m == NULL || m->m_nextpkt == NULL)
> +                     sb->sb_mb_tail = m;
>       }
>  }
>  
> Index: sys/socketvar.h
> ===================================================================
> RCS file: /ncvs/src/sys/sys/socketvar.h,v
> retrieving revision 1.46.2.4
> diff -u -r1.46.2.4 socketvar.h
> --- sys/socketvar.h   2000/11/26 02:30:08     1.46.2.4
> +++ sys/socketvar.h   2001/02/08 17:55:46
> @@ -93,6 +93,7 @@
>               u_long  sb_mbmax;       /* max chars of mbufs to use */
>               long    sb_lowat;       /* low water mark */
>               struct  mbuf *sb_mb;    /* the mbuf chain */
> +             struct  mbuf *sb_mb_tail; /* last pkt in chain */
>               struct  selinfo sb_sel; /* process selecting read/write */
>               short   sb_flags;       /* flags, see below */
>               short   sb_timeo;       /* timeout for read/write */
> 



To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-net" in the body of the message

Reply via email to