On Thu, 12 May 2005, Gandalf The White wrote:

# patch ip_reass-20050507.diff
Recompile kernel

I ran:
# top

I ran the test again and CPU utilization was close to 98-99% in the
interrupt column.

Ken

Brooks Davis and I ran some tests tonight while sitting around at BSDCan and came to the conclusion that IP reassembly overhead is not the main problem here. This conclusion was derived from the patch I've attached to this e-mail (please tell me if it gets stripped off).


On my laptop, we found that we could hit it with 14000 frags per second, and it didn't matter whether those frags were all processed or all ignored (via the net.inet.ip.maxfragspersecond sysctl). Either way, the amount of CPU time used was about the same - 70%.

But on another laptop with the same processor, 8000 pps could effectively freeze it. We believe this is because the network card on that machine shares an IRQ with the sound card, making interrupt processing very expensive.

So, test out my attached patch with varying settings of maxfragspersecond and see if it makes any difference for you.

Thanks,

Mike "Silby" Silbersack
diff -u -r /usr/src/sys.old/netinet/in_pcb.c /usr/src/sys/netinet/in_pcb.c
--- /usr/src/sys.old/netinet/in_pcb.c   Sun Apr 17 18:05:05 2005
+++ /usr/src/sys/netinet/in_pcb.c       Thu May 12 21:47:39 2005
@@ -1234,5 +1234,10 @@
                        ipport_stoprandom--;
        }
        ipport_tcplastcount = ipport_tcpallocs;
+       if (ip_curfragspersecond > ip_maxfragspersecond) {
+               printf("Received %d frags, exceeded %d per second\n.",
+                       ip_curfragspersecond, ip_maxfragspersecond);
+       }
+       ip_curfragspersecond = 0;
        callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
 }
diff -u -r /usr/src/sys.old/netinet/ip_input.c /usr/src/sys/netinet/ip_input.c
--- /usr/src/sys.old/netinet/ip_input.c Sun Apr 17 18:05:06 2005
+++ /usr/src/sys/netinet/ip_input.c     Thu May 12 21:49:52 2005
@@ -130,6 +130,12 @@
        &maxfragsperpacket, 0,
        "Maximum number of IPv4 fragments allowed per packet");
 
+int ip_curfragspersecond;
+int ip_maxfragspersecond;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragspersecond, CTLFLAG_RW,
+       &ip_maxfragspersecond, 0,
+       "Maximum number of IPv4 fragments allowed per second");
+
 static int     ip_sendsourcequench = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW,
        &ip_sendsourcequench, 0,
@@ -284,6 +290,7 @@
            TAILQ_INIT(&ipq[i]);
        maxnipq = nmbclusters / 32;
        maxfragsperpacket = 16;
+       ip_maxfragspersecond = 100;
 
        /* Start ipport_tick. */
        callout_init(&ipport_tick_callout, CALLOUT_MPSAFE);
@@ -802,7 +809,9 @@
        u_short hash;
 
        /* If maxnipq or maxfragsperpacket are 0, never accept fragments. */
-       if (maxnipq == 0 || maxfragsperpacket == 0) {
+       if (maxnipq == 0 || maxfragsperpacket == 0 ||
+               ip_curfragspersecond >= ip_maxfragspersecond) {
+               ip_curfragspersecond++;
                ipstat.ips_fragments++;
                ipstat.ips_fragdropped++;
                m_freem(m);
@@ -884,6 +893,7 @@
         * ip_reass() will return a different mbuf.
         */
        ipstat.ips_fragments++;
+       ip_curfragspersecond++;
        m->m_pkthdr.header = ip;
 
        /* Previous ip_reass() started here. */
@@ -1069,6 +1079,7 @@
        ip->ip_len = (ip->ip_hl << 2) + next;
        ip->ip_src = fp->ipq_src;
        ip->ip_dst = fp->ipq_dst;
+       ip_curfragspersecond -= fp->ipq_nfrags;
        TAILQ_REMOVE(head, fp, ipq_list);
        nipq--;
        (void) m_free(dtom(fp));
Only in /usr/src/sys/netinet: ip_input.c.old
diff -u -r /usr/src/sys.old/netinet/ip_var.h /usr/src/sys/netinet/ip_var.h
--- /usr/src/sys.old/netinet/ip_var.h   Sun Apr 17 18:05:06 2005
+++ /usr/src/sys/netinet/ip_var.h       Thu May 12 21:16:47 2005
@@ -61,6 +61,8 @@
        struct mbuf *ipq_frags;         /* to ip headers of fragments */
        struct  in_addr ipq_src,ipq_dst;
        u_char  ipq_nfrags;             /* # frags in this packet */
+       u_short ipq_len;                /* length of final packet */
+       u_short ipq_curlen;             /* how much we've gotten so far */
        struct label *ipq_label;                /* MAC label */
 };
 #endif /* _KERNEL */
@@ -156,6 +158,8 @@
 extern u_long  (*ip_mcast_src)(int);
 extern int rsvp_on;
 extern struct  pr_usrreqs rip_usrreqs;
+extern int     ip_curfragspersecond;
+extern int     ip_maxfragspersecond;
 
 int     ip_ctloutput(struct socket *, struct sockopt *sopt);
 void    ip_drain(void);
_______________________________________________
freebsd-net@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-net
To unsubscribe, send any mail to "[EMAIL PROTECTED]"

Reply via email to