It would probably help if I attached the patch.  8-).

-- Terry

Terry Lambert wrote:
> 
> > Well... I have all those stats, but I wasn't wanting to type that
> > much.  IIRC, we normally test with 80 byte packets ... they can be UDP
> > or TCP ... we're testing the routing.  The box has two interfaces and
> > we measure the number of PPS that get to the box on the other side.
> >
> > Without polling patches, the single processor box definitely
> > experiences livelock.  Interestingly, the degree of livelock is
> > fairly motherboard dependent.  We have tested many cards and so far
> > fxp's are our best performers.
> 
> Please try the attached patch, with and without DEVICE_POLLING.
> 
> The patch gets rid of the NETISR calls to ipintr(), by calling
> the ip_input() routine directly from the ether_input() called
> at input interrupt time by the ethernet controller.
> 
> The effect of this should be that the input processing will run
> to completion at interrupt time, and therefore avoid input
> livelock, all the way up to the top of the TCP stack, but not
> past the bottom of the sockets layer (I have patches to take it
> all the way to the system call layer).
> 
> What this will basically do is get rid of the latency in the
> NETISR delay for processing input IP packets via ipintr, and it
> should avoid locking out IP and TCP processing, if the interrupt
> load is very high.
> 
> I've tested this locally, with no ill effects (so far).
> 
> This should drastically increase your packets per second number
> for performance under load.  It will also increase your connections
> per second number on the peak before falloff, by about a factor of
> 1.5 (I have been unable to load it sufficiently in the DEVICE_POLLING
> case to guess a number in the non-falloff case).
> 
> The connections per second number will be better when connection
> requests run to completion from the TCP stack, up through the
> sockets layer (I will provide patches for this, after you have
> tested this one).
> 
> The basic theory here is that ipintr processing can be delayed
> indefinitely, if interrupt load is high enough, and there will
> be a maximum latency of 10ms for IP processing after ether_input(),
> in the normal stack case, without the patches.
> 
> Let me know how this works.
> 
> -- Terry
Index: net/if_ethersubr.c
===================================================================
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.125
diff -c -r1.125 if_ethersubr.c
*** net/if_ethersubr.c  28 Sep 2002 17:15:22 -0000      1.125
--- net/if_ethersubr.c  18 Nov 2002 18:03:15 -0000
***************
*** 34,39 ****
--- 34,43 ----
   * $FreeBSD: src/sys/net/if_ethersubr.c,v 1.125 2002/09/28 17:15:22 phk Exp $
   */
  
+ #define       TERRY_LRP
+ /*
+ */
+ 
  #include "opt_atalk.h"
  #include "opt_inet.h"
  #include "opt_inet6.h"
***************
*** 720,728 ****
--- 724,738 ----
        case ETHERTYPE_IP:
                if (ipflow_fastforward(m))
                        return;
+ #ifndef TERRY_LRP
                schednetisr(NETISR_IP);
                inq = &ipintrq;
                break;
+ #else /* TERRY_LRP */
+               /* call ip_input directly, without queueing */
+               ip_input(m);
+               return;
+ #endif        /* !TERRY_LRP */
  
        case ETHERTYPE_ARP:
                if (ifp->if_flags & IFF_NOARP) {
Index: netinet/ip_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.210
diff -c -r1.210 ip_input.c
*** netinet/ip_input.c  28 Sep 2002 17:15:25 -0000      1.210
--- netinet/ip_input.c  18 Nov 2002 18:03:20 -0000
***************
*** 35,40 ****
--- 35,43 ----
   */
  
  #define       _IP_VHL
+ #define       TERRY_LRP
+ /*
+ */
  
  #include "opt_bootp.h"
  #include "opt_ipfw.h"
***************
*** 221,227 ****
--- 224,232 ----
  static void   ip_freef(struct ipqhead *, struct ipq *);
  static struct mbuf *ip_reass(struct mbuf *, struct ipqhead *,
                struct ipq *, u_int32_t *, u_int16_t *);
+ #ifndef TERRY_LRP
  static void   ipintr(void);
+ #endif        /* !TERRY_LRP*/
  
  /*
   * IP initialization: fill in IP protocol switch table.
***************
*** 259,265 ****
--- 264,272 ----
        mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF);
        ipintrq_present = 1;
  
+ #ifndef TERRY_LRP
        register_netisr(NETISR_IP, ipintr);
+ #endif        /* !TERRY_LRP*/
  }
  
  /*
***************
*** 848,853 ****
--- 855,861 ----
        m_freem(m);
  }
  
+ #ifndef TERRY_LRP
  /*
   * IP software interrupt routine - to go away sometime soon
   */
***************
*** 863,868 ****
--- 871,877 ----
                ip_input(m);
        }
  }
+ #endif        /* !TERRY_LRP */
  
  /*
   * Take incoming datagram fragment and try to reassemble it into

Reply via email to